This section presents tables summarizing important variables in our data. The variables for academic performance will be used to predict success in college-level courses and the demographic variables will be used to examine how our predictive models perform for different groups of students.
There are three sets of tables. The first set uses the sample of students who enrolled in college-level English. The second set uses the sample of students who enrolled in college-level math. These two samples are the ones used for the predictive analyses. The third set uses all students in the data; this full sample was not used in the predictive analyses.
In general, missing values were imputed with a value of zero, and an additional 0/1 binary variable was created, which flagged cases that were imputed. For example, if student A had no missing values, but student B had a missing value for high school GPA, the imputed GPA variable would be 0 and the binary flag would be 1 for student B. When including variables in the predictive models, both the imputed variable and the accompanying binary flag were included in the models.
The variables that capture high school course-taking behavior do not contain any missing values, because a value of 0 captures both students who did not take the course and students who did not pass it. In most cases, the high school courses identified in the data were required math courses.
# Academic-performance summary for students enrolled in college-level English.
# Descriptive statistics for `vars1` are joined (by `name`) to the per-site
# means of the `vars1miss` columns, which are relabelled "Percent Missing".
# NOTE(review): the 3rd/4th arguments of desc_table() are passed through
# unchanged; presumably rounding digits and a by-site switch -- confirm.
left_join(
  desc_table(data_eng_cl, vars1, 1, TRUE),
  desc_table(data_eng_cl, vars1miss, 0, TRUE) %>%
    select(name, contains("Mean")) %>%
    rename_with(~ str_replace_all(., "_Mean", "_Percent Missing")),
  by = c("name")
) %>%
  rename("Category" = name,
         "Measure" = group) %>%
  # Anchor every site prefix with "_": a bare "ASU" also matches "ASUMID_*"
  # (and "SAU" matches "SAUTECH_*"), which pulled the ASUMID/SAUTECH statistics
  # in front of the "Percent Missing" columns and split each site's header
  # into two separate groups in the rendered table.
  select(Measure,
         Category,
         starts_with("ASU_"),
         starts_with("ASUMID_"),
         starts_with("CCCUA_"),
         starts_with("NWACC_"),
         starts_with("SAU_"),
         starts_with("SAUTECH_"),
         starts_with("All_")) %>%
  as_grouped_data(groups = "Measure") %>%
  flextable() %>%
  # split "<site>_<statistic>" column names into a two-row header
  separate_header()
Measure | Category | ASU | ASUMID | ASU | ASUMID | CCCUA | NWACC | SAU | SAUTECH | SAU | SAUTECH | All | |||||||||||||||||
Mean | Min | Max | Mean | Min | Max | Percent Missing | Percent Missing | Mean | Min | Max | Percent Missing | Mean | Min | Max | Percent Missing | Mean | Min | Max | Mean | Min | Max | Percent Missing | Percent Missing | Mean | Min | Max | Percent Missing | ||
ACT Scores | |||||||||||||||||||||||||||||
ACT English | 24.3 | 10.0 | 36.0 | 17.4 | 7.0 | 33.0 | 6 | 56 | 21.8 | 8.0 | 36.0 | 65 | 23.1 | 3 | 36.0 | 28 | 23.2 | 9.0 | 36 | 19.2 | 6.0 | 36.0 | 8 | 27 | 23.1 | 3.0 | 36.0 | 24 | |
ACT Math | 22.4 | 12.0 | 36.0 | 17.5 | 1.0 | 34.0 | 6 | 52 | 20.6 | 11.0 | 33.0 | 65 | 21.9 | 11 | 36.0 | 28 | 21.6 | 13.0 | 36 | 18.6 | 11.0 | 31.0 | 8 | 27 | 21.7 | 1.0 | 36.0 | 24 | |
ACT Reading | 25.1 | 10.0 | 36.0 | 18.9 | 5.0 | 35.0 | 6 | 55 | 22.9 | 11.0 | 36.0 | 65 | 23.9 | 10 | 36.0 | 28 | 24.2 | 11.0 | 36 | 20.4 | 8.0 | 36.0 | 8 | 27 | 24.0 | 5.0 | 36.0 | 24 | |
High School Attendance | |||||||||||||||||||||||||||||
Days Absent | 8.0 | 0.0 | 97.0 | 14.1 | 1.0 | 39.0 | 44 | 94 | 6.4 | 0.0 | 55.0 | 30 | 6.8 | 0 | 77.0 | 51 | 7.7 | 0.0 | 91 | 7.2 | 0.0 | 54.0 | 56 | 52 | 7.2 | 0.0 | 97.0 | 49 | |
Days Present | 169.6 | 69.0 | 196.0 | 163.5 | 140.0 | 178.0 | 44 | 94 | 170.5 | 115.0 | 178.0 | 30 | 169.9 | 99 | 192.0 | 51 | 169.8 | 87.0 | 186 | 170.2 | 98.0 | 182.0 | 56 | 52 | 169.9 | 69.0 | 196.0 | 49 | |
Other High School Metrics | |||||||||||||||||||||||||||||
Class Rank | 66.9 | 0.0 | 652.0 | 164.2 | 3.0 | 350.0 | 44 | 94 | 22.6 | 0.0 | 340.0 | 34 | 139.2 | 0 | 983.0 | 52 | 66.6 | 0.0 | 665 | 39.7 | 0.0 | 903.0 | 56 | 54 | 96.1 | 0.0 | 983.0 | 50 | |
GPA | 3.5 | 1.4 | 5.2 | 2.7 | 1.2 | 4.1 | 2 | 17 | 3.4 | 1.3 | 4.9 | 4 | 3.3 | 1 | 5.2 | 16 | 3.4 | 0.6 | 5 | 3.1 | 1.3 | 4.3 | 10 | 11 | 3.3 | 0.6 | 5.2 | 11 | |
10th Grade Assessments | |||||||||||||||||||||||||||||
10th Grade Math | 431.5 | 412.0 | 451.0 | 76 | 100 | 431.8 | 411.0 | 449.0 | 62 | 433.3 | 400 | 452.0 | 72 | 430.0 | 414.0 | 450 | 428.4 | 409.0 | 445.0 | 82 | 78 | 432.3 | 400.0 | 452.0 | 74 | ||||
10th Grade Reading | 432.7 | 415.0 | 445.0 | 77 | 100 | 433.0 | 413.0 | 445.0 | 63 | 433.4 | 412 | 445.0 | 72 | 431.8 | 415.0 | 442 | 430.5 | 413.0 | 441.0 | 83 | 76 | 432.9 | 412.0 | 445.0 | 74 | ||||
10th Grade Science | 432.9 | 406.0 | 449.0 | 77 | 100 | 433.1 | 409.0 | 449.0 | 63 | 434.0 | 400 | 449.0 | 72 | 432.3 | 409.0 | 446 | 429.6 | 406.0 | 446.0 | 83 | 75 | 433.3 | 400.0 | 449.0 | 74 | ||||
9th Grade Assessments | |||||||||||||||||||||||||||||
9th Grade Math | 428.5 | 414.0 | 445.0 | 89 | 100 | 430.5 | 413.0 | 446.0 | 71 | 431.0 | 409 | 454.0 | 81 | 428.6 | 411.0 | 443 | 427.7 | 414.0 | 445.0 | 92 | 89 | 430.3 | 409.0 | 454.0 | 84 | ||||
9th Grade Reading | 430.7 | 414.0 | 445.0 | 89 | 100 | 431.6 | 418.0 | 442.0 | 73 | 431.8 | 413 | 445.0 | 81 | 430.4 | 419.0 | 444 | 430.2 | 414.0 | 440.0 | 92 | 87 | 431.5 | 413.0 | 445.0 | 84 | ||||
9th Grade Science | 430.6 | 410.0 | 446.0 | 89 | 100 | 431.8 | 409.0 | 446.0 | 73 | 432.4 | 403 | 449.0 | 81 | 430.9 | 417.0 | 446 | 430.3 | 415.0 | 444.0 | 92 | 87 | 431.8 | 403.0 | 449.0 | 84 | ||||
Passed High School Course (%) | |||||||||||||||||||||||||||||
Algebra 1 | 6.9 | 0.0 | 100.0 | 0.0 | 0.0 | 0.0 | 16.8 | 0.0 | 100.0 | 12.0 | 0 | 100.0 | 5.1 | 0.0 | 100 | 6.3 | 0.0 | 100.0 | 10.0 | 0.0 | 100.0 | ||||||||
Algebra 2 | 28.7 | 0.0 | 100.0 | 0.8 | 0.0 | 100.0 | 43.8 | 0.0 | 100.0 | 30.4 | 0 | 100.0 | 20.8 | 0.0 | 100 | 28.3 | 0.0 | 100.0 | 29.5 | 0.0 | 100.0 | ||||||||
Geometry | 17.4 | 0.0 | 100.0 | 0.2 | 0.0 | 100.0 | 36.7 | 0.0 | 100.0 | 22.3 | 0 | 100.0 | 14.4 | 0.0 | 100 | 22.0 | 0.0 | 100.0 | 21.1 | 0.0 | 100.0 |
# Demographic composition of the college-level English sample, one column per
# site. Rows are the `vars2` means stacked on top of the `vars2miss` means
# (the "... missing" rows of the rendered table).
bind_rows(
  desc_table(data_eng_cl, vars2, 1, TRUE) %>%
    select(name, group, contains("Mean")),
  desc_table(data_eng_cl, vars2miss, 0, TRUE) %>%
    select(name, group, contains("Mean"))
) %>%
  rename("Category" = name,
         "Subgroup" = group) %>%
  # drop the statistic suffix so each column is headed by its site code only
  rename_with(~ str_remove_all(., "_Mean")) %>%
  arrange(Subgroup) %>%
  # Select sites by exact name. Prefix matching made "ASU" swallow "ASUMID"
  # and "SAU" swallow "SAUTECH", so the intended column order only held when
  # the input already happened to be in that order.
  select(Subgroup,
         Category,
         any_of(c("ASU", "ASUMID", "CCCUA", "NWACC", "SAU", "SAUTECH", "All"))) %>%
  as_grouped_data(groups = "Subgroup") %>%
  flextable()
Subgroup | Category | ASU | ASUMID | CCCUA | NWACC | SAU | SAUTECH | All |
Age | ||||||||
21-30 | 1.7 | 28.8 | 19.9 | 20.7 | 6.6 | 46.0 | 16.7 | |
31 or older | 0.1 | 22.0 | 7.4 | 8.3 | 1.4 | 18.9 | 6.7 | |
20 or younger | 98.2 | 49.2 | 34.9 | 38.3 | 92.1 | 15.1 | 55.6 | |
Age missing | 0.0 | 0.0 | 38.0 | 33.0 | 0.0 | 20.0 | 21.0 | |
ESL Status | ||||||||
No | 99.3 | 0.0 | 52.1 | 55.6 | 100.0 | 48.0 | 67.9 | |
Yes | 0.7 | 0.0 | 10.2 | 11.7 | 0.0 | 32.0 | 9.0 | |
ESL status missing | 0.0 | 100.0 | 38.0 | 33.0 | 0.0 | 20.0 | 23.0 | |
Gender | ||||||||
Men | 42.3 | 32.0 | 20.0 | 27.4 | 49.7 | 33.3 | 32.7 | |
Women | 57.6 | 68.0 | 42.3 | 39.7 | 50.3 | 46.7 | 46.1 | |
Gender missing | 0.0 | 0.0 | 38.0 | 33.0 | 0.0 | 20.0 | 21.0 | |
High School Experience | ||||||||
Ever Economically Disadvantaged | 0.2 | 0.1 | 0.4 | 0.1 | 0.2 | 0.3 | 0.2 | |
Ever Section 504 Plan | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | |
Ever Homeless | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | |
Ever Limited English Proficiency | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | |
Ever Special Education | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | |
Ever Gifted/Talented | 0.0 | 0.0 | 0.1 | 0.1 | 0.0 | 0.0 | 0.1 | |
Pell Eligible | ||||||||
Pell Ineligible | 56.5 | 21.2 | 37.7 | 48.0 | 45.9 | 0.0 | 45.4 | |
Pell Eligible | 43.5 | 78.8 | 24.6 | 19.3 | 54.1 | 0.0 | 28.5 | |
Pell status missing | 0.0 | 0.0 | 38.0 | 33.0 | 0.0 | 100.0 | 26.0 | |
Race/Ethnicity | ||||||||
Asian | 0.8 | 1.7 | 0.2 | 1.8 | 0.8 | 0.5 | 1.3 | |
Black | 12.6 | 57.1 | 4.9 | 1.4 | 20.3 | 30.0 | 8.9 | |
Hispanic | 4.3 | 4.1 | 11.0 | 12.2 | 4.7 | 3.1 | 8.8 | |
Other | 3.2 | 1.0 | 7.2 | 4.4 | 1.5 | 0.4 | 3.8 | |
White | 74.4 | 31.5 | 38.4 | 45.0 | 70.5 | 45.4 | 53.4 | |
Race/ethnicity missing | 5.0 | 5.0 | 38.0 | 35.0 | 2.0 | 21.0 | 24.0 | |
Received GED | ||||||||
Did not receive | 0.0 | 86.9 | 60.4 | 63.9 | 90.2 | 75.1 | 52.0 | |
Received | 0.0 | 13.1 | 1.9 | 3.3 | 9.7 | 4.9 | 3.3 | |
GED status missing | 100.0 | 0.0 | 38.0 | 33.0 | 0.0 | 20.0 | 45.0 |
# Academic-performance summary for students enrolled in college-level math.
# Descriptive statistics for `vars1` are joined (by `name`) to the per-site
# means of the `vars1miss` columns, which are relabelled "Percent Missing".
# NOTE(review): the 3rd/4th arguments of desc_table() are passed through
# unchanged; presumably rounding digits and a by-site switch -- confirm.
left_join(
  desc_table(data_math_cl, vars1, 1, TRUE),
  desc_table(data_math_cl, vars1miss, 0, TRUE) %>%
    select(name, contains("Mean")) %>%
    rename_with(~ str_replace_all(., "_Mean", "_Percent Missing")),
  by = c("name")
) %>%
  rename("Category" = name,
         "Measure" = group) %>%
  # Anchor every site prefix with "_": a bare "ASU" also matches "ASUMID_*"
  # (and "SAU" matches "SAUTECH_*"), which pulled the ASUMID/SAUTECH statistics
  # in front of the "Percent Missing" columns and split each site's header
  # into two separate groups in the rendered table.
  select(Measure,
         Category,
         starts_with("ASU_"),
         starts_with("ASUMID_"),
         starts_with("CCCUA_"),
         starts_with("NWACC_"),
         starts_with("SAU_"),
         starts_with("SAUTECH_"),
         starts_with("All_")) %>%
  as_grouped_data(groups = "Measure") %>%
  flextable() %>%
  # split "<site>_<statistic>" column names into a two-row header
  separate_header()
Measure | Category | ASU | ASUMID | ASU | ASUMID | CCCUA | NWACC | SAU | SAUTECH | SAU | SAUTECH | All | |||||||||||||||||
Mean | Min | Max | Mean | Min | Max | Percent Missing | Percent Missing | Mean | Min | Max | Percent Missing | Mean | Min | Max | Percent Missing | Mean | Min | Max | Mean | Min | Max | Percent Missing | Percent Missing | Mean | Min | Max | Percent Missing | ||
ACT Scores | |||||||||||||||||||||||||||||
ACT English | 25.2 | 10.0 | 36.0 | 17.2 | 9.0 | 33.0 | 6 | 57 | 21.3 | 7.0 | 36.0 | 62 | 23.5 | 8.0 | 36.0 | 26 | 23.1 | 8.0 | 36 | 19.4 | 7.0 | 36.0 | 9 | 22 | 23.5 | 7.0 | 36 | 22 | |
ACT Math | 23.6 | 14.0 | 35.0 | 17.4 | 1.0 | 30.0 | 6 | 53 | 21.1 | 12.0 | 33.0 | 62 | 23.1 | 8.0 | 36.0 | 26 | 22.8 | 14.0 | 36 | 19.0 | 6.0 | 30.0 | 8 | 22 | 22.8 | 1.0 | 36 | 22 | |
ACT Reading | 25.9 | 11.0 | 36.0 | 18.8 | 5.0 | 35.0 | 6 | 56 | 22.3 | 7.0 | 36.0 | 62 | 24.3 | 10.0 | 36.0 | 26 | 24.5 | 12.0 | 36 | 20.5 | 9.0 | 36.0 | 9 | 22 | 24.4 | 5.0 | 36 | 22 | |
High School Attendance | |||||||||||||||||||||||||||||
Days Absent | 7.6 | 0.0 | 76.0 | 15.2 | 1.0 | 39.0 | 43 | 94 | 6.5 | 0.0 | 96.0 | 28 | 6.4 | 0.0 | 77.0 | 53 | 7.2 | 0.0 | 61 | 6.7 | 0.0 | 42.0 | 56 | 45 | 6.8 | 0.0 | 96 | 50 | |
Days Present | 170.2 | 103.0 | 193.0 | 162.3 | 140.0 | 178.0 | 43 | 94 | 170.6 | 81.0 | 184.0 | 28 | 170.4 | 101.0 | 192.0 | 53 | 170.4 | 114.0 | 186 | 170.7 | 98.0 | 181.0 | 56 | 45 | 170.4 | 81.0 | 193 | 50 | |
Other High School Metrics | |||||||||||||||||||||||||||||
Class Rank | 56.8 | 0.0 | 610.0 | 176.6 | 3.0 | 337.0 | 44 | 94 | 24.1 | 0.0 | 249.0 | 31 | 133.3 | 0.0 | 911.0 | 54 | 59.2 | 0.0 | 665 | 36.4 | 0.0 | 903.0 | 56 | 46 | 87.0 | 0.0 | 911 | 51 | |
GPA | 3.6 | 1.4 | 4.9 | 2.7 | 1.3 | 4.1 | 2 | 17 | 3.4 | 1.3 | 4.2 | 4 | 3.3 | 1.1 | 4.7 | 20 | 3.4 | 0.6 | 5 | 3.2 | 1.2 | 4.3 | 11 | 11 | 3.4 | 0.6 | 5 | 13 | |
10th Grade Assessments | |||||||||||||||||||||||||||||
10th Grade Math | 433.9 | 415.0 | 450.0 | 77 | 100 | 432.7 | 411.0 | 449.0 | 60 | 434.8 | 400.0 | 454.0 | 74 | 432.5 | 413.0 | 445 | 428.3 | 412.0 | 445.0 | 83 | 73 | 433.7 | 400.0 | 454 | 75 | ||||
10th Grade Reading | 433.9 | 416.0 | 445.0 | 78 | 100 | 432.6 | 411.0 | 444.0 | 61 | 433.7 | 412.0 | 445.0 | 74 | 432.3 | 415.0 | 443 | 430.1 | 411.0 | 443.0 | 84 | 71 | 433.2 | 411.0 | 445 | 76 | ||||
10th Grade Science | 435.1 | 413.0 | 449.0 | 78 | 100 | 433.2 | 406.0 | 449.0 | 61 | 434.7 | 400.0 | 449.0 | 74 | 434.0 | 409.0 | 446 | 429.5 | 408.0 | 446.0 | 84 | 71 | 434.2 | 400.0 | 449 | 76 | ||||
9th Grade Assessments | |||||||||||||||||||||||||||||
9th Grade Math | 431.2 | 415.0 | 445.0 | 90 | 100 | 432.3 | 413.0 | 446.0 | 73 | 432.3 | 411.0 | 452.0 | 83 | 431.0 | 413.0 | 443 | 426.8 | 408.0 | 445.0 | 91 | 86 | 431.7 | 408.0 | 452 | 85 | ||||
9th Grade Reading | 431.9 | 414.0 | 446.0 | 91 | 100 | 431.6 | 418.0 | 442.0 | 75 | 432.4 | 415.0 | 445.0 | 83 | 431.2 | 419.0 | 441 | 428.7 | 411.0 | 439.0 | 92 | 83 | 431.9 | 411.0 | 446 | 86 | ||||
9th Grade Science | 432.6 | 410.0 | 446.0 | 91 | 100 | 432.6 | 406.0 | 446.0 | 75 | 433.3 | 400.0 | 449.0 | 83 | 433.0 | 415.0 | 446 | 428.8 | 410.0 | 444.0 | 92 | 83 | 432.8 | 400.0 | 449 | 86 | ||||
Passed High School Course (%) | |||||||||||||||||||||||||||||
Algebra 1 | 4.8 | 0.0 | 100.0 | 0.0 | 0.0 | 0.0 | 14.3 | 0.0 | 100.0 | 9.9 | 0.0 | 100.0 | 4.6 | 0.0 | 100 | 8.9 | 0.0 | 100.0 | 8.1 | 0.0 | 100 | ||||||||
Algebra 2 | 27.2 | 0.0 | 100.0 | 1.1 | 0.0 | 100.0 | 44.2 | 0.0 | 100.0 | 28.3 | 0.0 | 100.0 | 20.2 | 0.0 | 100 | 32.4 | 0.0 | 100.0 | 28.0 | 0.0 | 100 | ||||||||
Geometry | 15.4 | 0.0 | 100.0 | 0.2 | 0.0 | 100.0 | 36.8 | 0.0 | 100.0 | 20.2 | 0.0 | 100.0 | 13.9 | 0.0 | 100 | 25.0 | 0.0 | 100.0 | 19.4 | 0.0 | 100 |
# Demographic composition of the college-level math sample, one column per
# site. Rows are the `vars2` means stacked on top of the `vars2miss` means
# (the "... missing" rows of the rendered table).
bind_rows(
  desc_table(data_math_cl, vars2, 1, TRUE) %>%
    select(name, group, contains("Mean")),
  desc_table(data_math_cl, vars2miss, 0, TRUE) %>%
    select(name, group, contains("Mean"))
) %>%
  rename("Category" = name,
         "Subgroup" = group) %>%
  # drop the statistic suffix so each column is headed by its site code only
  rename_with(~ str_remove_all(., "_Mean")) %>%
  arrange(Subgroup) %>%
  # Select sites by exact name. Prefix matching made "ASU" swallow "ASUMID"
  # and "SAU" swallow "SAUTECH", so the intended column order only held when
  # the input already happened to be in that order.
  select(Subgroup,
         Category,
         any_of(c("ASU", "ASUMID", "CCCUA", "NWACC", "SAU", "SAUTECH", "All"))) %>%
  as_grouped_data(groups = "Subgroup") %>%
  flextable()
Subgroup | Category | ASU | ASUMID | CCCUA | NWACC | SAU | SAUTECH | All |
Age | ||||||||
21-30 | 1.5 | 30.0 | 19.1 | 20.6 | 6.4 | 40.4 | 15.8 | |
31 or older | 0.2 | 26.8 | 6.2 | 6.2 | 1.9 | 19.0 | 5.6 | |
20 or younger | 98.4 | 43.2 | 39.2 | 35.1 | 91.7 | 18.3 | 56.0 | |
Age missing | 0.0 | 0.0 | 36.0 | 38.0 | 0.0 | 22.0 | 23.0 | |
ESL Status | ||||||||
No | 99.3 | 0.0 | 53.8 | 49.2 | 100.0 | 45.9 | 65.8 | |
Yes | 0.7 | 0.0 | 10.8 | 12.8 | 0.0 | 31.8 | 9.2 | |
ESL status missing | 0.0 | 100.0 | 35.0 | 38.0 | 0.0 | 22.0 | 25.0 | |
Gender | ||||||||
Men | 45.4 | 28.0 | 22.0 | 28.8 | 56.2 | 29.1 | 35.3 | |
Women | 54.6 | 72.0 | 42.6 | 32.9 | 43.7 | 48.7 | 42.0 | |
Gender missing | 0.0 | 0.0 | 35.0 | 38.0 | 0.0 | 22.0 | 23.0 | |
High School Experience | ||||||||
Ever Economically Disadvantaged | 0.2 | 0.0 | 0.4 | 0.1 | 0.2 | 0.3 | 0.2 | |
Ever Section 504 Plan | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | |
Ever Homeless | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | |
Ever Limited English Proficiency | 0.0 | 0.0 | 0.1 | 0.0 | 0.0 | 0.0 | 0.0 | |
Ever Special Education | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | |
Ever Gifted/Talented | 0.0 | 0.0 | 0.1 | 0.1 | 0.0 | 0.1 | 0.1 | |
Pell Eligible | ||||||||
Pell Ineligible | 60.4 | 25.5 | 37.3 | 47.4 | 48.3 | 0.0 | 46.4 | |
Pell Eligible | 39.6 | 74.5 | 27.3 | 14.5 | 51.7 | 0.0 | 26.1 | |
Pell status missing | 0.0 | 0.0 | 35.0 | 38.0 | 0.0 | 100.0 | 28.0 | |
Race/Ethnicity | ||||||||
Asian | 0.9 | 1.6 | 0.1 | 2.3 | 1.2 | 0.4 | 1.5 | |
Black | 10.3 | 60.0 | 6.4 | 1.2 | 18.8 | 29.9 | 8.9 | |
Hispanic | 4.2 | 4.1 | 12.0 | 11.1 | 4.9 | 2.9 | 8.1 | |
Other | 3.4 | 0.9 | 7.0 | 3.6 | 1.6 | 0.2 | 3.3 | |
White | 75.7 | 30.0 | 38.4 | 40.4 | 71.0 | 44.0 | 52.1 | |
Race/ethnicity missing | 5.0 | 3.0 | 36.0 | 41.0 | 3.0 | 23.0 | 26.0 | |
Received GED | ||||||||
Did not receive | 0.0 | 89.3 | 63.1 | 59.8 | 90.8 | 73.8 | 50.5 | |
Received | 0.0 | 10.7 | 1.6 | 1.9 | 9.2 | 3.9 | 2.6 | |
GED status missing | 100.0 | 0.0 | 35.0 | 38.0 | 0.0 | 22.0 | 47.0 |
# Academic-performance summary for all students in the data (`data2`).
# Descriptive statistics for `vars1` are joined (by `name`) to the per-site
# means of the `vars1miss` columns, which are relabelled "Percent Missing".
# NOTE(review): the 3rd/4th arguments of desc_table() are passed through
# unchanged; presumably rounding digits and a by-site switch -- confirm.
left_join(
  desc_table(data2, vars1, 1, TRUE),
  desc_table(data2, vars1miss, 0, TRUE) %>%
    select(name, contains("Mean")) %>%
    rename_with(~ str_replace_all(., "_Mean", "_Percent Missing")),
  by = c("name")
) %>%
  rename("Category" = name,
         "Measure" = group) %>%
  # Anchor every site prefix with "_": a bare "ASU" also matches "ASUMID_*"
  # (and "SAU" matches "SAUTECH_*"), which pulled the ASUMID/SAUTECH statistics
  # in front of the "Percent Missing" columns and split each site's header
  # into two separate groups in the rendered table.
  select(Measure,
         Category,
         starts_with("ASU_"),
         starts_with("ASUMID_"),
         starts_with("CCCUA_"),
         starts_with("NWACC_"),
         starts_with("SAU_"),
         starts_with("SAUTECH_"),
         starts_with("All_")) %>%
  as_grouped_data(groups = "Measure") %>%
  flextable() %>%
  # split "<site>_<statistic>" column names into a two-row header
  separate_header()
Measure | Category | ASU | ASUMID | ASU | ASUMID | CCCUA | NWACC | SAU | SAUTECH | SAU | SAUTECH | All | |||||||||||||||||
Mean | Min | Max | Mean | Min | Max | Percent Missing | Percent Missing | Mean | Min | Max | Percent Missing | Mean | Min | Max | Percent Missing | Mean | Min | Max | Mean | Min | Max | Percent Missing | Percent Missing | Mean | Min | Max | Percent Missing | ||
ACT Scores | |||||||||||||||||||||||||||||
ACT English | 24.8 | 10.0 | 36.0 | 17.4 | 6 | 34.0 | 5 | 64 | 19.6 | 7.0 | 36.0 | 67 | 22.0 | 2.0 | 36.0 | 41 | 21.5 | 6.0 | 36 | 17.7 | 5.0 | 36.0 | 11 | 58 | 22.0 | 2.0 | 36.0 | 37 | |
ACT Math | 22.8 | 12.0 | 36.0 | 17.4 | 1 | 34.0 | 5 | 61 | 19.3 | 10.0 | 33.0 | 67 | 21.3 | 7.0 | 36.0 | 41 | 20.7 | 7.0 | 36 | 17.9 | 6.0 | 35.0 | 11 | 58 | 21.2 | 1.0 | 36.0 | 37 | |
ACT Reading | 25.6 | 10.0 | 36.0 | 18.6 | 5 | 35.0 | 5 | 64 | 20.8 | 7.0 | 36.0 | 67 | 23.0 | 5.0 | 36.0 | 41 | 22.9 | 8.0 | 36 | 18.9 | 3.0 | 36.0 | 11 | 58 | 23.0 | 3.0 | 36.0 | 37 | |
High School Attendance | |||||||||||||||||||||||||||||
Days Absent | 7.6 | 0.0 | 97.0 | 13.3 | 1 | 49.0 | 42 | 95 | 7.1 | 0.0 | 96.0 | 44 | 7.3 | 0.0 | 77.0 | 64 | 7.4 | 0.0 | 91 | 8.9 | 0.0 | 70.0 | 57 | 67 | 7.5 | 0.0 | 97.0 | 60 | |
Days Present | 170.1 | 69.0 | 196.0 | 164.6 | 129 | 178.0 | 42 | 95 | 169.7 | 81.0 | 184.0 | 44 | 169.4 | 81.0 | 192.0 | 64 | 170.2 | 87.0 | 186 | 168.1 | 69.0 | 199.0 | 57 | 67 | 169.5 | 69.0 | 199.0 | 60 | |
Other High School Metrics | |||||||||||||||||||||||||||||
Class Rank | 60.3 | 0.0 | 652.0 | 176.6 | 0 | 926.0 | 43 | 95 | 29.5 | 0.0 | 340.0 | 47 | 157.4 | 0.0 | 988.0 | 66 | 60.7 | 0.0 | 665 | 56.5 | 0.0 | 931.0 | 57 | 69 | 105.6 | 0.0 | 988.0 | 61 | |
GPA | 3.6 | 1.4 | 5.2 | 2.7 | 1 | 4.2 | 2 | 25 | 3.2 | 1.3 | 4.9 | 6 | 3.1 | 0.7 | 5.5 | 21 | 3.3 | 0.6 | 5 | 3.0 | 0.5 | 4.3 | 14 | 41 | 3.2 | 0.5 | 5.5 | 19 | |
10th Grade Assessments | |||||||||||||||||||||||||||||
10th Grade Math | 432.3 | 412.0 | 451.0 | 74 | 100 | 429.7 | 410.0 | 449.0 | 70 | 432.2 | 400.0 | 456.0 | 81 | 429.3 | 411.0 | 450 | 424.5 | 407.0 | 447.0 | 82 | 84 | 431.0 | 400.0 | 456.0 | 81 | ||||
10th Grade Reading | 433.1 | 415.0 | 445.0 | 75 | 100 | 430.7 | 411.0 | 445.0 | 71 | 432.3 | 408.0 | 446.0 | 81 | 430.8 | 412.0 | 443 | 426.6 | 407.0 | 445.0 | 83 | 83 | 431.6 | 407.0 | 446.0 | 81 | ||||
10th Grade Science | 433.6 | 406.0 | 449.0 | 75 | 100 | 430.6 | 406.0 | 449.0 | 71 | 432.7 | 400.0 | 449.0 | 81 | 430.9 | 404.0 | 446 | 426.3 | 403.0 | 738.0 | 83 | 83 | 431.8 | 400.0 | 738.0 | 81 | ||||
9th Grade Assessments | |||||||||||||||||||||||||||||
9th Grade Math | 429.4 | 414.0 | 445.0 | 87 | 100 | 429.4 | 407.0 | 446.0 | 79 | 430.5 | 409.0 | 456.0 | 87 | 427.7 | 410.0 | 443 | 423.1 | 406.0 | 445.0 | 91 | 90 | 429.3 | 406.0 | 456.0 | 88 | ||||
9th Grade Reading | 431.2 | 414.0 | 446.0 | 88 | 100 | 430.1 | 407.0 | 442.0 | 80 | 431.2 | 409.0 | 445.0 | 87 | 429.3 | 411.0 | 444 | 425.3 | 408.0 | 443.0 | 91 | 89 | 430.3 | 407.0 | 446.0 | 88 | ||||
9th Grade Science | 431.4 | 410.0 | 446.0 | 88 | 100 | 430.3 | 406.0 | 446.0 | 80 | 431.8 | 400.0 | 449.0 | 87 | 429.5 | 407.0 | 446 | 424.9 | 404.0 | 444.0 | 91 | 89 | 430.7 | 400.0 | 449.0 | 88 | ||||
Passed High School Course (%) | |||||||||||||||||||||||||||||
Algebra 1 | 7.0 | 0.0 | 100.0 | 0.1 | 0 | 100.0 | 12.9 | 0.0 | 100.0 | 8.4 | 0.0 | 100.0 | 5.4 | 0.0 | 100 | 6.7 | 0.0 | 100.0 | 7.8 | 0.0 | 100.0 | ||||||||
Algebra 2 | 29.3 | 0.0 | 100.0 | 1.1 | 0 | 100.0 | 34.0 | 0.0 | 100.0 | 21.3 | 0.0 | 100.0 | 20.9 | 0.0 | 100 | 19.2 | 0.0 | 100.0 | 22.4 | 0.0 | 100.0 | ||||||||
Geometry | 18.5 | 0.0 | 100.0 | 0.2 | 0 | 100.0 | 28.2 | 0.0 | 100.0 | 15.7 | 0.0 | 100.0 | 14.6 | 0.0 | 100 | 14.3 | 0.0 | 100.0 | 16.2 | 0.0 | 100.0 |
# Demographic composition of all students in the data (`data2`), one column
# per site. Rows are the `vars2` means stacked on top of the `vars2miss` means
# (the "... missing" rows of the rendered table).
bind_rows(
  desc_table(data2, vars2, 1, TRUE) %>%
    select(name, group, contains("Mean")),
  desc_table(data2, vars2miss, 0, TRUE) %>%
    select(name, group, contains("Mean"))
) %>%
  rename("Category" = name,
         "Subgroup" = group) %>%
  # drop the statistic suffix so each column is headed by its site code only
  rename_with(~ str_remove_all(., "_Mean")) %>%
  arrange(Subgroup) %>%
  # Select sites by exact name. Prefix matching made "ASU" swallow "ASUMID"
  # and "SAU" swallow "SAUTECH", so the intended column order only held when
  # the input already happened to be in that order.
  select(Subgroup,
         Category,
         any_of(c("ASU", "ASUMID", "CCCUA", "NWACC", "SAU", "SAUTECH", "All"))) %>%
  as_grouped_data(groups = "Subgroup") %>%
  flextable()
Subgroup | Category | ASU | ASUMID | CCCUA | NWACC | SAU | SAUTECH | All |
Age | ||||||||
21-30 | 1.6 | 33.6 | 26.1 | 27.6 | 9.3 | 46.3 | 24.4 | |
31 or older | 0.2 | 30.4 | 12.2 | 12.3 | 3.4 | 26.4 | 11.8 | |
20 or younger | 98.3 | 35.9 | 37.5 | 35.6 | 87.2 | 10.3 | 46.7 | |
Age missing | 0.0 | 0.0 | 24.0 | 24.0 | 0.0 | 17.0 | 17.0 | |
ESL Status | ||||||||
No | 99.4 | 0.0 | 63.9 | 61.6 | 100.0 | 65.1 | 70.1 | |
Yes | 0.6 | 0.0 | 12.3 | 14.0 | 0.0 | 18.0 | 10.7 | |
ESL status missing | 0.0 | 100.0 | 24.0 | 24.0 | 0.0 | 17.0 | 19.0 | |
Gender | ||||||||
Men | 41.8 | 41.2 | 24.0 | 31.8 | 45.9 | 51.5 | 36.8 | |
Women | 58.2 | 58.8 | 52.2 | 43.5 | 54.1 | 31.5 | 46.0 | |
Gender missing | 0.0 | 0.0 | 24.0 | 25.0 | 0.0 | 17.0 | 17.0 | |
High School Experience | ||||||||
Ever Economically Disadvantaged | 0.2 | 0.0 | 0.4 | 0.1 | 0.2 | 0.2 | 0.2 | |
Ever Section 504 Plan | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | |
Ever Homeless | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | |
Ever Limited English Proficiency | 0.0 | 0.0 | 0.1 | 0.0 | 0.0 | 0.0 | 0.0 | |
Ever Special Education | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | |
Ever Gifted/Talented | 0.0 | 0.0 | 0.1 | 0.1 | 0.0 | 0.1 | 0.1 | |
Pell Eligible | ||||||||
Pell Ineligible | 58.4 | 37.1 | 44.6 | 57.1 | 43.3 | 0.0 | 47.7 | |
Pell Eligible | 41.6 | 62.9 | 31.6 | 18.4 | 56.7 | 0.0 | 25.2 | |
Pell status missing | 0.0 | 0.0 | 24.0 | 24.0 | 0.0 | 100.0 | 27.0 | |
Race/Ethnicity | ||||||||
Asian | 0.8 | 1.6 | 0.2 | 2.3 | 0.8 | 0.6 | 1.6 | |
Black | 11.0 | 58.9 | 11.1 | 2.2 | 26.4 | 24.5 | 10.4 | |
Hispanic | 4.2 | 3.1 | 12.9 | 13.5 | 4.5 | 3.0 | 9.7 | |
Other | 3.1 | 1.2 | 8.4 | 4.6 | 1.6 | 0.7 | 3.8 | |
White | 76.5 | 32.0 | 42.4 | 49.7 | 64.6 | 53.8 | 54.6 | |
Race/ethnicity missing | 4.0 | 3.0 | 25.0 | 28.0 | 2.0 | 17.0 | 20.0 | |
Received GED | ||||||||
Did not receive | 0.0 | 90.4 | 73.1 | 71.8 | 91.7 | 77.6 | 65.3 | |
Received | 0.0 | 9.6 | 3.1 | 3.6 | 8.2 | 5.5 | 3.9 | |
GED status missing | 100.0 | 0.0 | 24.0 | 25.0 | 0.0 | 17.0 | 31.0 |
The graph below summarizes enrollment and completion for college-level math and English courses. Please note, the percentages were calculated using the full sample of students across the entire time period. The enrollment and completion rates for each year would be higher than they appear in the graph.
# Enrollment/completion graph for college-level math and English.
# The chunk as captured began with a bare `%>%` ahead of `data2`, which does
# not parse; the pipeline is restored to start from `data2`.
data2 %>%
  # select variables of interest
  select(site, sample_id,
         contains(c("tvcrat", "tvcret")) & ends_with("c") & !contains("oth")) %>%
  select(-starts_with("c")) %>%
  # reshape the data from wide to long (one row per student x source column)
  pivot_longer(cols = c(-site, -sample_id)) %>%
  # clean the data for graphing: the first character of the column name is
  # mapped to a month, characters 2-3 to a two-digit year
  mutate(Mon = case_when(str_sub(name, 1, 1) == "w" ~ 1,
                         str_sub(name, 1, 1) == "s" ~ 2,
                         str_sub(name, 1, 1) == "u" ~ 6,
                         str_sub(name, 1, 1) == "f" ~ 8),
         Year = as.numeric(str_sub(name, 2, 3)) + 2000,
         Term = as.Date(paste0(Year, "-", Mon, "-1"), "%Y-%m-%d"),
         Type = if_else(str_extract(name, "cr[ae]t") == "crat", "Enrolled", "Completed"),
         Subj = if_else(str_extract(name, "math|eng") == "math", "Math", "English"),
         Type_fct = factor(Type, levels = c("Enrolled", "Completed")),
         Subj_fct = factor(Subj, levels = c("English", "Math"))) %>%
  # keeping only fall semester data
  filter(str_sub(name, 1, 1) %in% c("f")) %>%
  # calculating % of students for each group; mutate() keeps one row per
  # student, so every row in a group carries the same Percent
  # NOTE(review): sum(value) / n() is a proportion unless `value` is already
  # on a 0-100 scale; ylim(0, 50) suggests data_graph() rescales -- confirm.
  group_by(site, Term, Type, Subj) %>%
  mutate(Percent = sum(value) / n()) %>%
  data_graph() + ylim(c(0, 50))
Correlation is a statistical measure that captures the extent to which two variables are linearly related. It is useful for describing simple relationships among our data, but does not tell us anything about how well these variables may predict success in college-level courses.
The correlation coefficient (which is reported in the figures below) quantifies the strength of the relationship between any two variables. The correlation coefficient ranges from -1 to 1, and the closer it is to zero, the weaker the relationship. A positive coefficient indicates a positive correlation, meaning both variables tend to increase together. Conversely, a negative coefficient indicates a negative correlation, where one variable tends to increase when the other variable decreases.
# Pearson correlations among the ACT score columns and high school GPA.
# The chunk as captured had each assignment target pushed onto the following
# line (e.g. `<- data2 %>%` / `act select(...)`), which does not parse; the
# assignments are restored here.

# ACT columns, excluding those whose names end in "m0" or "miss"; the "act_"
# prefix is stripped and the remaining name upper-cased for plot labels
act <- data2 %>%
  select(starts_with("act") & !ends_with(c("m0", "miss"))) %>%
  rename_with(~ toupper(str_replace_all(., "act_", "")))

act_matrix <- as.matrix(act)

# NOTE(review): assumes gpa_final is the un-imputed GPA (missing = NA, not 0)
# -- confirm, otherwise imputed zeros enter the correlation.
act_gpa_matrix <- cbind(act_matrix, GPA = as_vector(data2$gpa_final))
act_gpa_corr <- Hmisc::rcorr(x = act_gpa_matrix, type = "pearson")
# blank out coefficients based on fewer than 10 paired observations
act_gpa_corr$r[act_gpa_corr$n < 10] <- NA

corrplot(act_gpa_corr$r,
         method = "circle",
         type = "upper",
         diag = FALSE,
         addCoef.col = "black",
         cl.pos = "n",
         tl.col = "black",
         tl.srt = 45,
         mar = c(0, 0, 5, 0),
         title = "Pearson Correlation for ACT tests and High School GPA")
# Pearson correlations among the 9th/10th grade assessment score columns and
# high school GPA.
# The chunk as captured had each assignment target pushed onto the following
# line (e.g. `<- data2 %>%` / `tests select(...)`), which does not parse; the
# assignments are restored here.

# "grade*" columns, excluding those whose names end in "m0" or "miss"; the
# renaming steps turn the raw column names into readable plot labels and must
# run in this order ("9" is expanded before "10")
tests <- data2 %>%
  select(starts_with("grade") & !ends_with(c("m0", "miss"))) %>%
  rename_with(~ toupper(str_replace_all(., "grade|_score", ""))) %>%
  rename_with(~ str_replace_all(., "_M", " Math")) %>%
  rename_with(~ str_replace_all(., "_S", " Science")) %>%
  rename_with(~ str_replace_all(., "_RLA", " Reading")) %>%
  rename_with(~ str_replace_all(., "9", "9th Grade")) %>%
  rename_with(~ str_replace_all(., "10", "10th Grade"))

tests_matrix <- as.matrix(tests)

# NOTE(review): assumes gpa_final is the un-imputed GPA (missing = NA, not 0)
# -- confirm, otherwise imputed zeros enter the correlation.
tests_gpa_matrix <- cbind(tests_matrix, GPA = as_vector(data2$gpa_final))
tests_gpa_corr <- Hmisc::rcorr(x = tests_gpa_matrix, type = "pearson")
# blank out coefficients based on fewer than 10 paired observations
tests_gpa_corr$r[tests_gpa_corr$n < 10] <- NA

corrplot(tests_gpa_corr$r,
         method = "circle",
         type = "upper",
         diag = FALSE,
         addCoef.col = "black",
         cl.pos = "n",
         tl.col = "black",
         tl.srt = 45,
         mar = c(0, 0, 5, 0),
         title = "Pearson Correlation for ADE tests and High School GPA")
# Pearson correlations among class rank, attendance and high school GPA.
# The chunk as captured had each assignment target pushed onto the following
# line (e.g. `<- data2 %>%` / `other select(...)`), which does not parse; the
# assignments are restored here.

# underscores become spaces and names are upper-cased for plot labels
other <- data2 %>%
  select(class_rank, avg_days_present, avg_days_absent) %>%
  rename_with(~ toupper(str_replace_all(., "_", " ")))

other_matrix <- as.matrix(other)

# NOTE(review): assumes these columns and gpa_final are un-imputed (missing =
# NA, not 0) -- confirm, otherwise imputed zeros enter the correlation.
other_gpa_matrix <- cbind(other_matrix, GPA = as_vector(data2$gpa_final))
other_gpa_corr <- Hmisc::rcorr(x = other_gpa_matrix, type = "pearson")
# blank out coefficients based on fewer than 10 paired observations
other_gpa_corr$r[other_gpa_corr$n < 10] <- NA

corrplot(other_gpa_corr$r,
         method = "circle",
         type = "upper",
         diag = FALSE,
         addCoef.col = "black",
         cl.pos = "n",
         tl.col = "black",
         tl.srt = 45,
         mar = c(0, 0, 5, 0),
         title = "Pearson Correlation for Various High School Metrics and GPA")