A podcast about academic sociology for academic sociologists
A podcast about academic sociology for academic sociologists

Data on America’s Best-Enrolled Sociology Departments

The most recent episode of The Annex featured a discussion of enrollments in sociology departments. You can see the segment here:

YouTube player

The segment discussed these (very poorly formatted) tables:

America’s Largest Sociology Departments by Graduating Class Size, 2020, IPEDS, 1 of 2
America’s Largest Sociology Departments by Graduating Class Size, 2020, IPEDS, 2 of 2
HIghest Ratio of Majors Graduates in Sociology to Total Graduates, 2020, IPEDS data

This analysis is admittedly quick-and-dirty. Here’s the data: Set One Set Two

Review my code!

knitr::opts_chunk$set(echo = FALSE)
rm(list=ls())
gc()
directory <- "D:/Dropbox/Annex/Banter/Assessing Departmental Performance"
setwd(directory)

library(data.table)
library(sjPlot)
library(sjmisc)
library(sjlabelled)

tab.1.o <- read.csv("c2020_a.csv")
tab.1.ot <- subset(tab.1.o, AWLEVEL %in% c(5,7,17,18,19))
tab.1 <- tab.1.ot[c(1,2,6,8,10,18,20,22,24,26,28,30,32,34,42,44,46,60,62,64)]

names(tab.1) <- paste(c("school", "CIPCODE", "total.all", "men.all", "women.all", "total.asian", "men.asian", "women.asian", "total.black", "men.black", "women.black", "total.hisp", "men.hisp", "women.hisp", "total.white", "men.white", "women.white", "total.nralien", "men.nralien", "women.nralien"))

tab.A1 <- aggregate(tab.1$total.all, by=list(tab.1$school), sum)
names(tab.A1) <- c("school", "total.all")
tab.A2 <- aggregate(tab.1$men.all, by=list(tab.1$school), sum)
names(tab.A2) <- c("school", "total.men")
tab.A3 <- aggregate(tab.1$total.white, by=list(tab.1$school), sum)
names(tab.A3) <- c("school", "total.white")
tab.A4 <- aggregate(tab.1$total.nralien, by=list(tab.1$school), sum)
names(tab.A4) <- c("school", "total.nralien")
tab.AA <- merge(tab.A1, tab.A2, by = "school", all = T)
tab.AA <- merge(tab.AA, tab.A3, by = "school", all = T)
tab.AA <- merge(tab.AA, tab.A4, by = "school", all = T)
tab.all <- tab.AA
rm(tab.A1, tab.A2, tab.A3, tab.A4, tab.AA, tab.1.ot)


tab.soc <- subset(tab.1, CIPCODE == 45.1101 | CIPCODE == 45.1401)
tab.A1 <- aggregate(tab.soc$total.all, by=list(tab.soc$school), sum)
names(tab.A1) <- c("school", "soc.all")
tab.A2 <- aggregate(tab.soc$men.all, by=list(tab.soc$school), sum)
names(tab.A2) <- c("school", "soc.men")
tab.A3 <- aggregate(tab.soc$total.white, by=list(tab.soc$school), sum)
names(tab.A3) <- c("school", "soc.white")
tab.A4 <- aggregate(tab.soc$total.nralien, by=list(tab.soc$school), sum)
names(tab.A4) <- c("school", "soc.nralien")
tab.AA <- merge(tab.A1, tab.A2, by = "school", all = T)
tab.AA <- merge(tab.AA, tab.A3, by = "school", all = T)
tab.AA <- merge(tab.AA, tab.A4, by = "school", all = T)
tab.soc <- tab.AA
rm(tab.A1, tab.A2, tab.A3, tab.A4, tab.AA)

tab.socanthro <- subset(tab.1, CIPCODE == 45.1101 | CIPCODE == 45.1401 | CIPCODE == 45.1301)
tab.A1 <- aggregate(tab.socanthro$total.all, by=list(tab.socanthro$school), sum)
names(tab.A1) <- c("school", "socanthro.all")
tab.A2 <- aggregate(tab.socanthro$men.all, by=list(tab.socanthro$school), sum)
names(tab.A2) <- c("school", "socanthro.men")
tab.A3 <- aggregate(tab.socanthro$total.white, by=list(tab.socanthro$school), sum)
names(tab.A3) <- c("school", "socanthro.white")
tab.A4 <- aggregate(tab.socanthro$total.nralien, by=list(tab.socanthro$school), sum)
names(tab.A4) <- c("school", "socanthro.nralien")
tab.AA <- merge(tab.A1, tab.A2, by = "school", all = T)
tab.AA <- merge(tab.AA, tab.A3, by = "school", all = T)
tab.AA <- merge(tab.AA, tab.A4, by = "school", all = T)
tab.socanthro <- tab.AA
rm(tab.A1, tab.A2, tab.A3, tab.A4, tab.AA)


tab.econ <- subset(tab.1, CIPCODE %in% c(45.0601, 45.0602, 45.0603, 45.0604, 45.0605, 45.0699))
tab.A1 <- aggregate(tab.econ$total.all, by=list(tab.econ$school), sum)
names(tab.A1) <- c("school", "econ.all")
tab.A2 <- aggregate(tab.econ$men.all, by=list(tab.econ$school), sum)
names(tab.A2) <- c("school", "econ.men")
tab.A3 <- aggregate(tab.econ$total.white, by=list(tab.econ$school), sum)
names(tab.A3) <- c("school", "econ.white")
tab.A4 <- aggregate(tab.econ$total.nralien, by=list(tab.econ$school), sum)
names(tab.A4) <- c("school", "econ.nralien")
tab.AA <- merge(tab.A1, tab.A2, by = "school", all = T)
tab.AA <- merge(tab.AA, tab.A3, by = "school", all = T)
tab.AA <- merge(tab.AA, tab.A4, by = "school", all = T)
tab.econ <- tab.AA
rm(tab.A1, tab.A2, tab.A3, tab.A4, tab.AA)


tab.socsci <- subset(tab.1, CIPCODE>45 & CIPCODE<46)
tab.A1 <- aggregate(tab.socsci$total.all, by=list(tab.socsci$school), sum)
names(tab.A1) <- c("school", "socsci.all")
tab.A2 <- aggregate(tab.socsci$men.all, by=list(tab.socsci$school), sum)
names(tab.A2) <- c("school", "socsci.men")
tab.A3 <- aggregate(tab.socsci$total.white, by=list(tab.socsci$school), sum)
names(tab.A3) <- c("school", "socsci.white")
tab.A4 <- aggregate(tab.socsci$total.nralien, by=list(tab.socsci$school), sum)
names(tab.A4) <- c("school", "socsci.nralien")
tab.AA <- merge(tab.A1, tab.A2, by = "school", all = T)
tab.AA <- merge(tab.AA, tab.A3, by = "school", all = T)
tab.AA <- merge(tab.AA, tab.A4, by = "school", all = T)
tab.socsci <- tab.AA
rm(tab.A1, tab.A2, tab.A3, tab.A4, tab.AA)


data <- merge(tab.all, tab.soc, by = "school", all = T)
data <- merge(data, tab.socanthro, by = "school", all = T)
data <- merge(data, tab.econ, by = "school", all = T)
data <- merge(data, tab.socsci, by = "school", all = T)

rm(tab.1, tab.all, tab.econ, tab.soc, tab.socanthro, tab.socsci)

identifiers <- read.csv("hd2020.csv")[c(1:9, 49:54,59,61,65)]
names(identifiers) <- paste(c("school", "institution", "inst.alias", "address", "city", "state", "zip", "FIPS", "BEA.region",
                              "C18.basic", "C18.ugrad", "C18.ugprofile", "c18.enroll", "C18.sizeset", "size.cat", "system.name",
                              "CSA.area"))
data <- merge(data, identifiers, by = "school", all = F)

data.all <- data
data <- subset(data, socanthro.all > 0)

data$r.soc.all <- (data$socanthro.all / data$total.all)
data$r.soc.socsci <- (data$socanthro.all / data$socsci.all) 
data$r.soc.econ <- (data$socanthro.all / data$econ.all) 

data$R1 <- data$C18.basic == 15
data$R12 <- data$C18.basic %in% c(15, 16)



temp <- data[c("institution", "socanthro.all")]
setorder(temp, -socanthro.all)
write.csv(temp[1:100,], file = "Table 1.csv", row.names = F)


temp <- data[c("institution", "socanthro.all",  "r.soc.all", "r.soc.socsci", "r.soc.econ", "C18.basic")]

temp <- subset(temp, temp$socanthro.all >= 30)
setorder(temp, -r.soc.all)
write.csv(temp[1:100,], file = "Table 2.csv", row.names = F)


temp <- data
temp$white.grads <- (temp$total.white / temp$total.all) * 100
temp$male.grads <- (temp$total.men / temp$total.all) * 100

m1 <- summary(lm(soc.all ~ total.all, temp))
m2 <- summary(lm(soc.all ~ total.all + total.men + total.white, temp))

Leave a comment

Your email address will not be published. Required fields are marked *