## Clear all data
# NOTE(review): this wipes every object in the calling environment. It is kept
# because the script has always done so; running the script in a fresh R
# session is the safer alternative.
rm(list = ls())

## DECLARATIONS AND FUNCTIONS
# ---------------------------------------------------------------------------
# SETTINGS TO FILL IN BEFORE RUNNING
# Each setting below starts as NULL. The script stops with an explanatory
# message if any of them is left unset.
# ---------------------------------------------------------------------------

# Collated data file to open: the name of the CSV file that contains your
# survey data, including GPAs (e.g. "my_survey.csv")
rFile <- NULL

# Which column contains the GPAs? (a single column number)
gpa.col <- NULL

# Only students who answered "yes" to all survey questions are kept.
# If they answered "yes" to everything, they will have this score.
# MAKE SURE TO MODIFY THIS SCORE ACCORDING TO THE SURVEY THAT YOU SENT OUT!!!
yes.score <- NULL

# Initialize the (maximum) number of members in each group
noMembers <- 6

# Fail early, with a readable message, when a setting has not been filled in
if (!is.character(rFile) || length(rFile) != 1L) {
  stop("Set 'rFile' to the name of the CSV file with the survey data.",
       call. = FALSE)
}
if (!is.numeric(gpa.col) || length(gpa.col) != 1L) {
  stop("Set 'gpa.col' to the number of the column that holds the GPAs.",
       call. = FALSE)
}
if (is.null(yes.score) || length(yes.score) != 1L) {
  stop("Set 'yes.score' to the score that means \"yes\" to all questions.",
       call. = FALSE)
}

## READ IN THE DATA FILES
# Roster file
d.temp <- read.csv(rFile, header = TRUE, stringsAsFactors = FALSE)

# Rename the GPA column so the rest of the script can refer to it by name
names(d.temp)[gpa.col] <- "curr.gpa"

# Keep only the students whose score says they answered "yes" to everything,
# and only columns 1-6, the GPA column and column 39.
# unique() stops the GPA column from being selected twice when gpa.col is
# already one of those positions.
# NOTE(review): the kept columns are assumed to include "id" and "section",
# which later steps use -- confirm against your CSV layout.
keep.cols <- unique(c(1:6, gpa.col, 39))
d.temp.keep <- subset(d.temp, score == yes.score, select = keep.cols)

# Keep only the first row for each student id
d.temp.final <- d.temp.keep[!duplicated(d.temp.keep$id), ]

# Create a dataframe to work on
d <- d.temp.final

# Find the average class GPA
# Use this to replace any NAs
# This is not a perfect solution, but works reasonably well, as long as you
# don't have too many NAs!
avg.ClassGPA <- mean(d$curr.gpa, na.rm = TRUE)

# Replace missing GPAs with the average GPA.
# Only the GPA column is filled: writing the average into every NA cell of the
# data frame would put a GPA value into unrelated columns.
d$curr.gpa[is.na(d$curr.gpa)] <- avg.ClassGPA

## WORK OUT HOW MANY GROUPS TO FORM AND HOW BIG THEY ARE
nStudents <- nrow(d)
if (nStudents == 0) {
  stop("No students left after filtering; cannot form groups.", call. = FALSE)
}
if (noMembers < 1) {
  stop("'noMembers' must be at least 1.", call. = FALSE)
}

# Smallest number of groups for which no group exceeds noMembers students
# (integer form of ceiling(nStudents / noMembers))
nGps <- as.integer((nStudents + noMembers - 1) %/% noMembers)

# Spread the students as evenly as possible: every group gets baseSize
# members and the first `dif` groups get one more. Using the quotient and
# remainder keeps `dif` between 0 and nGps - 1 for any class size.
baseSize <- nStudents %/% nGps
dif <- nStudents %% nGps

## FORM THE GROUPS. The logic is to sort the data by GPA, and then put the
## first "X" (where X is no. of groups) students as the first member of their
## groups. Then, reverse the order to put the next "X" students as the second
## member of their groups, and keep alternating direction for each further
## member. If needed, add extra members to some groups.

# First, sort the dataframe by GPA (ties broken by section)
rs <- d[with(d, order(curr.gpa, section)), ]

# One pass per member slot: odd passes run 1..nGps, even passes nGps..1
lst <- unlist(lapply(seq_len(baseSize), function(pass) {
  if (pass %% 2 == 1) seq_len(nGps) else rev(seq_len(nGps))
}))

# The remaining students go to groups 1..dif, continuing the alternation:
# after an even (descending) pass they are added in ascending order, after an
# odd (ascending) pass in descending order. Nothing is added when dif is 0.
if (dif > 0) {
  extras <- seq_len(dif)
  if (baseSize %% 2 == 1) {
    extras <- rev(extras)
  }
  lst <- c(lst, extras)
}

# Add the groups column to the dataframe that was sorted by GPA
rs$GroupTemp <- lst

# A readable group label, stored as a factor
rs$Group <- as.factor(paste0("Group #", rs$GroupTemp))

# Mean GPA of each group, repeated on every row of that group
rs$Group.GPA <- ave(rs$curr.gpa, rs$Group, FUN = mean)

# Create a filename for output
gpGPAfile <- "GROUPS AND GPAs.csv"

# Write out the file with the relevant columns for the end-of-class
# calculations
write.csv(rs, gpGPAfile, row.names = FALSE)

# Test that the GPAs are all approximately the same across groups.
# A one-way ANOVA needs at least two groups, so it is skipped otherwise.
# The output name is fixed because the script never defines a section counter.
statsFile <- "Stats for groups.csv"
if (nlevels(rs$Group) > 1) {
  fit <- aov(curr.gpa ~ Group, data = rs)

  # Write out the stats
  write.csv(summary(fit)[[1]], statsFile, row.names = FALSE)
} else {
  warning("Only one group was formed; skipping the ANOVA on group GPAs.",
          call. = FALSE)
}

## Clear all data
# NOTE(review): this also removes the results (rs, fit) from the workspace;
# only the CSV files written above remain.
rm(list = ls())