## Clear all data
## NOTE: this removes every object that ls() lists in the current environment,
## so anything else you had in your workspace will be gone after this line runs.
rm(list=ls())

## DECLARATIONS AND FUNCTIONS
## NOTE: the square-bracketed items in this section are template placeholders,
## not R code. Replace each one (brackets included) with a real value before
## running the script; R cannot parse the file until you do.
  # Collated data file to open: replace the placeholder with the quoted path of
  # the CSV file, e.g. "survey.csv"
rFile <- [WHAT IS THE NAME OF THE CSV FILE THAT CONTAINS YOUR SURVEY DATA, INCLUDING GPAs]

## READ IN THE DATA FILES
  # Read the survey CSV (first row = column names); text columns stay as
  # character vectors rather than being converted to factors
d.temp <- read.csv(rFile, header = TRUE, stringsAsFactors = FALSE)
  # Which column contains the GPAs? Replace the placeholder with the column's
  # position (a single number), since it is used as an index below
gpa.col <- [ ]
  # Rename the GPA column so the rest of the script can refer to it as curr.gpa
names(d.temp)[gpa.col] <- "curr.gpa"
  # Only keep data for students who answered yes to all questions.
  # There were [??] questions
  # So, if they answered "yes" to everything, they will have a score of [??]
  # MAKE SURE TO MODIFY THIS SCORE ACCORDING TO THE SURVEY THAT YOU SENT OUT!!!
  # The filter reads a column named "score"; rows whose score is NA are dropped
  # by subset(). Only columns 1-6, the GPA column and column 39 are kept.
  # NOTE(review): what column 39 holds is not visible in this script -- confirm
  # it against the layout of your own CSV. The rest of the script reads the
  # columns "id", "section" and "curr.gpa", so all three must be among the
  # columns kept here.
d.temp.keep <- subset(d.temp, d.temp$score == [Score that means student said yes to all questions], select = c(1:6, gpa.col, 39))
  # Keep only the unique values: when the same id appears more than once, only
  # its first row is retained
d.temp.final <- subset(d.temp.keep, !duplicated(d.temp.keep$id))


    # Largest number of members we want in any one group
  noMembers <- 6
    # Create a dataframe to work on (a copy of the de-duplicated survey data)
  d <- d.temp.final

    # Find the average class GPA, ignoring missing values.
    # It is used to stand in for any missing GPA.
    # This is not a perfect solution, but works reasonably well, as long as you don't have too many NAs!
  avg.ClassGPA <- mean(d$curr.gpa, na.rm = TRUE)
    # Replace missing GPAs -- and ONLY missing GPAs -- with the class average.
    # Filling every NA in the data frame would also write a GPA number into
    # unrelated columns (e.g. id or section) wherever they happen to be empty.
  d$curr.gpa[is.na(d$curr.gpa)] <- avg.ClassGPA
  
    # Figure out the number of groups to form and how many members each one gets.
    # noMembers (set above) is the LARGEST group size we allow, so we need enough
    # groups that nobody is left over: ceiling(students / noMembers).
  nStudents <- nrow(d)
  if (nStudents == 0) {
    stop("No students left to group after filtering the survey data.", call. = FALSE)
  }
  nGps <- as.integer(ceiling(nStudents / noMembers))

    # Spread the students as evenly as possible over those groups.
    # From here on noMembers is the BASE size that every group reaches, and dif is
    # the number of groups that receive one extra member. By construction
    # (nGps * noMembers) + dif == nStudents and 0 <= dif < nGps, so the assignment
    # vector built below always has exactly one entry per student.
  noMembers <- nStudents %/% nGps
  dif <- nStudents %% nGps

    ## FORM THE GROUPS. The logic is to sort the data by GPA, and then put the first "X" (where X is no. of groups)
    ## students as the first member of their groups. Then, reverse the order to put the next "X" students as the second member
    ## of their groups, and keep alternating direction for every further round. If needed, add extra members to some groups
    # First, sort the dataframe by GPA (ascending), using section to break ties
  rs <- d[with(d, order(curr.gpa, section)), ]

    # Now, create the list to contain the groups, based on the logic described above.
    # Each full round hands one student to every group: odd rounds go 1..nGps,
    # even rounds go nGps..1. seq_len() is used so that a count of zero yields an
    # empty sequence instead of counting backwards.
  fullRounds <- lapply(seq_len(noMembers), function(i) {
    if (i %% 2 == 1) seq_len(nGps) else rev(seq_len(nGps))
  })
    # The remaining dif students top up groups 1..dif. They are handed out in
    # descending order (dif..1) when the last full round went upwards, and in
    # ascending order (1..dif) when it went downwards.
  extras <- seq_len(dif)
  if (noMembers %% 2 == 1) {
    extras <- rev(extras)
  }
  lst <- c(unlist(fullRounds), extras)
    # Sanity check: exactly one group number per student
  stopifnot(length(lst) == nrow(rs))

    # Add the groups column to the dataframe that was sorted by GPA, based on logic above
  rs$GroupTemp <- lst
    # Turn the numeric group id into a readable label ("Group #<n>") and store
    # the label as a factor
  rs$Group <- factor(paste0("Group #", rs$GroupTemp))

    # Mean GPA per group: one row per group, with the mean in column x
  meanGPAs <- aggregate(rs$curr.gpa, by = list(Group = rs$Group), FUN = mean)
    # Look up each student's group in that table and copy the group mean onto
    # the student's row
  groupRow <- match(rs$Group, meanGPAs$Group)
  rs$Group.GPA <- meanGPAs$x[groupRow]

    # Write the students, their groups and the group GPAs to a CSV file
    # (without row names) for the end-of-class calculations
  groupsOutFile <- "GROUPS AND GPAs.csv"
  write.csv(rs, file = groupsOutFile, row.names = FALSE)
  
    # Test that the GPAs are all approximately the same across groups, using a
    # one-way ANOVA of GPA on group. The comparison needs at least two groups,
    # so it is skipped (with a message) when only one group was formed.
  if (length(unique(rs$Group)) > 1) {
    fit <- aov(curr.gpa ~ Group, data = rs)
      # Write out the stats. summary(fit)[[1]] is the ANOVA table; with
      # row.names = FALSE the row labels are not written, so the first row is
      # the Group term and the second row is the residuals.
    statsFile <- "Stats for groups.csv"
    write.csv(summary(fit)[[1]], statsFile, row.names = FALSE)
  } else {
    message("Only one group was formed; skipping the ANOVA of GPA by group.")
  }

  ## Clear all data
  ## NOTE: this removes every object that ls() lists in the current environment,
  ## including the results computed above -- the CSV files are the only output.
rm(list=ls())