SSCC - Social Science Computing Cooperative Supporting Statistical Analysis for Research

5.4 Factors and Indicators

These exercises use the mtcars.csv data set.

  1. Import the mtcars.csv data set.

    # Path to the data file, built relative to the current working directory.
    mtcars_path <- file.path("..", "datasets", "mtcars.csv")
    # cols() with no arguments leaves every column's type to readr's guessing.
    mtcars_in <- read_csv(file = mtcars_path, col_types = cols())
    Warning: Missing column names filled in: 'X1' [1]
    # The first column (the car names) has no header in the CSV, so readr gives
    # it an auto-generated name ('X1' above; newer readr releases use '...1').
    # Rename it by position so the code does not depend on that generated name.
    names(mtcars_in)[1] <- "make_model"
    
    # Leave the imported data untouched and do all further work on a copy.
    mtcars <- mtcars_in
    
    glimpse(mtcars)
    Observations: 32
    Variables: 12
    $ make_model <chr> "Mazda RX4", "Mazda RX4 Wag", "Datsun 710", "Hornet...
    $ mpg        <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22....
    $ cyl        <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, ...
    $ disp       <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 14...
    $ hp         <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123,...
    $ drat       <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.9...
    $ wt         <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3....
    $ qsec       <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20...
    $ vs         <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, ...
    $ am         <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
    $ gear       <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, ...
    $ carb       <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, ...
  2. Factor the cyl, gear and carb variables.

    # Convert the three listed columns to factors in one step: factor() is
    # applied to each of cyl, gear and carb, and the result replaces the column.
    mtcars <- mutate_at(mtcars, vars(cyl, gear, carb), factor)
    
    glimpse(mtcars)
    Observations: 32
    Variables: 12
    $ make_model <chr> "Mazda RX4", "Mazda RX4 Wag", "Datsun 710", "Hornet...
    $ mpg        <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22....
    $ cyl        <fct> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, ...
    $ disp       <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 14...
    $ hp         <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123,...
    $ drat       <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.9...
    $ wt         <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3....
    $ qsec       <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20...
    $ vs         <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, ...
    $ am         <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
    $ gear       <fct> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, ...
    $ carb       <fct> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, ...
  3. Create a variable that identifies the observations that are in the top 25 percent of miles per gallon. Display a few of these vehicles.

    Hint: you will need to find a function that computes the percentage points (quantiles) of a variable.

    # Flag each car whose mpg is at or above the 75th percentile of mpg.
    mtcars <- mutate(mtcars, efficient = mpg >= quantile(mpg, probs = 0.75))
    
    # Keep only the flagged rows so they can be displayed.
    efficient_cars <- filter(mtcars, efficient)
    
    glimpse(efficient_cars)
    Observations: 9
    Variables: 13
    $ make_model <chr> "Datsun 710", "Merc 240D", "Merc 230", "Fiat 128", ...
    $ mpg        <dbl> 22.8, 24.4, 22.8, 32.4, 30.4, 33.9, 27.3, 26.0, 30.4
    $ cyl        <fct> 4, 4, 4, 4, 4, 4, 4, 4, 4
    $ disp       <dbl> 108.0, 146.7, 140.8, 78.7, 75.7, 71.1, 79.0, 120.3,...
    $ hp         <dbl> 93, 62, 95, 66, 52, 65, 66, 91, 113
    $ drat       <dbl> 3.85, 3.69, 3.92, 4.08, 4.93, 4.22, 4.08, 4.43, 3.77
    $ wt         <dbl> 2.320, 3.190, 3.150, 2.200, 1.615, 1.835, 1.935, 2....
    $ qsec       <dbl> 18.61, 20.00, 22.90, 19.47, 18.52, 19.90, 18.90, 16...
    $ vs         <dbl> 1, 1, 1, 1, 1, 1, 1, 0, 1
    $ am         <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1
    $ gear       <fct> 4, 4, 4, 4, 4, 4, 4, 5, 5
    $ carb       <fct> 1, 2, 2, 1, 2, 1, 1, 2, 2
    $ efficient  <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
  4. Create a variable that bins the values of hp using the following amounts of hp: 100, 170, 240, and 300.

    # Bin hp at the cut points 100, 170, 240 and 300. -Inf and Inf close the
    # outermost bins so that every hp value receives a label. Intervals are
    # closed on the right (cut()'s default, spelled out here), so a car with
    # exactly 100 hp is labelled "gocart".
    mtcars <- mutate(
      mtcars,
      power = cut(
        hp,
        breaks = c(-Inf, 100, 170, 240, 300, Inf),
        labels = c("gocart", "slow", "typical", "fast", "beast"),
        right = TRUE
      )
    )
    
    glimpse(mtcars)
    Observations: 32
    Variables: 14
    $ make_model <chr> "Mazda RX4", "Mazda RX4 Wag", "Datsun 710", "Hornet...
    $ mpg        <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22....
    $ cyl        <fct> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, ...
    $ disp       <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 14...
    $ hp         <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123,...
    $ drat       <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.9...
    $ wt         <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3....
    $ qsec       <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20...
    $ vs         <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, ...
    $ am         <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
    $ gear       <fct> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, ...
    $ carb       <fct> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, ...
    $ efficient  <lgl> FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRU...
    $ power      <fct> slow, slow, gocart, slow, typical, slow, fast, goca...