![SSCC - Social Science Computing Cooperative SSCC - Social Science Computing Cooperative](sscc_logoBW.png)
5.4 Factors and Indicators
These exercises use the mtcars.csv data set.
Import the mtcars.csv data set.
mtcars_path <- file.path("..", "datasets", "mtcars.csv")
mtcars_in <- read_csv(mtcars_path, col_types = cols())
Warning: Missing column names filled in: 'X1' [1]
# The first column had no header in the CSV and was auto-named X1 on import;
# give it a descriptive name.
mtcars_in <- mtcars_in %>%
  rename(make_model = X1)

# Keep the imported tibble as-is and continue with a working copy.
mtcars <- mtcars_in
glimpse(mtcars)
Observations: 32 Variables: 12 $ make_model <chr> "Mazda RX4", "Mazda RX4 Wag", "Datsun 710", "Hornet... $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.... $ cyl <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, ... $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 14... $ hp <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123,... $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.9... $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.... $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20... $ vs <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, ... $ am <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, ... $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, ...
Factor the cyl, gear, and carb variables.
mtcars <- mtcars %>%
  mutate_at(vars(cyl, gear, carb), ~ factor(.))
glimpse(mtcars)
Observations: 32 Variables: 12 $ make_model <chr> "Mazda RX4", "Mazda RX4 Wag", "Datsun 710", "Hornet... $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.... $ cyl <fct> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, ... $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 14... $ hp <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123,... $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.9... $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.... $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20... $ vs <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, ... $ am <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... $ gear <fct> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, ... $ carb <fct> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, ...
Create a variable that identifies the observations that are in the top 25 percent of miles per gallon. Display a few of these vehicles.
Hint: you will need to find a function that identifies the percentage points of a variable.
# Indicator: TRUE when a vehicle's mpg is at or above the 75th percentile
# of mpg across all observations.
mtcars <- mutate(mtcars, efficient = mpg >= quantile(mpg, probs = 0.75))

# Keep only the flagged vehicles and show them.
efficient_cars <- filter(mtcars, efficient)
glimpse(efficient_cars)
Observations: 9 Variables: 13 $ make_model <chr> "Datsun 710", "Merc 240D", "Merc 230", "Fiat 128", ... $ mpg <dbl> 22.8, 24.4, 22.8, 32.4, 30.4, 33.9, 27.3, 26.0, 30.4 $ cyl <fct> 4, 4, 4, 4, 4, 4, 4, 4, 4 $ disp <dbl> 108.0, 146.7, 140.8, 78.7, 75.7, 71.1, 79.0, 120.3,... $ hp <dbl> 93, 62, 95, 66, 52, 65, 66, 91, 113 $ drat <dbl> 3.85, 3.69, 3.92, 4.08, 4.93, 4.22, 4.08, 4.43, 3.77 $ wt <dbl> 2.320, 3.190, 3.150, 2.200, 1.615, 1.835, 1.935, 2.... $ qsec <dbl> 18.61, 20.00, 22.90, 19.47, 18.52, 19.90, 18.90, 16... $ vs <dbl> 1, 1, 1, 1, 1, 1, 1, 0, 1 $ am <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1 $ gear <fct> 4, 4, 4, 4, 4, 4, 4, 5, 5 $ carb <fct> 1, 2, 2, 1, 2, 1, 1, 2, 2 $ efficient <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
Create a variable that bins the values of hp using the following hp cut points: 100, 170, 240, and 300.
mtcars <- mtcars %>%
  mutate(
    power = cut(hp,
      breaks = c(-Inf, 100, 170, 240, 300, Inf),
      labels = c("gocart", "slow", "typical", "fast", "beast")
    )
  )
glimpse(mtcars)
Observations: 32 Variables: 14 $ make_model <chr> "Mazda RX4", "Mazda RX4 Wag", "Datsun 710", "Hornet... $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.... $ cyl <fct> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, ... $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 14... $ hp <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123,... $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.9... $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.... $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20... $ vs <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, ... $ am <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... $ gear <fct> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, ... $ carb <fct> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, ... $ efficient <lgl> FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRU... $ power <fct> slow, slow, gocart, slow, typical, slow, fast, goca...