SSCC - Social Science Computing Cooperative Supporting Statistical Analysis for Research

5.7 Relationships between columns

These exercises use the Chile.csv data set.

  1. Import the Chile.csv file.

    # Attach the tidyverse; read_csv() and cols() below come from it.
    library(tidyverse)
    
    # Assemble the relative path piece by piece rather than hard-coding "/".
    chile_path <- file.path("..", "datasets", "Chile.csv")
    
    # cols() with no arguments: column types are guessed from the data.
    chile_in <- read_csv(
      file = chile_path,
      col_types = cols()
    )
    Warning: Missing column names filled in: 'X1' [1]
    # Rename statusquo to snake_case; chile_in is overwritten with the result.
    chile_in <- rename(chile_in, status_quo = statusquo)
    
    # Drop X1, the name read_csv filled in for the unnamed first column.
    chile <- select(chile_in, -X1)
    
    # Show each remaining column with its type and first few values.
    glimpse(chile)
    Observations: 2,700
    Variables: 8
    $ region     <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "...
    $ population <dbl> 175000, 175000, 175000, 175000, 175000, 175000, 175...
    $ sex        <chr> "M", "M", "F", "F", "F", "F", "M", "F", "F", "M", "...
    $ age        <dbl> 65, 29, 38, 49, 23, 28, 26, 24, 41, 41, 64, 19, 27,...
    $ education  <chr> "P", "PS", "P", "P", "S", "P", "PS", "S", "P", "P",...
    $ income     <dbl> 35000, 7500, 15000, 35000, 35000, 7500, 35000, 1500...
    $ status_quo <dbl> 1.00820, -1.29617, 1.23072, -1.03163, -1.10496, -1....
    $ vote       <chr> "Y", "N", "Y", "N", "N", "N", "N", "N", "U", "N", "...
  2. Find all rows with a missing value in any column using a related columns method.

    # Keep every row that has at least one NA anywhere in region:vote.
    # pmap_lgl() walks the selected columns in parallel, one row at a time;
    # c(...) gathers that row's values so anyNA() can test them and stop at
    # the first NA instead of counting them all. Filtering on the predicate
    # directly avoids creating and then dropping a temporary column.
    chile_na_rows <-
      chile %>%
      filter(
        pmap_lgl(
          select(., region:vote),
          ~ anyNA(c(...))
        )
      )
    
    # Preview the first rows that contain a missing value.
    head(chile_na_rows)
    # A tibble: 6 x 8
      region population sex     age education income status_quo vote 
      <chr>       <dbl> <chr> <dbl> <chr>      <dbl>      <dbl> <chr>
    1 N          175000 F        27 PS            NA      1.43  Y    
    2 N          175000 M        36 PS         35000      1.49  <NA> 
    3 N          175000 F        43 P             NA      0.155 A    
    4 N          125000 F        32 S             NA     -0.850 N    
    5 N          125000 F        34 P           2500      0.108 <NA> 
    6 N          250000 F        46 S             NA      0.155 <NA>