Add labels to results table

This is an example of how to add labels to the results table.

library(presentresults)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
# Load the example results table, then the survey and choices sheets of the
# Kobo tool template.
results_table <- presentresults_MSNA2024_results_table

kobo_survey <- presentresults_MSNA2024_kobotool_template[["kobo_survey"]]
kobo_choices <- presentresults_MSNA2024_kobotool_template[["kobo_choices"]]

Note that the Kobo tool contains additions: new indicators were added to the survey and choices sheets, as the last rows of each sheet show.

# Last rows of the survey sheet, limited to the type, name and English label.
select(tail(kobo_survey), type, name, `label::english`)
#> # A tibble: 6 × 3
#>   type                name         `label::english`                  
#>   <chr>               <chr>        <chr>                             
#> 1 select_one l_1_to_5 comp_prot    Sectoral composite - Protection   
#> 2 select_one l_1_to_5 comp_edu     Sectoral composite - Education    
#> 3 select_one l_1_to_5 comp_foodsec Sectoral composite - Food security
#> 4 select_one l_1_to_5 comp_wash    Sectoral composite - WASH         
#> 5 select_one l_1_to_5 comp_snfi    Sectoral composite - Shelter      
#> 6 select_one l_1_to_5 msni         Multi-Sectoral Needs Index (MSNI)
# Last rows of the choices sheet, limited to the list name, name and English
# label.
select(tail(kobo_choices), list_name, name, `label::english`)
#> # A tibble: 6 × 3
#>   list_name name  `label::english`
#>   <chr>     <chr> <chr>           
#> 1 <NA>      <NA>  <NA>            
#> 2 l_1_to_5  1     1               
#> 3 l_1_to_5  2     2               
#> 4 l_1_to_5  3     3               
#> 5 l_1_to_5  4     4               
#> 6 l_1_to_5  5     4+

First, you should review the Kobo tool to see if there are any duplicated labels, names, etc. These will cause issues later on. I am passing the results table so that I only look at the variables present in the results, not everything.

# Review the survey and choices sheets against the results table, then print
# the flagged entries.
review_kobo_labels_results <- kobo_survey |>
  review_kobo_labels(kobo_choices, results_table = results_table)
review_kobo_labels_results
#> # A tibble: 9 × 5
#>   comments                                name  list_name `label::english`     n
#>   <chr>                                   <chr> <chr>     <chr>            <int>
#> 1 Kobo survey sheet has duplicated label… <NA>  <NA>      How often did t…     3
#> 2 Kobo choices sheet has duplicated name… none  l_snfi_s… <NA>                 2
#> 3 Kobo choices sheet has duplicated name… surf… l_wash_d… <NA>                 2
#> 4 Kobo choices sheet has duplicated labe… <NA>  l_admin1  To be updated b…     2
#> 5 Kobo choices sheet has duplicated labe… <NA>  l_admin2  To be updated b…     3
#> 6 Kobo choices sheet has duplicated labe… <NA>  l_admin3  To be updated b…     4
#> 7 Kobo choices sheet has duplicated labe… <NA>  l_admin4  To be updated b…     4
#> 8 Kobo choices sheet has duplicated labe… <NA>  l_cluste… To be updated b…     4
#> 9 Kobo choices sheet has duplicated labe… <NA>  l_edu_le… To be updated b…     5

In this case, the HHS frequency question is repeated three times with the same label. I will append to each label the question it refers to.

# The HHS frequency label is shared by several survey rows. Each occurrence
# gets the question it refers to appended after " --- " so the labels become
# unique.
hhs_frequency_label <- "How often did this happen in the past [4 weeks/30 days]?"
hhs_related_questions <- c(
  "In the past 4 weeks (30 days), was there ever no food to eat of any kind in your house because of lack of resources to get food?",
  "In the past 4 weeks (30 days), did you or any household member go to sleep at night hungry because there was not enough food?",
  "In the past 4 weeks (30 days), did you or any household member go a whole day or night without eating anything at all because there was not enough food?"
)

kobo_survey_fixed <- kobo_survey
# which() keeps only the TRUE positions, so rows with a missing label are
# ignored rather than producing NA indices.
hhs_frequency_rows <- which(
  kobo_survey_fixed[["label::english"]] == hhs_frequency_label
)
kobo_survey_fixed[hhs_frequency_rows, "label::english"] <- paste(
  hhs_frequency_label,
  "---",
  hhs_related_questions
)

Then I will deal with the choices sheet. There are 2 flags:

- Kobo choices sheet has duplicated names in the same list_name.
- Kobo choices sheet has duplicated labels in the same list_name.

The duplicated names in the same list_name were added with the composite indicators. I will keep only one choice for each duplicated name.

# Remove the choices carrying these two English labels, so that each
# duplicated name is left with a single choice.
kobo_choices_fixed <- filter(
  kobo_choices,
  !(`label::english` %in% c(
    "No shelter (sleeping in the open)",
    "Surface water (river, dam, lake, pond, stream, canal, irrigation channel)"
  ))
)

# Keep only the review flags about duplicated labels within a list_name.
duplicated_listname_label <- filter(
  review_kobo_labels_results,
  comments == "Kobo choices sheet has duplicated labels in the same list_name."
)

The duplicated labels come from the template. I will append a number to each label in the affected lists, based on its order within the list.

# For every list flagged with duplicated labels, append the within-list row
# number to each label; labels of all other lists are left untouched.
kobo_choices_fixed <- kobo_choices_fixed |>
  mutate(
    `label::english` = if_else(
      list_name %in% duplicated_listname_label[["list_name"]],
      paste(`label::english`, row_number()),
      `label::english`
    ),
    # Per-operation grouping: row_number() restarts in each list_name and the
    # result comes back ungrouped.
    .by = list_name
  )

I can review again.

# Run the same review on the fixed sheets.
review_kobo_labels(
  kobo_survey_fixed,
  kobo_choices_fixed,
  results_table = results_table
)
#> # A tibble: 0 × 5
#> # ℹ 5 variables: comments <chr>, name <chr>, list_name <chr>,
#> #   label::english <chr>, n <int>

I can now create a dictionary that will be used to create labels.

# Build the label dictionary from the fixed survey and choices sheets.
label_dictionary <- create_label_dictionary(
  kobo_survey_fixed,
  kobo_choices_fixed,
  results_table = results_table
)

I can then add the labels to the results table.

# Add the label columns from the dictionary to the results table.
results_table_labeled <- results_table |>
  add_label_columns_to_results_table(label_dictionary)
#> Joining with `by = join_by(analysis_type)`
#> Joining with `by = join_by(analysis_key)`

# Preview the first rows of the labelled results table.
head(results_table_labeled)
#> # A tibble: 6 × 22
#>   analysis_type  analysis_var analysis_var_value group_var group_var_value  stat
#>   <chr>          <chr>        <chr>              <chr>     <chr>           <dbl>
#> 1 prop_select_o… snfi_fds_ca… 1                  admin1    PCODE1          0.286
#> 2 prop_select_o… snfi_fds_ca… 2                  admin1    PCODE1          0.143
#> 3 prop_select_o… snfi_fds_ca… 3                  admin1    PCODE1          0.429
#> 4 prop_select_o… snfi_fds_ca… 4                  admin1    PCODE1          0.143
#> 5 prop_select_o… snfi_fds_ca… 1                  admin1    PCODE2          0.267
#> 6 prop_select_o… snfi_fds_ca… 2                  admin1    PCODE2          0.267
#> # ℹ 16 more variables: stat_low <dbl>, stat_upp <dbl>, n <dbl>, n_total <dbl>,
#> #   n_w <dbl>, n_w_total <dbl>, analysis_key <chr>, theme <chr>, module <chr>,
#> #   indicator <chr>, label_analysis_var <chr>, label_analysis_var_value <chr>,
#> #   label_group_var <chr>, label_group_var_value <chr>,
#> #   label_analysis_type <chr>, label_analysis_key <chr>