# Start from a clean workspace: remove every object in the current
# environment, including hidden (dot-prefixed) names.
# The original passed `t` (base R's transpose function) where the logical
# TRUE was intended, and relied on partial matching of `all` to `all.names`;
# both are spelled out here.
rm(list = ls(all.names = TRUE))

Set up filenames

# Name of the dataset under review (no file extension is given here).
# NOTE(review): this script never reads `filename` directly; presumably the
# sourced helper file consumes it -- confirm.
# !!!Update filename
filename <- "DFM_InDepth20152016_StudentsParents_NOPII"

# Path of the helper-functions script that is passed to source() below.
# !!!Update helper functions file
functions_vers <- "functions_1.7.R"

Set up data and functions, and create the dictionary for dataset review

# Run the helper script named in `functions_vers`.
# NOTE(review): `mydata`, encode_direct_PII_team() and encode_location() are
# used later but never defined in this script; presumably this file provides
# them -- confirm.
source(file = functions_vers)

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000) 
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

Direct PII: variables to be removed

# Columns flagged as Direct PII; every name listed here is dropped from
# `mydata` below. Names that do not occur in `mydata` are simply ignored.
# !!!Include any Direct PII variables
dropvars <- c(
  "student_name", "name_pad", "num_telf",
  "future_parent", "school_parent", "education_parent",
  "pic_home", "consent_signed",
  "cto_padre_nom", "cto_padre_app1", "cto_padre_app2",
  "p27a1", "p27a2", "p27a3", "p27a4", "p27a5",
  "p27a6", "p27a7", "p27a8", "p27a9", "p27a10",
  "p27d1", "p27d2", "p27d3",
  "audio_video",
  "nompad", "app_pad", "nommad", "app_mad",
  "address", "dni", "NUMERO_DOCUMENTO",
  "guard_male_name", "conf_guard_male_name",
  "guard_male_surname", "conf_guard_male_surname",
  "guard_female_name", "conf_guard_female_name",
  "guard_female_surname", "conf_guard_female_surname",
  "nom_dist", "nombres", "fecha_nac_fixed",
  "audio1_student", "audio2_student", "audio3_student"
)

# Keep only the columns whose names are not in `dropvars`.
mydata <- mydata[!(names(mydata) %in% dropvars)]

Direct PII-team: Encode field team names

# !!!Replace vector in "variables" field below with relevant variable names

# Remove column "i5" outright, then recode the interviewer ID column.
# NOTE(review): the helper is not handed the data frame explicitly; presumably
# it reads `mydata` from the calling environment and returns the modified
# data frame -- confirm in the helper file. The recorded output that follows
# shows the original values replaced by integer codes.
mydata <- mydata[!(names(mydata) %in% c("i5"))]
mydata <- encode_direct_PII_team(variables = "id_encuestador")
## [1] "Frequency table before encoding"
## id_encuestador. ID del encuestador
##                NONPII VERSION 
##             39           2709 
## [1] "Frequency table after encoding"
## id_encuestador. ID del encuestador
##    1    2 
##   39 2709

Small locations: Encode locations with pop <100,000 using random large numbers

# !!!Include relevant variables, but check their population size first to
# confirm they are <100,000

# Location and school-code columns to be recoded.
locvars <- c(
  "cod_mod_app",
  "Distrito", "Provincia", "prov", "dist",
  "cod_mod2", "COD_MOD", "cod_mod",
  "school_fixed_primary", "school_fixed_sec",
  "cole2016_admin",
  "cod_mod_2016", "cod_mod_2015",
  "p12", "codlocal", "s4p11b1_2015"
)

# Recode every column in `locvars`. In the recorded output the value 999999
# keeps its code after encoding, so `missing` appears to mark a
# missing-value code that is left as-is -- confirm in the helper file.
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## cod_mod_app. cod_mod
##  204800  204875  204909  205005  205047  205112  205120  205153  205682  205690  205773  205781  205815  207407  216341  220285  226704  232207  232223  232231  232249 
##       5       1       1       2       2       3       3       1       1       1       2       1       1       1       1       2       1       2       1       2       1 
##  232264  232504  232512  232538  232546  232553  232561  232579  232587  232595  232603  232611  232645  232728  232777  233130  233296  233361  233676  233718  233734 
##       1       1       1       2       1       2       1       1       1       2       1       2       1       1       2       2       4       1       3       1       2 
##  233882  233890  233908  233916  233924  233932  233940  233957  233965  233973  233981  233999  234021  234062  234096  234104  234112  234120  234138  234153  234161 
##       1       1       4       3       2       2       3       1       2       2       2       2       2       2       2       3       1       2       3       3       2 
##  234187  234211  234229  234237  234351  234369  234377  234385  234401  234419  234427  234443  234450  234500  234583  234674  234682  234781  234831  234856  236158 
##       2       1       2       1       1       2       1       3       2       3       3       2       1       2       2       3       3       2       3       2       1 
##  236349  236422  236448  236463  236471  236489  236653  236661  236927  287409  287425  287466  309286  309294  309377  309419  309682  310433  312090  312215  312306 
##       2       4       1       5       1       3       1       8       4       2       3       1       1       6       1       1       1       3       2       1       2 
##  312421  312744  312868  313080  313239  313395  313460  313890  313908  313965  313981  314070  314187  314211  314237  314245  314260  314278  314294  405258  405498 
##       1       1       2       1       2       2       1       3       2       2       3       2       3       2       1       2       4       3       2       3       3 
##  405704  405738  405746  405837  405852  405894  405902  405928  405936  406009  406066  406082  406116  406124  406140  406215  406223  406264  406413  406595  406629 
##       3       2       2       1       1       2       2       1       2       2       3       3       4       2       1       2       2       2       1       1       3 
##  406645  406975  406983  407007  407049  408245  408278  408286  408294  408328  408336  408393  408468  408476  408484  408492  408559  408567  408609  408666  408732 
##       1       2       3       1       2       2       2       3       2       1       1       3       2       3       1       1       1       3       2       3       1 
##  408773  408823  408856  408922  408955  408971  409003  409011  409029  409193  409227  409235  409243  409284  409292  409300  409318  409326  409359  409441  409565 
##       1       2       1       2       3       2       2       3       3       2       3       1       2       2       3       3       2       1       2       2       2 
##  409896  410464  410480  410514  410613  410670  410746  410779  410787  410803  473249  481283  486688  486928  489120  495069  495325  502922  504142  517581  517888 
##       1       1       1       2       1       1       2       5       2       2       1       3       3       1       2       5       6       2       1      12       2 
##  518084  518472  519496  519595  519678  525923  550392  551309  557587  579268  579276  579284  579292  579300  585885  587055  587147  587204  589804  591255  591602 
##      10       4       2       2       6       3       2       3       3       2       1       1       3       6       2       6       1       3       3       8       4 
##  592147  612051  612119  612291  612507  612689  612747  612770  612804  615013  616110  617787  617829  621391  623017  623041  637272  639542  639617  647388  647412 
##       3       2       2       1       3       1       1       1       2       2       3       6       4       5       2       1       5       1       1       2       3 
##  647446  647628  671628  672105  679829  680058  680082  680124  699603  712562  712711  712778  723031  730655  731273  735498  736116  776039  779041  783423  783621 
##       6       1       1       4       1       2       1       4       7       1       1       2       1       1       2       3       1       1       1       3       1 
##  783696  783704  783720  783787  783795  791319  791574  794438  796888  818674  844159  844183  891408  891812  895482  927871  930958  931055  931063  932236  932491 
##       3       1       3       2       1      10       1       1       1       2       2       2       5       1       1       3       1       7       6       3       2 
##  932608  932848  933226  933283  933291  933317  933531  933846  999999 1031574 1117704 1201649 1201870 1260942 1266428 1273655 1314376 1320647 1321322 1321330 1321355 
##       3       3       2       3       1       3       1       3      97       2       2       4       2       2       3       1       2       3       3       4       5 
## 1321421 1327279 1327287 1336072 1343573 1343581 1344639 1345024 1347269 1347293 1347301 1347434 1347459 1347921 1347939 1347970 1352269 1364868 1369248 1372507 1377209 
##       4       1       1       1       1       3       4       2       1       2       1       1       2       4       2       7       1       3       1       3       1 
## 1377233 1377415 1380021 1380120 1386226 1388610 1388644 1388651 1389261 1389279 1390095 1390467 1390517 1390665 1390673 1392083 1392091 1392109 1392117 1392125 1392141 
##       1       3       6       1       2       1       1       3       4       1       1       2       4       5       4       1       4       2       1       5       5 
## 1392174 1392224 1392240 1392257 1396191 1396209 1396225 1396852 1396878 1396886 1398783 1398932 1401934 1401942 1401959 1402536 1408426 1412634 1412873 1415983 1418615 
##       5       2       2       3       4       2       8       6       1       2       1       1       2       2       5       3       2       3       2       3       1 
## 1423003 1442185 1452705 1459791 1459809 1523802 1523810 1523828 1540988 1540996 1541192 1625532 1625557 1625573 1630631 1637263 1659101 1666130    <NA> 
##       2       2       5       3       4       2       1       1       2       3       4       2       2       1       2       1       2       1    1732 
## [1] "Frequency table after encoding"
## cod_mod_app. cod_mod
##    636    637    638    639    640    641    642    643    644    645    646    647    648    649    650    651    652    653    654    655    656    657    658    659 
##      4      1      3      3      4      1     12      2      1      1      4      1      2      4      1      8      2      1      5      2      1      3      2      2 
##    660    661    662    663    664    665    666    667    668    669    670    671    672    673    674    675    676    677    678    679    680    681    682    683 
##      2      1      1      1      2      2      2      3      2      5      6      1      1      1      1      8      2      3      5      3      3      4      2      2 
##    684    685    686    687    688    689    690    691    692    693    694    695    696    697    698    699    700    701    702    703    704    705    706    707 
##      3      2      1      1      2      1      5      4      4      1      1      1      3      1      1      2      2      1      1      2      1      3      3      1 
##    708    709    710    711    712    713    714    715    716    717    718    719    720    721    722    723    724    725    726    727    728    729    730    731 
##      1      4      1      4      1      2      2      2      1      2      8      2      4      6      3      1      6      5      2      3      2      1      2      3 
##    732    733    734    735    736    737    738    739    740    741    742    743    744    745    746    747    748    749    750    751    752    753    754    755 
##      1      1      5      2      3      1      1      2      2      1      2      3      3      6      1      3      1      6      2      3      2      3      3      1 
##    756    757    758    759    760    761    762    763    764    765    766    767    768    769    770    771    772    773    774    775    777    778    779    780 
##      2      1      1      1      3      2      3      2      2      1      2      1      2      1      2      1      2      6      2      1      2      7      2      1 
##    781    782    783    784    785    786    787    788    789    790    791    792    793    794    795    796    797    798    799    800    801    802    803    804 
##      5      2      2      2      3      2      2      3      3      2      3      1      1      2      1      1      5      2      1      2      2      1      2      6 
##    805    806    807    808    809    810    811    812    813    814    815    816    817    818    819    820    821    822    823    824    825    826    827    828 
##      2      1      1      2      1      2      6      5      3      1      3      2      3      3      2      2      3      1      2      2      1      1      2      3 
##    829    830    831    832    833    834    835    836    837    838    839    840    841    842    843    844    845    846    847    848    849    850    851    852 
##      2      1      2      1      2      1      2      1      4      5      1      3      3      2      3      4      3      2      1      2      2      2      5      2 
##    853    854    855    856    857    858    859    860    861    862    863    864    865    866    867    868    869    870    871    872    873    874    875    876 
##      1      2      3      1      1      3      1      2      1      3      1      3      2      3      5      2      1      1      2      2      2      1     10      2 
##    877    878    879    880    881    882    883    884    885    886    887    888    889    890    891    892    893    894    895    896    897    898    899    900 
##      1      1      2      1      1      1      1      2      3      2      3      1      1      1      4      5      1      3      3      4      3      1      1      1 
##    901    902    903    904    905    906    907    908    909    910    911    912    913    914    915    916    917    918    919    920    921    922    923    924 
##      1      3      2      2      2      4      1      2      3      2      2      2      2      1      2      3      1      1      1      2      3      1      1      1 
##    925    926    927    928    929    930    931    932    933    934    935    936    937    938    939    940    941    942    943    944    945    946    947    948 
##      1      1      2      3     10      2      1      2      2      2      4      4      2      3      2      3      2      3      3      2      2      1      2      4 
##    949    950    951    952    953    954    955    956    957    958    959    960    961    962    963    964    965    966    967    968    969    970    971    972 
##      1      1      3      3      1      3      1      2      2      7      1      3      1      2      1      1      1      2      3      1      2      3      1      3 
##    973    974    975    976    977    978    979    980    981    982    983    984    985    986    987    988    989    990    991    992    993    994    995    996 
##      1      4      3      5      2      4      3      1      3      4      2      2      2      1      1      1      1      1      1      4      1      3      2      1 
##    997    998    999   1000   1001   1002   1003   1004   1005   1006   1007   1008   1009   1010   1011   1012   1013   1014   1015   1016   1017   1018   1019   1020 
##      1      3      1      3      1      1      3      2      3      4      2      3      2      2      2      1      1      2      2      1      2      1      2      2 
##   1021   1022   1023   1024   1025   1026   1027   1028   1029   1030   1031 999999   <NA> 
##      2      3      2      2      6      6      7      3      3      3      1     97   1732 
## [1] "Frequency table before encoding"
## Distrito. Distrito
## Missing-MINEDU           <NA> 
##           1016           1732 
## [1] "Frequency table after encoding"
## Distrito. Distrito
##  620 <NA> 
## 1016 1732 
## [1] "Frequency table before encoding"
## Provincia. Provincia
## Missing-MINEDU           <NA> 
##           1016           1732 
## [1] "Frequency table after encoding"
## Provincia. Provincia
##  721 <NA> 
## 1016 1732 
## [1] "Frequency table before encoding"
## prov. Provincia
## Missing-MINEDU           <NA> 
##           2709             39 
## [1] "Frequency table after encoding"
## prov. Provincia
##  796 <NA> 
## 2709   39 
## [1] "Frequency table before encoding"
## dist. Distrito
## Missing-MINEDU           <NA> 
##           2709             39 
## [1] "Frequency table after encoding"
## dist. Distrito
##  558 <NA> 
## 2709   39 
## [1] "Frequency table before encoding"
## cod_mod2. Código modular
##  204800  204875  204909  205005  205047  205112  205120  205153  205682  205690  205773  205781  205815  206334  207373  207407  216341  219741  220285  226704  232207 
##      10       8       2       5       6       7      10       7       6       7       9       2       5       3       1       1       8       2       3       6       9 
##  232223  232231  232249  232264  232504  232512  232538  232546  232553  232561  232579  232587  232595  232603  232611  232645  232728  232777  233130  233296  233361 
##       9       6       2       7       3       6       3       5       5       4       3       7       5       7       2       1       3       6       4       5       4 
##  233676  233718  233734  233825  233882  233890  233908  233916  233924  233932  233940  233957  233965  233973  233981  233999  234021  234062  234096  234104  234112 
##       3       7       5       3       1       3       6       8       9       3       4       6       3       5       6       8       7       5       6       6       4 
##  234120  234138  234153  234161  234187  234195  234203  234229  234237  234351  234369  234377  234385  234401  234419  234427  234443  234450  234500  234583  234674 
##       4       8       6       8       5       1       4       5       6       1       9       5       8       6       8       7       6       3       7       3       9 
##  234682  234781  234831  234856  236158  236349  236422  236448  236463  236471  236489  236653  236661  236927  287409  287425  287466  309286  309294  309377  309419 
##       7       3       7       7       5       5      16       4       8       1       7       1      31       9      10       6       3       1      12       1       3 
##  309435  309567  310433  310441  312090  312215  312306  312421  312744  312868  313080  313239  313395  313460  313890  313908  313965  313981  314070  314187  314211 
##       1       4       3       1       2       6      10       5       2       2       1       2       9       1       3       8       6       6       2       5       4 
##  314237  314245  314252  314260  314278  314294  405258  405498  405704  405738  405746  405852  405894  405902  405928  405936  406009  406066  406082  406116  406124 
##       4       4       6       6       9       5       5       8       5       5       4       9       8       8       8       9       6      10       6       6       8 
##  406140  406215  406223  406264  406413  406595  406629  406645  406975  406983  407007  407049  408211  408245  408278  408286  408294  408328  408336  408393  408468 
##       3       5       6       4       7       8       7       5       6      10       4       7       1       7       4       4       3       5       1       4       8 
##  408476  408484  408492  408559  408567  408609  408666  408732  408773  408823  408856  408922  408955  408971  409003  409011  409029  409193  409227  409235  409243 
##       7       5       5       3       8       6       8       3       4       3       8       7       4       5       6      10       9       5       8       7       6 
##  409284  409292  409300  409318  409326  409359  409441  409565  409896  410464  410480  410514  410613  410670  410746  410779  410787  410803  473249  481283  486688 
##       9       7       9       7       4       7       9       3       2       1       7       5       1       3       7       9       5       4       7       9       8 
##  486928  489120  495069  495325  498782  499863  502922  504142  517581  517888  518084  518472  519496  519595  519678  525923  550392  551309  557587  579268  579276 
##       2       8      14      16       1       3       2      13      25       8      22      10       6       6       6      21       7       3       9       9       1 
##  579284  579292  579300  585885  587055  587147  587204  589200  589747  589804  591255  591602  592147  612051  612119  612291  612416  612507  612689  612747  612770 
##       1      17      20       6      15       4      13       1       1       5      23      14       3       2       2       5       2       9       1       1       1 
##  612804  615013  616110  617787  617829  621391  623017  623041  637272  639542  647388  647412  647446  647628  655746  671628  672105  678961  679829  680058  680082 
##       4       4       6      17       6      15       4       2       9       2      10       7      16       4       1       4       4       2       2       8       1 
##  680124  699603  712562  712711  712778  723031  730655  731273  735498  736116  775700  776039  783423  783597  783621  783696  783704  783720  783787  783795  791319 
##      17      11       2       2       2       3       6       2       5       2       5       3      12       1       1      11       1      14      10      10      20 
##  791574  794438  796888  818674  818708  844159  844183  891408  891812  895482  899351  927871  930958  931055  931063  932236  932434  932491  932608  932848  933226 
##       4       1       2       6       3       2       4      14       1       1       3      10       1      15      18      12       2       4       8       7       6 
##  933283  933291  933317  933531  933598  933846 1031574 1117704 1120005 1201649 1201870 1260942 1266428 1271840 1273655 1314376 1320647 1321322 1321330 1321355 1321421 
##      10       1       6       1       2       6       4      10       1      14       6       2       6       1       2       2      10       6      11       9      11 
## 1327279 1327287 1336072 1343573 1343581 1344639 1345024 1347269 1347293 1347301 1347434 1347459 1347921 1347939 1347970 1352269 1364868 1369248 1372507 1374438 1377209 
##       2       9       3       9      11      13       7       1      13       8       1       7      19      11      14       1       7      14       7       1      11 
## 1377233 1377415 1379361 1379544 1380021 1380120 1386226 1388610 1388644 1388651 1389261 1389279 1390095 1390467 1390517 1390582 1390665 1390673 1392083 1392091 1392109 
##       1      10       1       4      18       4      10       2       5      11       9       5       2      11       6       1      17      16       7       6      10 
## 1392117 1392125 1392141 1392174 1392216 1392224 1392240 1392257 1396191 1396209 1396225 1396852 1396878 1396886 1398783 1401934 1401942 1401959 1402536 1408426 1412634 
##       7      13      16      14       2       7       3       8      19      11      27      17       4       8       1      11       8      17       7       2       5 
## 1412873 1415983 1418615 1423003 1442185 1452705 1458348 1459791 1459809 1523802 1523810 1523828 1540988 1540996 1541192 1625532 1625557 1625573 1630631 1637263 1659101 
##       3       4       4      12       5       7       9       9      12       9       8       6       7      10      10       7       8       8       3       4       9 
## 1666130 1723469    <NA> 
##       1       2      39 
## [1] "Frequency table after encoding"
## cod_mod2. Código modular
##  340  341  342  343  344  345  346  347  348  349  350  351  352  353  354  355  356  357  358  359  360  361  362  363  364  365  366  367  368  369  370  371  372 
##    2    1    3    9    7    5    8    5    7    1    1    1    1    4   14    8    3    5   10    6    2    7    8    8    6    5    6    2    2    3    7    6    6 
##  373  374  375  376  377  378  379  380  381  382  383  384  385  386  387  388  389  390  391  392  393  394  395  396  397  398  399  400  401  402  403  404  405 
##    8   18    5   19    3   10    6   13    8    4    7   10    6    4    9    4    1    8    4    1    6   16    2   17    5    4    7    2    2   20    9    5    5 
##  406  407  408  409  410  411  412  413  414  415  416  417  418  419  420  421  422  423  424  425  426  427  428  429  430  431  432  433  434  435  436  437  438 
##    5   22    4    3    1    4    2    2    3   16   12    1    4    4   20    5    5    7    9    5    4   27    1    6    8    1    7    5   14    7    3    9   15 
##  439  440  441  442  443  444  445  446  447  448  449  450  451  452  453  454  455  456  457  458  459  460  461  462  463  464  465  466  467  468  469  470  471 
##    3    1   10    8    2   16    4    3    6    6   14   12    8   17    9   17    4    2    9   11    5    6    6    2    6    8   11    6   15    1   11    8    5 
##  472  473  474  475  476  477  478  479  480  481  482  483  484  485  486  487  488  489  490  491  492  493  494  495  496  497  498  499  500  501  502  503  504 
##   11    3   10    1    6    7    1   12    3    2   13   10    8    4    5    6    2    3    2    1    5    7    1    7   17    6    7    9    3    6    6   10    4 
##  505  506  507  508  509  510  511  512  513  514  515  516  517  518  519  520  521  522  523  524  525  526  527  528  529  530  531  532  533  534  535  536  537 
##    7    8    5    9    9   10    3    6    9    9    3    9    3    1    6    5    7    6    3    2    8    5    4    3    8   14    5    6    1    4    5    9    6 
##  538  539  540  541  542  543  544  545  546  547  548  549  550  551  552  553  554  555  556  557  558  559  560  561  562  563  564  565  566  567  568  569  570 
##   14   12    8    7   10    8   10    7    3    4    9    7    6    6    3   10   14    2    4    6    3    2   13    6    1    7    5    6    4    6    2    1    5 
##  571  572  573  574  575  576  577  578  579  580  581  582  583  584  585  586  587  588  589  590  591  592  593  594  595  596  597  598  599  600  601  602  603 
##    7    4    6    9    1    4    7    9    6    2    5    1    1    4    7    4    8    3    3    3    7    6   10    1    8   11    4   10    9    2    2    9    7 
##  604  605  606  607  608  609  610  611  612  613  614  615  616  617  618  619  620  621  622  623  624  625  626  627  628  629  630  631  632  633  634  635  636 
##    1    1    9   11    6    8    3    1    6    9    5   10    7    3    7    1    1    8    4   15   16    1    9    7    6   11    2   25    8    8    4    8    4 
##  637  638  639  640  641  642  643  644  645  646  647  648  649  650  651  652  653  654  655  656  657  658  659  660  661  662  663  664  665  666  667  668  669 
##    2    7    2    6    9    8    9    2    5    1   18    8    4    1   13    1    6    2    1    6    2   14    1    8    6    6    5    1    7    2    7    3    7 
##  670  671  672  673  674  675  676  677  678  679  680  681  682  683  684  685  686  687  688  689  690  691  692  693  694  695  696  697  698  699  700  701  702 
##    4   10    1    1   10    4    8    1    2    4    7    2    4   23    7    5    4    3    4   21    1    2    9    4    6    7   31    1    8    5    3    4    7 
##  703  704  705  706  707  708  709  710  711  712  713  714  715  716  717  718  719  720  721  722  723  724  725  726  727  728  729  730  731  732  733  734  735 
##    7    3    6   10    3   16    1    2    9    6    1    3    9   10    3    5    9    4   11   12    9    5   11    7    8    7   14    2    5   19    1    3    1 
##  736  737  738  739  740  741  742  743  744  745  746  747  748  749  750  751  752  753  754  755  756  757  758  759  760  761 <NA> 
##    1   17    5    5    7   11    7    3    5    7    8    6    5    8    1   10    2   13    2   11    2    4   17   10    2    3   39 
## [1] "Frequency table before encoding"
## COD_MOD. 
##  204875  205005  205047  205120  205153  205773  206334  216341  220285  226704  232207  232223  232231  232249  232264  232504  232512  232538  232546  232553  232561 
##      10       8      10       4       1       6       5       9       5       6       8      10      12       7      10       6      14       3      12       9      13 
##  232579  232587  232595  232603  232611  232645  232728  232777  233296  233361  233676  233718  233734  233825  233890  233908  233916  233924  233940  233957  233965 
##      10      15       7      12       4       4      10      14       6      12       5      17      15      12      17       7      17      16       3       6       7 
##  233973  233981  233999  234021  234062  234070  234096  234104  234112  234120  234138  234153  234161  234187  234195  234203  234211  234229  234237  234369  234377 
##      17      12      16      12       7       7      10      17      14      17      15      16      16      17       8       7       5       7      15      16      11 
##  234385  234401  234419  234427  234443  234450  234500  234583  234674  234682  234781  234831  234856  287409  287417  287425  287466  312090  312207  312215  312306 
##      10      10      15      14      18       8      14       5      17       2       4      15      17      17       2       9       3       5       1       6      13 
##  312421  312744  312868  313239  313395  313460  313890  313908  313965  313981  314096  314187  314211  314237  314245  314252  314260  314278  314294  405258  405498 
##       9       2       6       4      15       2       9      13      10      16       4      11       9       7      10      12      10      10      12       4      10 
##  405704  405738  405746  405753  405852  405886  405894  405902  406082  406629  406645  406983  407007  407049  408245  408278  408286  408294  408328  408344  408393 
##       1       7       1       2       3       1      12      15      16       3       3      17       5      17       6       9       3       1       4       9      13 
##  408468  408476  408484  408492  408526  408559  408567  408583  408609  408666  408732  408773  408823  408856  408955  409003  409011  409029  409193  409292  409300 
##       9       9       5       6       1       8      15       1       4      14      10       4       2       8       1      12       1      16       1      15      13 
##  409326  409359  409441  409565  409896  410480  410514  410670  410738  410746  410779  410787  410803  473249  481283  499863  502922  504142  517888  550392  557587 
##      11      12       2       5       2       9       8       6       3       5       9       5       7       9       9      12       3       2      13       3      10 
##  587147  612291  612416  612689  612747  612804  615013  623017  623041  637215  647388  647412  671628  678904  678961  679829  680058  712562  712711  723031  731273 
##       8      16       2       2       1       6      12      10       3       2       8       3       4       2       3       6       9       6       6       5       1 
##  735498  736116  775700  783423  783597  796888  818674  818708  844159  844183  899351  932434  932491  932848 1117944 1201870 1266428 1412634    <NA> 
##       6       4      11      16       2       4       7       6       6       5       4       5       8       1       2      17       4      10    1031 
## [1] "Frequency table after encoding"
## COD_MOD. 
##  892  893  894  895  896  897  898  899  900  901  902  903  904  905  906  907  908  909  910  911  912  913  914  915  916  917  918  919  920  921  922  923  924 
##    6    6    4    1    7   11   10    4    1   17   13   15    9   17    2   10    6   14    6    5   12   15    4    9    6    6    1    9    4    5   10    5    7 
##  925  926  927  928  929  930  931  932  933  934  935  936  937  938  939  940  941  942  943  944  945  946  947  948  949  950  951  952  953  954  955  956  957 
##    3    4    7   10    7    6    5    5    5   14    2   16   15    7   13   10    4    9    2    1    8   13   17   15    8    3   16    3    6    4    2   12    8 
##  958  959  960  961  962  963  964  965  966  967  968  969  970  971  972  973  974  975  976  977  978  979  980  981  982  983  984  985  986  987  988  989  990 
##    7    6   10   12    2   10   15   17    5    5    6    9    5    3    9    4    8   10   17    9    3   14   16   10    4    7   12    4   13    8    4    6    6 
##  991  992  993  994  995  996  997  998  999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 
##    4    9    3   17   12    2   10   16   10    9   16    8    2    2   14    2   15   10   13   16   13   12    8    3    1    7    3   18    2    1    5    2   14 
## 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 
##    1   17   12    1   17   12   10    7   11    4   17   16    9   11    9    1    8    5    1    6    9    8   16    6   10    9   10    3    2    5    6    1   12 
## 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 
##    7    2   10   17    5    4    7    9   17   11   10   15   12   10    8   17   12    9    2   14   15    7    1    4    1    3    6   12   12    6   16    3    3 
## 1090 1091 1092 1093 1094 1095 1096 1097 1098 <NA> 
##   17   16   12    1    2    5   15    3   15 1031 
## [1] "Frequency table before encoding"
## cod_mod. cod_mod
##  204800  204875  204909  205005  205047  205112  205120  205153  205682  205690  205773  205781  205815  206334  216341  220285  226704  232207  232223  232231  232249 
##      15       9       7       9      10      15       9      16      14      15      18       2       8       5       6       2      11      11      11      10       7 
##  232264  232504  232512  232538  232546  232553  232561  232579  232587  232595  232603  232611  232645  232728  232777  233296  233361  233676  233718  233734  233825 
##      10       7      15       4       8       7      15       7       7       8      11       2       4       9      11       6      12       2      10      12       4 
##  233890  233908  233916  233924  233940  233957  233965  233973  233981  233999  234021  234062  234070  234096  234104  234112  234120  234138  234153  234161  234187 
##       7       5      17       1       3      12       7       6       4       1       8      11       7      19      17      14      10       9      13       1       1 
##  234195  234203  234211  234229  234237  234369  234377  234385  234401  234419  234427  234450  234500  234674  234682  234781  234831  234856  287409  287417  287425 
##       1       3       2       3       3       2       8       2       6       1       9       5       3       1       8       2       3      17       4       2       3 
##  287466  312090  312215  312306  312421  312744  312868  313395  313890  313908  313965  313981  314187  314211  314237  314245  314252  314260  314278  314294  405258 
##       3       2       3       4       2       2       1       4       2       2       2       1       2       2       1       2       2       2       2       2       9 
##  405498  405704  405738  405746  405753  405852  405894  405902  405928  405936  406009  406041  406066  406082  406116  406124  406140  406215  406223  406264  406413 
##      10       7      14       8       2      13      12      16      14      16      16      14      16      16      11      13       3      16      13       5      16 
##  406595  406629  406645  406975  406983  407007  407049  408245  408278  408286  408294  408328  408344  408393  408468  408476  408484  408492  408526  408559  408567 
##      14      12      13      15      17      11      17       7       9       7       3      13       4      13      12      13      13      13       4       8      15 
##  408583  408609  408666  408732  408773  408823  408856  408922  408955  408971  409003  409011  409029  409193  409227  409235  409243  409284  409292  409300  409318 
##       7       8      11       1       6       8      16      17       9      13       5      11       1       3      16      12      10      15      10      12      16 
##  409326  409359  409441  409565  409896  410480  410514  410670  410738  410746  410779  410787  410803  473249  481283  486688  499863  504142  517888  519496  519595 
##       6       8      12      11       1       4       8       4       8       6       1       3       6       8      11      15       2      13       3      11      15 
##  550392  551309  557587  585885  587147  592147  612291  612416  612689  623017  623041  637215  647388  647412  647628  671628  672105  678961  679829  680058  712562 
##      12       9       5      10       2       4      16       2       1       2       1       2      16       5      11       7       6       3       6      10       6 
##  712711  723031  730655  731273  731596  735498  775700  776039  783423  783597  796888  818674  818708  844159  844183  899351  930958  932434  932491  932848 1201870 
##       1       2       9      11       4       3       9       7      11       2       1       7       4       1       5       1      15       5       9      13      17 
## 1266428 1377209 1412634    <NA> 
##       6      16      10     902 
## [1] "Frequency table after encoding"
## cod_mod. cod_mod
##  927  928  929  930  931  932  933  934  935  936  937  938  939  940  941  942  943  944  945  946  947  948  949  950  951  952  953  954  955  956  957  958  959 
##    8   12    7   10    1    2    6   12    6   12   11    4    2    2    9   15    4   15    7    1    3   16   12   13    1    2    4    1    4    3    2   16    2 
##  960  961  962  963  964  965  966  967  968  969  970  971  972  973  974  975  976  977  978  979  980  981  982  983  984  985  986  987  988  989  990  991  992 
##    1    8   18    9   11    6    1   16    5    1    1    8   14   15   15    1    5    6    1   16    2    7    3    7   12    9    2   11    1    2    8   11    9 
##  993  994  995  996  997  998  999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 
##    8   11   16   11    8    2    8    9    9   14    2   12    4    9    7    3    5   11    4    4    8   16    3   10    2    1    3   10   15    2    3    9    5 
## 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 
##   16    3    2   16    5    8   11    3   17    7    3    7    9    7    1    4    2    6   14    3   11   12   17   11   13   15   13   10   13   13    7    1   17 
## 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 
##    7   13   10    2   12    1    2    1    2    2    8    3   16   16   19   17   16    4   11    8   12    9    2    5    7   11    6    1   15    2   15    1    8 
## 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 
##   14    7   11    4    6   13    2   17    6    8    3   10   11    7   11   13   10    3   16   13    7    3   13    6    6    4    8    2    4    9   14    2   10 
## 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 
##    2    7   16    1    3    2    2    5   10    9   13   17    7    4   13    6   14    6   10    2    9   13    4   15   15   12    5    1    5   11   16   10    2 
## 1158 1159 1160 <NA> 
##   17   15    2  902 
## [1] "Frequency table before encoding"
## school_fixed_primary. Seleccione la escuela primaria a donde realmente va el niño(a)
##        219741 312561 405217 408922 408971 409243 612291 647388 679829 
##   2736      1      1      2      1      1      1      1      1      3 
## [1] "Frequency table after encoding"
## school_fixed_primary. Seleccione la escuela primaria a donde realmente va el niño(a)
##  875  876  877  878  879  880  881  882  883  884 
## 2736    1    1    3    1    1    1    2    1    1 
## [1] "Frequency table before encoding"
## school_fixed_sec. Seleccione la escuela secundaria a donde realmente va el niño(a)
##         1253905 1345024 1347301 1347434 1347921 1379544 1380021 1392240 1395367 1401934 1402536 1452705 1540996  236109  309567  579300  589804  612507  616110  621391 
##    2704       2       1       3       1       1       2       1       1       1       1       1       1       1       1       1       1       2       1       1       1 
##  637272  680124  783720  791319  931055  931436  932608  933226  933317 
##       7       1       2       2       3       1       1       1       1 
## [1] "Frequency table after encoding"
## school_fixed_sec. Seleccione la escuela secundaria a donde realmente va el niño(a)
##  456  457  458  459  460  461  462  463  464  465  466  467  468  469  470  471  472  473  474  475  476  477  478  479  480  481  482  483  484  485 
##    1    2    1    1    1    1    1    2    7    3    1    2    1    3    1 2704    1    1    2    1    2    1    1    1    1    1    1    1    1    1 
## [1] "Frequency table before encoding"
## cole2016_admin. 
##  55610  58185  59420  59509  59665  60433  62922  63530  64068  65421  66736  67161  68599  68603  68655  68679  68735  68900  68924  68938  68943  68957  68962  68976 
##      1      1      1      1      1      1      1      1      1      2      1      1     12      3      3      2      1      3      1      4      2      8      2      3 
##  68981  69103  69179  69235  69551  69669  69706  69810  69985  70007  70031  70074  70088  70111  70149  71115  71498  71592  71629  71733  71752  71766  71790  71926 
##      8      2      3      2      2      1      4      1     16      2      1      2      2      1      5      2      1      3      1     15      4      2      1      2 
##  71931  73119  73162  73181  73195  73280  73303  73322  73341  73398  73435  73459  73529  73534  73548  73553  73567  73572  73591  73609  73789 130308 142655 146154 
##      1     12     10      2      8      2      6      8      4     19      4      2      3     14      3      6      6      7      4      6     11     14      7      5 
## 147484 147686 147709 147714 148520 148600 148997 150122 150136 150202 150221 150259 150513 150532 150565 150570 150607 150612 150631 150645 150650 150754 150768 150773 
##      4      4      3      2      5     10      2      4      4      7      5     15      6     21      6      6      6      5      3     16      6      9      7     12 
## 150792 150834 150848 150966 150971 150985 151027 151070 151107 151188 151193 151206 151254 151598 151640 151664 151678 152060 152215 152239 152263 152282 152574 152588 
##     17      4     14     15     16      4      7      3      9      9      7      1      8      1     10      3      1      1     14     15      4      4      4      5 
## 152593 152606 152625 152668 152673 152734 152753 152786 153540 153818 153823 153837 153842 153861 153875 153880 153899 153903 153922 153941 153955 154021 154035 154064 
##      6     10      3     12      9     15      6     17      1     24      6      1      5      2     22      5      4      6      1      4     15     14      7      3 
## 154078 154083 154097 154120 154200 154238 154262 154549 155054 157010 157053 157072 157190 157213 157227 157345 157350 157374 157393 157406 157487 157492 157500 157538 
##      3      6      3     25      2      1      6      2      1      3     15      5      6      1      8      6      1     31     10      6     14      3      6      1 
## 157543 157595 157604 157618 157623 157656 157661 157680 157703 157717 157722 157736 157760 157779 157798 157802 157821 157835 157840 157864 157878 157915 157977 157982 
##      6      8     22      3      7      3      9      7      5     17     14     10      1      5      4      8      6     15     18      6      3      6     13     10 
## 158024 158057 158095 158104 158123 158161 158175 158180 158203 158217 158236 158241 158255 158335 158340 158359 158364 158378 158383 158401 158415 158444 158458 158477 
##      3      5      4     16      6     13     11      8      9      8     21     11      6      1     15     19      4      4     13     22      6      1      8      6 
## 158482 158496 158509 158547 158590 158608 158627 158632 158646 158665 158670 158707 158712 158745 158750 158788 158934 159207 159453 159491 159556 159702 159797 159815 
##     17     16     15      2     16      4      5      1      1      4      4      2      3      4      2     24      2      1      6      5     14      2      5     17 
## 164930 164968 165029 165072 165086 165091 165185 165190 165246 165326 165331 165345 165473 165543 165604 165637 165680 165699 165703 165717 165736 165741 165784 165798 
##      1      7      8      4      3     16      6      8      7      4      4      2      8      2      2     17      8      7      4      5      8     26      7      4 
## 165802 165840 165864 165915 165920 166038 166076 166104 166118 166316 166533 166590 166627 166632 166651 166774 166788 166830 166905 166948 167014 167170 167189 167194 
##      5      4     11      3      2      1     16      7     11      2      1      4      7      6      9      8      2      4      3      2     20     20      4      5 
## 167207 167212 167226 167231 167269 167311 167349 167354 167368 167410 167537 167561 167575 167580 167599 167603 167617 167636 167641 167679 167684 169126 169150 170196 
##      8     19     22     21      7      6      5      4      4      2     24      5     16      7      5     14      9      6      6      1     23      1     13      1 
## 170200 170219 170304 170318 170479 170484 170506 170610 170709 170832 170865 170907 170931 171134 343357 462430 462543 505991 508447 515508 517084 517102 520915 526465 
##      6      3      5      8      8      5     14      2      1      4      5      5      2      5     11      9      2     13      3      7      1      7      1     14 
## 526470 531928 534658 535506 538208 538227 538779 555306 556042 560162 562439 563151 571844 582376 585308 590263 601493 602242 603468 603581 603699 603717 603755 605066 
##     16     14      7      1      9      1     10      7      1      2      1      5      7      4      7      8     18      5     11      6     16      1      3      3 
## 605132 605146 609248 611760 748169 748739 999999   <NA> 
##      3      8     27      1      2      1    200     39 
## [1] "Frequency table after encoding"
## cole2016_admin. 
##    894    895    896    897    898    899    900    901    902    903    904    905    906    907    908    909    910    911    912    913    914    915    916    917 
##     10      3      1     10      2      1      5      7      2      6      9      7      7     17     11      6      5     24      7      4      7      6      1      6 
##    918    919    920    921    922    923    924    925    926    927    928    929    930    931    932    933    934    935    936    937    938    939    940    941 
##      5      1      4      6      4     12      1      6      8      5      3      3      1      2      1      7     20      4     17      6      2      9      2     24 
##    942    943    944    945    946    947    948    949    950    951    952    953    954    955    956    957    958    959    960    961    962    963    964    965 
##      8      5     15      9     21      3      8     19      2     15      1      6      5      4      5      5      4      2      2     22      5      8     16      4 
##    966    967    968    969    970    971    972    973    974    975    976    977    978    979    980    981    982    983    984    985    986    987    988    989 
##      4      9      4      9     16      1     15     16      3     15     13      6      1      2      1     14      1      6      1      9      4      4      4      2 
##    990    991    992    993    994    995    996    997    998    999   1000   1001   1002   1003   1004   1005   1006   1007   1008   1009   1010   1011   1012   1013 
##      2     12      8      3      3     19      5      4      3     14      5     14      1      8      3      2      3      2      1      8      6      4     21      5 
##   1014   1015   1016   1017   1018   1019   1020   1021   1022   1023   1024   1025   1026   1027   1028   1029   1030   1031   1032   1033   1034   1035   1036   1037 
##      6      1      6      6      7     19     10      5      1     14      2      8      4     17      3      5      4     13      5      3      5      1      5     15 
##   1038   1039   1040   1041   1042   1043   1044   1045   1046   1047   1048   1049   1050   1051   1052   1053   1054   1055   1056   1057   1058   1059   1060   1061 
##      1      7      1      2     18      2      6      7     14      5      1     16     10      3      8     15      7     16      4     31     18      5      8     15 
##   1062   1063   1064   1065   1066   1067   1068   1069   1070   1071   1072   1073   1074   1075   1076   1077   1078   1079   1080   1081   1082   1083   1084   1085 
##      2      8     11      6      3      4      6     14      6     22      7      1     13      8      3      6      2      6      4      1      6      7      5      6 
##   1086   1087   1088   1089   1090   1091   1092   1093   1094   1095   1096   1097   1098   1099   1100   1101   1102   1103   1104   1105   1106   1107   1108   1109 
##      8     13      1      1     11      6      1     25      2      6     14      1      4     17      1     20      4      4     14      4      8      2      2     17 
##   1110   1111   1112   1113   1114   1115   1116   1117   1118   1119   1120   1121   1122   1123   1125   1126   1127   1128   1129   1130   1131   1132   1133   1134 
##      1      2      3      9      1      2      7      5      3     14      1      5      1     15      6      4     15      1      2      1     22      2     13      1 
##   1135   1136   1137   1138   1139   1140   1141   1142   1143   1144   1145   1146   1147   1148   1149   1150   1151   1152   1153   1154   1155   1156   1157   1158 
##      4     21      6      1      6      2      3      4      5      1      2      4      2     27      7      6      1      2      1      8      1      9     12     16 
##   1159   1160   1161   1162   1163   1164   1165   1166   1167   1168   1169   1170   1171   1172   1173   1174   1175   1176   1177   1178   1179   1180   1181   1182 
##      4      7      1      8      3      8     14     16      5      8      5      2     24      3      6     22      7      1      6      9      3      2      6     11 
##   1183   1184   1185   1186   1187   1188   1189   1190   1191   1192   1193   1194   1195   1196   1197   1198   1199   1200   1201   1202   1203   1204   1205   1206 
##      3     14     26      5      2      6     14      4      2      7     11      8      1     16      2      4      5     10      4     15      1      6      3     23 
##   1207   1208   1209   1210   1211   1212   1213   1214   1215   1216   1217   1218   1219   1220   1221   1222   1223   1224   1225   1226   1227   1228   1229   1230 
##      4      8      9      8      2     17      1      1      1      8      3      3      7      7      4      3      3     11      1      1     12     16      4      6 
##   1231   1232   1233   1234   1235   1236   1237   1238   1239   1240   1241   1242   1243   1244   1245   1246   1247   1248   1249   1250   1251   1252   1253   1254 
##      2     16      3      3      1      4      2      1      3      4      1      6      1      7      1      4      2      1     11      2     10      4      7     10 
##   1255   1256   1257   1258   1259   1260 999999   <NA> 
##      3      7     16     10      5      7    200     39 
## [1] "Frequency table before encoding"
## cod_mod_2016. 
##  204800  204875  204909  205005  205047  205112  205120  205153  205682  205690  205773  205781  205815  206334  207373  207407  216341  220285  226704  232207  232223 
##      10       6       2       5       6       7      10       7       6       7       8       2       5       2       1       1       7       3       6       8       8 
##  232231  232249  232264  232504  232512  232538  232546  232553  232561  232579  232587  232595  232603  232611  232645  232728  232777  233130  233296  233361  233676 
##       6       1       7       3       6       3       5       5       4       3       7       5       7       2       1       3       6       4       5       4       3 
##  233718  233734  233825  233882  233890  233908  233916  233924  233932  233940  233957  233965  233973  233981  233999  234021  234062  234096  234104  234112  234120 
##       7       5       3       1       3       6       8       9       3       4       6       3       5       5       8       7       5       6       6       4       4 
##  234138  234153  234161  234187  234195  234203  234211  234229  234237  234351  234369  234377  234385  234401  234419  234427  234443  234450  234500  234583  234674 
##       7       6       8       5       1       3       1       4       6       1       9       5       8       6       8       7       6       3       6       3       9 
##  234682  234781  234831  234856  236158  236349  236422  236448  236463  236471  236489  236653  236661  236927  287409  287425  287466  309286  309294  309377  309419 
##       7       3       7       7       5       5      16       4       8       1       7       1      31       8      10       6       2       1      12       1       3 
##  309435  309567  309682  310433  310441  312090  312215  312306  312421  312744  312868  313080  313239  313395  313460  313890  313908  313965  313981  314070  314187 
##       1       4       1       3       1       2       5      10       4       2       2       1       2       9       1       4       8       6       6       2       6 
##  314211  314237  314245  314252  314260  314278  314294  405258  405498  405704  405738  405746  405837  405852  405894  405902  405928  405936  406009  406066  406082 
##       4       4       4       6       6       8       5       6       8       5       5       4       1       9       7       7       7       8       6      10       6 
##  406116  406124  406140  406215  406223  406264  406413  406595  406629  406645  406975  406983  407007  407049  408211  408245  408278  408286  408294  408328  408336 
##       6       6       1       5       5       3       6       7       8       4       5      10       4       6       1       7       4       4       3       5       1 
##  408393  408468  408476  408484  408492  408559  408567  408609  408666  408732  408773  408823  408856  408922  408955  408971  409003  409011  409029  409193  409227 
##       4       8       7       5       5       3       8       6       8       2       4       3       8       7       4       5       6       9       9       2       8 
##  409235  409243  409284  409292  409300  409318  409326  409359  409441  409565  409896  410464  410480  410514  410613  410670  410746  410779  410787  410803  473249 
##       7       5       9       7       9       6       4       7       9       3       1       1       6       5       1       3       6       9       5       4       5 
##  481283  486688  486928  489120  495069  495325  498782  499863  502922  504142  517581  517888  518084  518472  519496  519595  519678  525923  550392  551309  557587 
##       9       8       2       8      14      16       1       2       2      10      26       7      22      10       6       6       6      21       7       3       9 
##  579268  579276  579284  579292  579300  585885  587055  587147  587204  589200  589747  589804  591255  591602  592147  612051  612119  612291  612416  612507  612689 
##       9       1       1      17      20       6      15       4      13       1       1       5      23      14       4       2       2       5       1       9       1 
##  612747  612770  612804  615013  616110  617787  617829  621391  623017  623041  637272  639542  639617  647388  647412  647446  647628  655746  671628  672105  678961 
##       1       1       4       4       6      17       6      15       4       2       8       2       1       9       7      16       4       1       4       4       1 
##  679829  680058  680082  680124  699603  712562  712711  712778  723031  730655  731273  735498  736116  775700  776039  779041  783423  783597  783621  783696  783704 
##       2       9       1      17      12       1       2       2       2       5       2       4       2       5       3       1      12       1       1      11       1 
##  783720  783787  783795  791319  791574  794438  796888  818674  818708  844159  844183  891408  891812  895482  927871  930958  931055  931063  932236  932434  932491 
##      14      10      10      20       4       2       2       5       2       2       3      14       1       1      10       1      14      18      12       1       4 
##  932608  932848  933226  933283  933291  933317  933531  933598  933846  999999 1031574 1117704 1120005 1201649 1201870 1260942 1266428 1271840 1273655 1314376 1320647 
##       8       7       6      10       1       6       1       2       6      97       4      10       1      14       6       2       4       1       2       2      10 
## 1321322 1321330 1321355 1321421 1327279 1327287 1336072 1343573 1343581 1344639 1345024 1347269 1347293 1347301 1347434 1347459 1347921 1347939 1347970 1352269 1364868 
##       6      11       9      11       2       9       3       9      11      13       7       1      13       8       1       7      19      11      14       1       7 
## 1369248 1372507 1374438 1377209 1377233 1377415 1379361 1379544 1380021 1380120 1386226 1388610 1388644 1388651 1389261 1389279 1390095 1390467 1390517 1390582 1390665 
##      14       7       1      10       1      10       1       1      18       4      10       2       5      11       9       5       2      11       8       1      17 
## 1390673 1392083 1392091 1392109 1392117 1392125 1392141 1392174 1392216 1392224 1392240 1392257 1396191 1396209 1396225 1396852 1396878 1396886 1398783 1398932 1401934 
##      16       7       5      10       7      13      16      14       2       6       3       8      19      11      27      17       4       8       1       1      11 
## 1401942 1401959 1402536 1408426 1412634 1412873 1415983 1418615 1423003 1442185 1452705 1458348 1459791 1459809 1523802 1523810 1523828 1540988 1540996 1541192 1625532 
##       8      17       7       2       5       3       5       4      12       4       7       9      10      12       9       8       6       7      10      10       7 
## 1625557 1625573 1630631 1637263 1659101 1666130 1723469 
##       8       8       3       4       9       1       2 
## [1] "Frequency table after encoding"
## cod_mod_2016. 
##    504    505    506    507    508    509    510    511    512    513    514    515    516    517    518    519    520    521    522    523    524    525    526    527 
##      1      7      3      2      6      6      6     10      3      5      4      3     15      4      9      3      1      8     12      8      4      8      1     11 
##    528    529    530    531    532    533    534    535    536    537    538    539    540    541    542    543    544    545    546    547    548    549    550    551 
##      2      2      1     17      1      7      7      8      3      1      5      9     11      4      1      8      8      6      4      8     12      5      3      9 
##    552    553    554    555    556    557    558    559    560    561    562    563    564    565    566    567    568    569    570    571    572    573    574    575 
##      1      9      7     16      4      3      7      7      4     11      5     10      2      3      2      8      5      6      8     16      6      2      8      1 
##    576    577    578    579    580    581    582    583    584    585    586    587    588    589    590    591    592    593    595    596    597    598    599    600 
##      9      7      4      3      5      6      8      3      5      2      9     19      7      4      9      2      1      1     10     10      9     14      5      5 
##    601    602    603    604    605    606    607    608    609    610    611    612    613    614    615    616    617    618    619    620    621    622    623    624 
##      7      2      4      8      7      6      6      1      6      4      6      6      8     12      8      2      8      2     17      5      4      4      4      5 
##    625    626    627    628    629    630    631    632    633    634    635    636    637    638    639    640    641    642    643    644    645    646    647    648 
##      1      8     14      1     20      7      1      7      4      5      1     10      5     26      6      8      6     10     16      6      7      1     16      6 
##    649    650    651    652    653    654    655    656    657    658    659    660    661    662    663    664    665    666    667    668    669    670    671    672 
##      5     17      2     11      2      5      9      2     17     13      1      3      1     14      1     22      7     13      5      1      6      5      1     10 
##    673    674    675    676    677    678    679    680    681    682    683    684    685    686    687    688    689    690    691    692    693    694    695    696 
##      5      8      9      1      9      3      7      9      1      2      4      2      2      4      6      1      4      4     10      2     11      2      4      5 
##    697    698    699    700    701    702    703    704    705    706    707    708    709    710    711    712    713    714    715    716    717    718    719    720 
##      6      7      5      4      6      8      7      1      6      8      4      2      3     13      9      7     10      2      9      4      2      6      3      4 
##    721    722    723    724    725    726    727    728    729    730    731    732    733    734    735    736    737    738    739    740    741    742    743    744 
##     17      1      3     11      2      4      1     18      2      1      4     14      5      5      1      9      1      6      4     11      6      6      5     10 
##    745    746    747    748    749    750    751    752    753    754    755    756    757    758    759    760    761    762    763    764    765    766    767    768 
##      1      5      2      4      5      9     10      6      2      5      3      6      7     23      5     14     13      7      1     20      7      7      1      5 
##    769    770    771    772    773    774    775    776    777    778    779    780    781    782    783    784    785    786    787    788    789    790    791    792 
##      4      2      1      2      3      1      1      1      5      6      1      7      5      8      6      8      5      6      3      1     10      9     19      7 
##    793    794    795    796    797    798    799    800    801    802    803    804    805    806    807    808    809    810    811    812    813    814    815    816 
##      1      3      4     31      7      6      4     10      4     10      6      9      8      4      2      7     10      5      1      3      8      4     27      7 
##    817    818    819    820    821    822    823    824    825    826    827    828    829    830    831    832    833    834    835    836    837    838    839    840 
##      4      8      2      9      1      8     10      3     10      7      8      9      6     14     14      2      1      8      6      6     10      5      1      1 
##    841    842    843    844    845    846    847    848    849    850    851    852    853    854    855    856    857    858    859    860    861    862    863    864 
##      8      2      1     12      4      5      6     11      6      4      1     17     14      7      7      3     15      1      1     18      6      6      1      4 
##    865    866    867    868    869    870    871    872    873    874    875    876    877    878    879    880    881    882    883    884    885    886    887    888 
##      9      6      1      2      3      3      1      3      7      8     12      9      6      1     10      6      7      6      5      2      5      4      2     16 
##    889    890    891    892    893    894    895    896    897    898    899    900    901    902    903    904    905    906    907    908    909    910    911    912 
##     21      6      5      3      1      2      1      3      1      6      7      7      7      9     10      4      6      2      1      7      1      6      4      7 
##    913    914    915    916    917    918    919    920    921    922    923    924    925    926    927    928    929    930 999999 
##      7      9      1      8      1      8      3      5     14      7     12      3      5      4      2     10      2     11     97 
## [1] "Frequency table before encoding"
## cod_mod_2015. 
##  204800  204875  204909  205005  205047  205112  205120  205153  205682  205690  205773  205781  205815  206334  216341  220285  226704  232207  232223  232231  232249 
##      13      10       6       9      10      15      10      17      14      14      14       5       8       5       9       5      17      15      14      15       7 
##  232264  232504  232512  232538  232546  232553  232561  232579  232587  232595  232603  232611  232645  232728  232777  233296  233361  233676  233718  233734  233825 
##      15      10      17       4      16      12      13      10      15       8      16       5       4      10      14       6      12       5      17      15      14 
##  233890  233908  233916  233924  233940  233957  233965  233973  233981  233999  234021  234062  234070  234096  234104  234112  234120  234138  234153  234161  234187 
##      17       7      17      16       3      15       7      17      12      16      15      15      15      15      17      14      17      16      17      16      17 
##  234195  234203  234211  234229  234237  234369  234377  234385  234401  234419  234427  234443  234450  234500  234583  234674  234682  234781  234831  234856  287409 
##       8       8       5       7      15      16      11      10      10      15      14      18       8      14       5      17       9       7      15      17      17 
##  287417  287425  287466  312090  312207  312215  312306  312421  312744  312868  313239  313395  313460  313890  313908  313965  313981  314096  314187  314211  314237 
##       2       9       3       5       1       6      14       9       2       6       4      15       2       9      13      10      16       4      11       9       7 
##  314245  314252  314260  314278  314294  405258  405498  405704  405738  405746  405753  405852  405886  405894  405902  405928  405936  406009  406041  406066  406082 
##      10      12      10      10      12      10      10       7      13       8       2      14       1      13      16      12      16      16      14      16      16 
##  406116  406124  406140  406215  406223  406264  406413  406595  406629  406645  406975  406983  407007  407049  408245  408278  408286  408294  408328  408344  408393 
##      11       9       3      16      12       4      15      12      15      15      14      17      13      17      13       9       6       4      16       9      13 
##  408468  408476  408484  408492  408526  408559  408567  408583  408609  408666  408732  408773  408823  408856  408922  408955  408971  409003  409011  409029  409193 
##      17      16       9      11       5       8      15       6      12      14      10      11       9      14      15      10      13      14      15      16       5 
##  409227  409235  409243  409284  409292  409300  409318  409326  409359  409441  409565  409896  410480  410514  410670  410738  410746  410779  410787  410803  473249 
##      16      12       9      15      16      16      13      13      16      13      13       3      12       8       9      11       9       9       8      13      12 
##  481283  486688  499863  502922  504142  517888  519496  519595  550392  551309  557587  585885  587147  592147  612291  612416  612689  612747  612804  615013  623017 
##      15      15      12       3      14      13      11      14      16       9      16      10       8       4      16       2       2       1       6      12      10 
##  623041  637215  647388  647412  647628  671628  672105  678904  678961  679829  680058  712562  712711  723031  730655  731273  731596  735498  736116  775700  776039 
##       3       2      14       8      11       8       6       2       3       6      16       6       6       5       8      12       4       6       4      11       6 
##  783423  783597  796888  818674  818708  844159  844183  899351  930958  932434  932491  932848 1117944 1201870 1266428 1377209 1412634    <NA> 
##      17       2       4      11       9       6       5       4      15       6       9      14       2      17       5      15      10      95 
## [1] "Frequency table after encoding"
## cod_mod_2015. 
##  423  424  425  426  427  428  429  430  431  432  433  434  435  436  437  438  439  440  441  442  443  444  445  446  447  448  449  450  451  452  453  454  455 
##   10   15   12   15   14   16   14   13   12   13    2   16    4    4   13   11    7    5    2   12   17    8    4   15    9   14    5    8    9    8   10   15    4 
##  456  457  458  459  460  461  462  463  464  465  466  467  468  469  470  471  472  473  474  475  476  477  478  479  480  481  482  483  484  485  486  487  488 
##   17   15   14    6   15   10   10   11    6   17   13   15    8    4   17    6   13   13    6   15   11    4    3    5   10   10    8   12    5   14   15    1    6 
##  489  490  491  492  493  494  495  496  497  498  499  500  501  502  503  504  505  506  507  508  509  510  511  512  513  514  515  516  517  518  519  520  521 
##   13   10    1    2    7   15   16   16   14   14   16   10   13   15    9   15    5    6   15    9    8    7    8   10    7   10    8   14    6   13   12   17   11 
##  522  523  524  525  526  527  528  529  530  531  532  533  534  535  536  537  538  539  540  541  542  543  544  545  546  547  548  549  550  551  552  553  554 
##    9    9   10   16   12   16    4   11   15   13    9    3    3   17   15   15    9    9    9    3   16    8   15    6    2    2   17   17   12   14   14   12   16 
##  555  556  557  558  559  560  561  562  563  564  565  566  567  568  569  570  571  572  573  574  575  576  577  578  579  580  581  582  583  584  585  586  587 
##    9   10   12    9   10   13   17   16    6   17   15   12   12    8    6    8   14    1   16   14    6    5   16   15   12    4   17   13   11   12    9    6    2 
##  588  589  590  591  592  593  594  595  596  597  598  599  600  601  602  603  604  605  606  607  608  609  610  611  612  613  614  615  616  617  618  619  620 
##   16   15    5    8    5   17   16   17    5   17   15   16    7   15    6    9    5    3    5   13   16   16   14   14    4   17    2   11    6   13   16   17   14 
##  621  622  623  624  625  626  627  628  629  630  631  632  633  634  635  636  637  638  639  640  641  642  643  644  645  646  647  648  649  650  651  652  653 
##    9    9    9   16    7   10    2    9   17    9    7   13   11   16   15   12    8   14    4    2   15   14   14   11   15   16    4   16   10   16   16   15   17 
##  654  655  656  657  658  659  660  661  662  663  664  665  666  667  668  669  670 <NA> 
##    3   18   11    6    2   17   14   16   10   10   14    5    5   10    3   10   12   95 
## [1] "Frequency table before encoding"
## p12. Seleccione la escuela a la que le gustaría asistir. Si la escuela no está en la 
##         1031574 1117704 1270214 1274398 1320647 1321421 1327287 1339472 1341585 1343573 1343581 1344639 1345024 1347301 1347921 1347939 1347970 1364868 1364900 1369248 
##    2266       1       3       1       1       4       4       1       1       1       3       3       4       2       2       1       6       8       4       2       1 
## 1370378 1371095 1372507 1374438 1379320 1380021 1380120 1386432 1389279 1390095 1390467 1390517 1392083 1392091 1392117 1392125 1392174 1392224 1392232 1392240 1392257 
##       1       1       3       2       1       1       1       1       2       1       1       3       2       3       1       4       3       9       1       3       3 
## 1393313 1396191 1396209 1396225 1396852 1396886 1398932 1401934 1401942 1401959 1402536 1412873 1415983 1423003 1452705 1458348 1470582 1523802 1523810 1523828 1540988 
##       1       1       4       9       3       6       1       6       2      10       3       1       1       3       5       2       1       4       5       3       7 
## 1540996 1625532 1625557 1625573 1637263 1659101  207407  233056  233130  236158  236174  236422  236430  236463  236646  236653  236661  236927  309286  309294  309419 
##       2       1       4       6       1       4       2       2       3       3       1       1       1       2       3       1       9       1       1       9       3 
##  309567  309641  309716  310433  310441  477828  489096  495069  495325  517581  518084  518241  518472  519678  525923  579243  579292  579300  579409  587055  587204 
##       3       5       1       1       1       1       2       3       7      10       6       1       3       2      14       1       5       6       1       2      13 
##  589804  591164  591255  591602  612051  612507  617787  617829  621391  637272  647446  680082  680124  699603  712778  730515  783696  783704  783720  783787  783795 
##       1       1       3       1       2      10       6       7       7      15       6       1      11       3       1       1       6       1       1       5       3 
##  785097  791319  891408  891788  894915  927814  927871  929638  930859  931063  931436  932236  932608  933283  933317  933556  933598  933846  934141 
##       1      19       7       1       1       1       2       1       1       6       1       3       3       3       4       2       1       5       2 
## [1] "Frequency table after encoding"
## p12. Seleccione la escuela a la que le gustaría asistir. Si la escuela no está en la 
##  421  422  423  424  425  426  427  428  429  430  431  432  433  434  435  436  437  438  439  440  441  442  443  444  445  446  447  448  449  450  451  452  453 
##    3    3    9    2    3    6   10   10    1    2    3    2    2    3    2    2    1    2    2    3    1    1    3    1    1    2    5    5    1    1    1    1    1 
##  454  455  456  457  458  459  460  461  462  463  464  465  466  467  468  469  470  471  472  473  474  475  476  477  478  479  480  481  482  483  484  485  486 
##    4    1    6    1    7    1    2    1    3    1    1    3    5    1    1    7    1    1    6    1    6    1    5    9    1    1    4    1    2    3    1    2    5 
##  487  488  489  490  491  492  493  494  495  496  497  498  499  500  501  502  503  504  505  506  507  508  509  510  511  512  513  514  515  516  517  518  519 
##    7    3    3    4    8    1    7   15    3    1    4    3    1    6    6    1    2    3    3    1    4    1    4   19    4    1    3    6    1    1    1   10    1 
##  520  521  522  523  524  525  526  527  528  529  530  531  532  533  534  535  536  537  538  539  540  541  542  543  544  545  546  547  548  549  550  551  552 
##    1    1    2    1    1    1    1    1    1   14    6    3    6    3    4 2266    2    2    4    1    3    3    1    9    4    3    6    2    9    3    5    1    1 
##  553  554  555  556  557  558  559  560  561  562  563  564  565 
##    2    3    1    1   13   11    3    1    3    2    7    1    1 
## [1] "Frequency table before encoding"
## codlocal. codlocal
##  55610  57968  58185  59420  59509  59665  60433  62922  63530  64068  65421  66736  67161  68599  68603  68655  68679  68735  68900  68924  68938  68943  68957  68962 
##      1      1      1      1      1      1      2      1      1      1      2      1      1     12      3      3      2      1      3      1      4      2      8      2 
##  68976  68981  69103  69179  69235  69551  69669  69706  69810  69985  70007  70031  70074  70088  70111  70149  71115  71498  71592  71629  71733  71752  71766  71790 
##      3      8      2      3      2      2      1      4      1     16      2      1      2      2      1      5      2      1      3      1     15      4      2      1 
##  71926  71931  73119  73162  73181  73195  73280  73303  73322  73341  73398  73435  73459  73529  73534  73548  73553  73567  73572  73591  73609  73789 130308 142655 
##      2      1     12     10      3      8      2      6     10      4     19      4      2      3     14      3      6      6      7      4      6     11     14      7 
## 146154 147484 147686 147709 147714 148520 148600 148997 150122 150136 150202 150221 150259 150513 150532 150565 150570 150607 150612 150631 150645 150650 150754 150768 
##      5      4      4      3      2      6     10      2      4      4      7      5     15      7     21      6      6      6      5      3     16      6      9      7 
## 150773 150792 150834 150848 150966 150971 150985 151027 151070 151107 151188 151193 151206 151254 151598 151640 151664 151678 152060 152215 152239 152263 152282 152574 
##     12     17      4     14     15     16      4      7      3      9      9      7      1      8      1     10      3      1      1     14     15      4      4      4 
## 152588 152593 152606 152625 152668 152673 152734 152753 152786 153540 153818 153823 153837 153842 153861 153875 153880 153899 153903 153922 153941 153955 154021 154035 
##      5      7     10      3     12      9     15      6     17      1     24      6      1      5      2     22      5      4      6      1      4     15     14      7 
## 154064 154078 154083 154097 154120 154200 154238 154262 154549 155054 157010 157053 157072 157190 157213 157227 157345 157350 157374 157393 157406 157487 157492 157500 
##      3      3      6      3     26      2      1      6      2      1      3     15      5      6      1      8      6      1     31     10      6     14      3      6 
## 157538 157543 157595 157604 157618 157623 157656 157661 157680 157703 157717 157722 157736 157760 157779 157798 157802 157821 157835 157840 157864 157878 157915 157977 
##      1      6      8     22      3      7      3      9      7      5     17     14     10      1      5      4      8      6     15     18      6      3      6     13 
## 157982 158024 158057 158095 158104 158123 158161 158175 158180 158203 158217 158236 158241 158255 158335 158340 158359 158364 158378 158383 158401 158415 158444 158458 
##     10      3      5      4     16      6     13     11      8      9      9     21     11      6      1     15     19      4      4     13     22      6      1      8 
## 158477 158482 158496 158509 158547 158590 158608 158627 158632 158646 158651 158665 158670 158707 158712 158745 158750 158788 158934 159207 159453 159491 159556 159702 
##      6     17     16     15      2     16      4      5      1      1      1      4      4      2      3      4      2     24      2      1      6      5     14      2 
## 159797 159815 164930 164968 165029 165072 165086 165091 165185 165190 165246 165326 165331 165345 165473 165543 165604 165637 165680 165699 165703 165717 165736 165741 
##      5     17      1      7      8      4      3     16      6      8      7      4      4      2      8      2      2     17      8      7      4      5      8     26 
## 165784 165798 165802 165840 165864 165915 165920 166038 166076 166104 166118 166316 166533 166590 166627 166632 166651 166774 166788 166830 166905 166948 167014 167170 
##      7      4      5      4     12      3      2      1     16      7     11      2      1      4      7      6      9      8      2      4      4      2     20     20 
## 167189 167194 167207 167212 167226 167231 167269 167311 167349 167354 167368 167410 167537 167561 167575 167580 167599 167603 167617 167636 167641 167679 167684 169126 
##      4      5      8     19     22     21      7      6      5      4      4      2     24      5     16      7      5     14      9      6      6      1     23      1 
## 169150 170196 170200 170219 170304 170318 170375 170479 170484 170506 170610 170709 170832 170865 170907 170931 171134 340231 340293 343357 462430 462543 505991 508447 
##     13      1      6      3      5      8      1      8      5     14      2      1      4      5      5      2      5      1      1     11      9      2     14      3 
## 515508 517084 517102 520915 526465 526470 531928 534658 535506 538208 538227 538779 555306 556042 560162 562439 563151 571844 582376 585308 590263 601493 602242 603468 
##      7      1      7      1     14     16     14      7      1      9      1     10      7      1      2      1      5      7      4      7     10     18      5     11 
## 603581 603699 603717 603755 605066 605132 605146 609248 611760 748169 748739 999999 
##      8     16      1      3      3      3      8     27      1      2      1    218 
## [1] "Frequency table after encoding"
## codlocal. codlocal
##    306    307    308    309    310    311    312    313    314    315    316    317    318    319    320    321    322    323    324    325    326    327    328    329 
##     21      3      7      7      2     12      3     15      1      8      2     26      7      4     16      5      2     16      5     19     15      6      5      6 
##    330    331    332    333    334    335    336    337    338    339    340    341    342    343    344    345    346    347    348    349    350    351    352    353 
##     17      6     24      4      2      8      3      5      6      9      1     22     12      5      6      7      1      3      1      8      7     15      8      4 
##    354    355    356    357    358    359    360    361    362    363    364    365    366    367    368    369    370    371    372    373    374    375    376    377 
##      1      2     20      1      6      4      1     15      4      1      2     10      2      7      2      5      6      7      2      1      3      2      9      2 
##    378    379    380    381    382    383    384    385    386    387    388    389    390    391    392    393    394    395    396    397    398    399    400    401 
##     18      4      8      4      6      4      1      4      7     16      1      2     31     10      3     21      4     14      5     10      2     19      7      5 
##    402    403    404    405    406    407    408    409    410    411    412    413    414    415    416    417    418    419    420    421    422    423    424    425 
##      7      4      5     21     11      3      1      6      6      3      1      2     14     14      3     10     20      3      4      4     14     14      2      1 
##    426    427    428    429    430    431    432    433    434    435    436    437    438    439    440    441    442    443    444    445    446    447    448    449 
##     15      2     11      3      1      3      3      3     16      1      8      1      5      4     17      1      4     14      1      9     14      3      8      4 
##    450    451    452    453    454    455    456    457    458    459    460    461    462    463    464    465    466    467    468    469    470    471    472    473 
##      4      8     15     15     26     11      6      5     10      1      4     15      2      7      6     19      2      6     14      7      7     10      6      1 
##    474    475    476    477    478    479    480    481    482    483    484    485    486    487    488    489    490    491    492    493    495    496    497    498 
##      4      8      1      5     12     23     17      2      9     22      2      6      3      5      5     13      1      3      2      4      8      1      5      4 
##    499    500    501    502    503    504    505    506    507    508    509    510    511    512    513    514    515    516    517    518    519    520    521    522 
##      7      1      8      4      7      2      9      1     17      1     13      1      1      4      6     16      4     17      5      2      2      6      6      3 
##    523    524    525    526    527    528    529    530    531    532    533    534    535    536    537    538    539    540    541    542    543    544    545    546 
##      1      3      4     22      1      2      3      2      5      1      3      2     16      5     24      5      6      1      9     12      6      2      7     16 
##    547    548    549    550    551    552    553    554    555    556    557    558    559    560    561    562    563    564    565    566    567    568    569    570 
##      3      4      1      4      7      2     14      1      8      6      9      4      4      2      3     13     14     17      7      6      9      1      1      5 
##    571    572    573    574    575    576    577    578    579    580    581    582    583    584    585    586    587    588    589    590    591    592    593    594 
##      5     10      4     15      1      9      8      4      2      5      3      7      1      6      4      7      2      1      2      5      7      6     11      8 
##    595    596    597    598    599    600    601    602    603    604    605    606    607    608    609    610    611    612    613    614    615    616    617    618 
##      9      5      4      4      4      7     16     12      2      1     14      5      1     27     10      1      8      1     18      6      1      1     22      6 
##    619    620    621    622    623    624    625    626    627    628    629    630    631    632    633    634    635    636    637    638    639    640    641    642 
##      1      1      7      3      9      1      5     13     14      1      2      1      8      8     16      1      4      1      1      7      3      4      3      2 
##    643    644    645    646    647    648    649    650    651    652    653    654    655    656    657    658    659    660    661    662    663    664    665    666 
##     10      2      8      1     15      3     10      6      1      6     16      4      6     11      3      6     16      8     11      6      7      3      1      2 
##    667    668    669    670    671    672    673    674    675    676    677 999999 
##      2     14      6      1      5      1      6     24      8      3      4    218 
## [1] "Frequency table before encoding"
## s4p11b1_2015. Codigo modular de la escuela a la que iria en secundaria (urbano)
## 145499 305888 318414 330562 340288   <NA> 
##      3      2      2      1      1   2739 
## [1] "Frequency table after encoding"
## s4p11b1_2015. Codigo modular de la escuela a la que iria en secundaria (urbano)
##  654  655  656  657  658 <NA> 
##    3    1    2    2    1 2739
# !!! The variables listed here are removed because they contain
# identifying information.
dropvars <- c(
  "nombre_colegio",
  "school2014_name",
  "school2014_name1",
  "school2013_name",
  "school2013_name1"
)

# Keep only the columns whose names are not listed in dropvars.
mydata <- mydata[!(names(mydata) %in% dropvars)]

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Focus on variables with a "Lowest Freq" in dictionary of 30 or less. 


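# Recode educational attainment of adults to reduce risk of re-identification
# NOTE(review): the variables recoded below are labelled "Hermano(a)" (sibling), not adults - confirm wording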


# p29_1a2 ("Hermano(a)"): merge secondary-school grades into three groups.
# Per the cross-tab recorded below: codes 10-11 -> group 1, code 12 ->
# group 2, codes 13 and above -> group 3.
break_edu <- c(10, 12, 13)
labels_edu <- c(
  "1ro-2do de secundaria" = 1,
  "3ro de secundaria" = 2,
  "4to-5to de secundaria" = 3
)
mydata <- ordinal_recode(
  variable = "p29_1a2",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p29_1a2. Hermano(a) 
## 1ro de secundaria 2do de secundaria 3ro de secundaria 4to de secundaria 5to de secundaria              <NA> 
##                19                37                53                36                15              2588 
##     recoded
##      [10,12) [12,13) [13,1e+06)
##   10      19       0          0
##   11      37       0          0
##   12       0      53          0
##   13       0       0         36
##   14       0       0         15
## [1] "Frequency table after encoding"
## p29_1a2. Hermano(a) 
## 1ro-2do de secundaria     3ro de secundaria 4to-5to de secundaria                  <NA> 
##                    56                    53                    51                  2588 
## [1] "Inspect value labels and relabel as necessary"
## 1ro-2do de secundaria     3ro de secundaria 4to-5to de secundaria 
##                     1                     2                     3
# p29_1a3 ("Hermano(a)"): merge secondary-school grades into two groups.
# Per the cross-tab recorded below: codes 10-11 -> group 1, codes 12 and
# above -> group 2.
break_edu <- c(10, 12)
labels_edu <- c(
  "1ro-2do de secundaria" = 1,
  "3ro de secundaria or more" = 2
)
mydata <- ordinal_recode(
  variable = "p29_1a3",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p29_1a3. Hermano(a) 
## 1ro de secundaria 2do de secundaria 3ro de secundaria 4to de secundaria 5to de secundaria              <NA> 
##                11                12                14                 8                 8              2695 
##     recoded
##      [10,12) [12,1e+06)
##   10      11          0
##   11      12          0
##   12       0         14
##   13       0          8
##   14       0          8
## [1] "Frequency table after encoding"
## p29_1a3. Hermano(a) 
##     1ro-2do de secundaria 3ro de secundaria or more                      <NA> 
##                        23                        30                      2695 
## [1] "Inspect value labels and relabel as necessary"
##     1ro-2do de secundaria 3ro de secundaria or more 
##                         1                         2
# p5a1 ("Hermano(a)"): coarsen education level.
# Per the cross-tab recorded below: codes 0-1 -> "Inicial or Primaria",
# code 2 -> "Secundaria", codes 3 and above -> "Superior no universitaria
# or more". The leading [-98, 0) bin carries the "No se" label and is empty
# in the recorded run.
break_edu <- c(-98, 0, 2, 3)
labels_edu <- c(
  "No se" = 1,
  "Inicial or Primaria" = 2,
  "Secundaria" = 3,
  "Superior no universitaria or more" = 4
)
mydata <- ordinal_recode(
  variable = "p5a1",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p5a1. Hermano(a) 
##                      Inicial                     Primaria                   Secundaria    Superior no universitaria Superior universitaria o mas 
##                           18                          265                          406                           34                            6 
##                         <NA> 
##                         2019 
##    recoded
##     [-98,0) [0,2) [2,3) [3,1e+06)
##   0       0    18     0         0
##   1       0   265     0         0
##   2       0     0   406         0
##   3       0     0     0        34
##   4       0     0     0         6
## [1] "Frequency table after encoding"
## p5a1. Hermano(a) 
##               Inicial or Primaria                        Secundaria Superior no universitaria or more                              <NA> 
##                               283                               406                                40                              2019 
## [1] "Inspect value labels and relabel as necessary"
##                             No se               Inicial or Primaria                        Secundaria Superior no universitaria or more 
##                                 1                                 2                                 3                                 4
# p5a2 ("Hermano(a)"): top-code education level.
# Per the cross-tab recorded below: codes 0, 1 and 2 keep their own groups;
# codes 3 and above are merged into "Superior no universitaria or more".
# The leading [-98, 0) bin carries the "No se" label and is empty in the
# recorded run.
break_edu <- c(-98, 0, 1, 2, 3)
labels_edu <- c(
  "No se" = 1,
  "Inicial" = 2,
  "Primaria" = 3,
  "Secundaria" = 4,
  "Superior no universitaria or more" = 5
)
mydata <- ordinal_recode(
  variable = "p5a2",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p5a2. Hermano(a) 
##                      Inicial                     Primaria                   Secundaria    Superior no universitaria Superior universitaria o mas 
##                           60                          291                          218                           19                            2 
##                         <NA> 
##                         2158 
##    recoded
##     [-98,0) [0,1) [1,2) [2,3) [3,1e+06)
##   0       0    60     0     0         0
##   1       0     0   291     0         0
##   2       0     0     0   218         0
##   3       0     0     0     0        19
##   4       0     0     0     0         2
## [1] "Frequency table after encoding"
## p5a2. Hermano(a) 
##                           Inicial                          Primaria                        Secundaria Superior no universitaria or more 
##                                60                               291                               218                                21 
##                              <NA> 
##                              2158 
## [1] "Inspect value labels and relabel as necessary"
##                             No se                           Inicial                          Primaria                        Secundaria 
##                                 1                                 2                                 3                                 4 
## Superior no universitaria or more 
##                                 5
# p5a3 ("Hermano(a)"): top-code education level.
# Per the cross-tab recorded below: codes 0 and 1 keep their own groups;
# codes 2 and above are merged into "Secundaria or more". The leading
# [-98, 0) bin carries the "No se" label and is empty in the recorded run.
break_edu <- c(-98, 0, 1, 2)
labels_edu <- c(
  "No se" = 1,
  "Inicial" = 2,
  "Primaria" = 3,
  "Secundaria or more" = 4
)
mydata <- ordinal_recode(
  variable = "p5a3",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p5a3. Hermano(a) 
##                   Inicial                  Primaria                Secundaria Superior no universitaria                      <NA> 
##                        65                       222                        66                         7                      2388 
##    recoded
##     [-98,0) [0,1) [1,2) [2,1e+06)
##   0       0    65     0         0
##   1       0     0   222         0
##   2       0     0     0        66
##   3       0     0     0         7
## [1] "Frequency table after encoding"
## p5a3. Hermano(a) 
##            Inicial           Primaria Secundaria or more               <NA> 
##                 65                222                 73               2388 
## [1] "Inspect value labels and relabel as necessary"
##              No se            Inicial           Primaria Secundaria or more 
##                  1                  2                  3                  4
# p5a4 ("Hermano(a)"): top-code education level, using the same break
# points and labels as p5a3.
# Per the cross-tab recorded below: codes 0 and 1 keep their own groups;
# codes 2 and above are merged into "Secundaria or more". The leading
# [-98, 0) bin carries the "No se" label and is empty in the recorded run.
break_edu <- c(-98, 0, 1, 2)
labels_edu <- c(
  "No se" = 1,
  "Inicial" = 2,
  "Primaria" = 3,
  "Secundaria or more" = 4
)
mydata <- ordinal_recode(
  variable = "p5a4",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p5a4. Hermano(a) 
##                   Inicial                  Primaria                Secundaria Superior no universitaria                      <NA> 
##                        55                        87                        29                         2                      2575 
##    recoded
##     [-98,0) [0,1) [1,2) [2,1e+06)
##   0       0    55     0         0
##   1       0     0    87         0
##   2       0     0     0        29
##   3       0     0     0         2
## [1] "Frequency table after encoding"
## p5a4. Hermano(a) 
##            Inicial           Primaria Secundaria or more               <NA> 
##                 55                 87                 31               2575 
## [1] "Inspect value labels and relabel as necessary"
##              No se            Inicial           Primaria Secundaria or more 
##                  1                  2                  3                  4
# p28a1 (Hermano(a)): pool 1ro and 2do de primaria into one band and keep the
# higher grades separate. 999999 is passed as the missing code.
# The label for code 3 is spelled "4to" to match the source value label
# (it previously read "4t0", with a zero instead of the letter o).
break_edu <- c(4, 6, 7, 8, 9)
labels_edu <- c(
  "1ro-2do de primaria" = 1,
  "3ro de primaria" = 2,
  "4to de primaria" = 3,
  "5to de primaria" = 4,
  "6to de primaria" = 5
)
mydata <- ordinal_recode(
  variable = "p28a1",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p28a1. Hermano(a) 
## 1ro de primaria 2do de primaria 3ro de primaria 4to de primaria 5to de primaria 6to de primaria            <NA> 
##              14              26              43              52              44              39            2530 
##    recoded
##     [4,6) [6,7) [7,8) [8,9) [9,1e+06)
##   4    14     0     0     0         0
##   5    26     0     0     0         0
##   6     0    43     0     0         0
##   7     0     0    52     0         0
##   8     0     0     0    44         0
##   9     0     0     0     0        39
## [1] "Frequency table after encoding"
## p28a1. Hermano(a) 
## 1ro-2do de primaria     3ro de primaria     4to de primaria     5to de primaria     6to de primaria                <NA> 
##                  40                  43                  52                  44                  39                2530 
## [1] "Inspect value labels and relabel as necessary"
## 1ro-2do de primaria     3ro de primaria     4to de primaria     5to de primaria     6to de primaria 
##                   1                   2                   3                   4                   5
# p28a2 (Hermano(a)): keep 1ro-4to de primaria separate and pool 5to and 6to
# into one top band. 999999 is passed as the missing code.
# The label for code 3 is spelled "3ro" to match the source value label
# (it previously read "3r0", with a zero instead of the letter o).
break_edu <- c(4, 5, 6, 7, 8)
labels_edu <- c(
  "1ro de primaria" = 1,
  "2do de primaria" = 2,
  "3ro de primaria" = 3,
  "4to de primaria" = 4,
  "5to-6to de primaria" = 5
)
mydata <- ordinal_recode(
  variable = "p28a2",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p28a2. Hermano(a) 
## 1ro de primaria 2do de primaria 3ro de primaria 4to de primaria 5to de primaria 6to de primaria            <NA> 
##              37              40              51              57              37              24            2502 
##    recoded
##     [4,5) [5,6) [6,7) [7,8) [8,1e+06)
##   4    37     0     0     0         0
##   5     0    40     0     0         0
##   6     0     0    51     0         0
##   7     0     0     0    57         0
##   8     0     0     0     0        37
##   9     0     0     0     0        24
## [1] "Frequency table after encoding"
## p28a2. Hermano(a) 
##     1ro de primaria     2do de primaria     3ro de primaria     4to de primaria 5to-6to de primaria                <NA> 
##                  37                  40                  51                  57                  61                2502 
## [1] "Inspect value labels and relabel as necessary"
##     1ro de primaria     2do de primaria     3ro de primaria     4to de primaria 5to-6to de primaria 
##                   1                   2                   3                   4                   5
# p28a3 (Hermano(a)): keep 1ro-4to de primaria separate and pool 5to and 6to
# into one top band. 999999 is passed as the missing code.
# The label for code 3 is spelled "3ro" to match the source value label
# (it previously read "3r0", with a zero instead of the letter o).
break_edu <- c(4, 5, 6, 7, 8)
labels_edu <- c(
  "1ro de primaria" = 1,
  "2do de primaria" = 2,
  "3ro de primaria" = 3,
  "4to de primaria" = 4,
  "5to-6to de primaria" = 5
)
mydata <- ordinal_recode(
  variable = "p28a3",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p28a3. Hermano(a) 
## 1ro de primaria 2do de primaria 3ro de primaria 4to de primaria 5to de primaria 6to de primaria            <NA> 
##              42              44              37              33              28               7            2557 
##    recoded
##     [4,5) [5,6) [6,7) [7,8) [8,1e+06)
##   4    42     0     0     0         0
##   5     0    44     0     0         0
##   6     0     0    37     0         0
##   7     0     0     0    33         0
##   8     0     0     0     0        28
##   9     0     0     0     0         7
## [1] "Frequency table after encoding"
## p28a3. Hermano(a) 
##     1ro de primaria     2do de primaria     3ro de primaria     4to de primaria 5to-6to de primaria                <NA> 
##                  42                  44                  37                  33                  35                2557 
## [1] "Inspect value labels and relabel as necessary"
##     1ro de primaria     2do de primaria     3ro de primaria     4to de primaria 5to-6to de primaria 
##                   1                   2                   3                   4                   5
# p28a4 (Hermano(a)): reduce the grade to two bands, 1ro/2do de primaria
# versus 3ro de primaria and above. 999999 is passed as the missing code.
break_edu <- c(4, 6)
labels_edu <- setNames(
  c(1, 2),
  c("1ro or 2do de primaria", "3ro de primaria or more")
)
mydata <- ordinal_recode(
  variable = "p28a4",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p28a4. Hermano(a) 
## 1ro de primaria 2do de primaria 3ro de primaria 4to de primaria 5to de primaria 6to de primaria            <NA> 
##              18              14              14              12               2               6            2682 
##    recoded
##     [4,6) [6,1e+06)
##   4    18         0
##   5    14         0
##   6     0        14
##   7     0        12
##   8     0         2
##   9     0         6
## [1] "Frequency table after encoding"
## p28a4. Hermano(a) 
##  1ro or 2do de primaria 3ro de primaria or more                    <NA> 
##                      32                      34                    2682 
## [1] "Inspect value labels and relabel as necessary"
##  1ro or 2do de primaria 3ro de primaria or more 
##                       1                       2
# p6_1 (Padre): keep the lower education levels separate and pool every
# level from code 3 upward into a single "Superior ..." band.
# 999999 is passed as the missing code.
break_edu <- c(-98, -1, 0, 1, 2, 3)
labels_edu <- setNames(
  c(1, 2, 3, 4, 5, 6),
  c(
    "No se",
    "sin nivel",
    "Inicial",
    "Primaria completa",
    "Secundaria completa",
    "Superior tecnica incompleta/completa or Superior universitaria completa/incompleta"
  )
)
mydata <- ordinal_recode(
  variable = "p6_1",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p6_1. Padre
##                       Sin nivel                         Inicial               Primaria completa             Secundaria completa     Superior tecnica incompleta 
##                              89                              88                             544                             133                               3 
##       Superior tecnica completa Superior universitaria completa                            <NA> 
##                               4                               1                            1886 
##     recoded
##      [-98,-1) [-1,0) [0,1) [1,2) [2,3) [3,1e+06)
##   -1        0     89     0     0     0         0
##   0         0      0    88     0     0         0
##   1         0      0     0   544     0         0
##   2         0      0     0     0   133         0
##   3         0      0     0     0     0         3
##   4         0      0     0     0     0         4
##   6         0      0     0     0     0         1
## [1] "Frequency table after encoding"
## p6_1. Padre
##                                                                          sin nivel                                                                            Inicial 
##                                                                                 89                                                                                 88 
##                                                                  Primaria completa                                                                Secundaria completa 
##                                                                                544                                                                                133 
## Superior tecnica incompleta/completa or Superior universitaria completa/incompleta                                                                               <NA> 
##                                                                                  8                                                                               1886 
## [1] "Inspect value labels and relabel as necessary"
##                                                                              No se                                                                          sin nivel 
##                                                                                  1                                                                                  2 
##                                                                            Inicial                                                                  Primaria completa 
##                                                                                  3                                                                                  4 
##                                                                Secundaria completa Superior tecnica incompleta/completa or Superior universitaria completa/incompleta 
##                                                                                  5                                                                                  6
# p6_2 (Madre): same banding as the father's education item; every level
# from code 3 upward is pooled into a single "Superior ..." band.
# 999999 is passed as the missing code.
break_edu <- c(-98, -1, 0, 1, 2, 3)
labels_edu <- setNames(
  c(1, 2, 3, 4, 5, 6),
  c(
    "No se",
    "sin nivel",
    "Inicial",
    "Primaria completa",
    "Secundaria completa",
    "Superior tecnica incompleta/completa or Superior universitaria completa/incompleta"
  )
)
mydata <- ordinal_recode(
  variable = "p6_2",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p6_2. Madre
##                   Sin nivel                     Inicial           Primaria completa         Secundaria completa Superior tecnica incompleta   Superior tecnica completa 
##                         283                         134                         471                          53                           1                           4 
##                        <NA> 
##                        1802 
##     recoded
##      [-98,-1) [-1,0) [0,1) [1,2) [2,3) [3,1e+06)
##   -1        0    283     0     0     0         0
##   0         0      0   134     0     0         0
##   1         0      0     0   471     0         0
##   2         0      0     0     0    53         0
##   3         0      0     0     0     0         1
##   4         0      0     0     0     0         4
## [1] "Frequency table after encoding"
## p6_2. Madre
##                                                                          sin nivel                                                                            Inicial 
##                                                                                283                                                                                134 
##                                                                  Primaria completa                                                                Secundaria completa 
##                                                                                471                                                                                 53 
## Superior tecnica incompleta/completa or Superior universitaria completa/incompleta                                                                               <NA> 
##                                                                                  5                                                                               1802 
## [1] "Inspect value labels and relabel as necessary"
##                                                                              No se                                                                          sin nivel 
##                                                                                  1                                                                                  2 
##                                                                            Inicial                                                                  Primaria completa 
##                                                                                  3                                                                                  4 
##                                                                Secundaria completa Superior tecnica incompleta/completa or Superior universitaria completa/incompleta 
##                                                                                  5                                                                                  6
# p6a1 (Hermano(a)): pool codes -1 to 1 as "Primaria completa or less", keep
# "Secundaria completa" on its own, and pool code 3 and above.
# 999999 is passed as the missing code.
break_edu <- c(-98, -1, 2, 3)
labels_edu <- setNames(
  c(1, 2, 3, 4),
  c(
    "No se",
    "Primaria completa or less",
    "Secundaria completa",
    "Superior tecnica incompleta or more"
  )
)
mydata <- ordinal_recode(
  variable = "p6a1",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p6a1. Hermano(a) 
##                         Sin nivel                           Inicial                 Primaria completa               Secundaria completa 
##                                 2                                 2                                55                                65 
##       Superior tecnica incompleta         Superior tecnica completa Superior universitaria incompleta                              <NA> 
##                                 7                                 3                                 2                              2612 
##     recoded
##      [-98,-1) [-1,2) [2,3) [3,1e+06)
##   -1        0      2     0         0
##   0         0      2     0         0
##   1         0     55     0         0
##   2         0      0    65         0
##   3         0      0     0         7
##   4         0      0     0         3
##   5         0      0     0         2
## [1] "Frequency table after encoding"
## p6a1. Hermano(a) 
##           Primaria completa or less                 Secundaria completa Superior tecnica incompleta or more                                <NA> 
##                                  59                                  65                                  12                                2612 
## [1] "Inspect value labels and relabel as necessary"
##                               No se           Primaria completa or less                 Secundaria completa Superior tecnica incompleta or more 
##                                   1                                   2                                   3                                   4
# p6a2 (Hermano(a)): two substantive bands, "Primaria completa or less"
# (codes -1 to 1) and "Secundaria completa or more" (code 2 and above).
# 999999 is passed as the missing code.
break_edu <- c(-98, -1, 2)
labels_edu <- setNames(
  c(1, 2, 3),
  c("No se", "Primaria completa or less", "Secundaria completa or more")
)
mydata <- ordinal_recode(
  variable = "p6a2",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p6a2. Hermano(a) 
##                   Sin nivel                     Inicial           Primaria completa         Secundaria completa Superior tecnica incompleta                        <NA> 
##                           1                           2                          22                          22                           2                        2699 
##     recoded
##      [-98,-1) [-1,2) [2,1e+06)
##   -1        0      1         0
##   0         0      2         0
##   1         0     22         0
##   2         0      0        22
##   3         0      0         2
## [1] "Frequency table after encoding"
## p6a2. Hermano(a) 
##   Primaria completa or less Secundaria completa or more                        <NA> 
##                          25                          24                        2699 
## [1] "Inspect value labels and relabel as necessary"
##                       No se   Primaria completa or less Secundaria completa or more 
##                           1                           2                           3
# p6a3 (Hermano(a)): two substantive bands, "Primaria completa or less"
# (codes -1 to 1) and "Secundaria completa or more" (code 2 and above).
# 999999 is passed as the missing code.
break_edu <- c(-98, -1, 2)
labels_edu <- setNames(
  c(1, 2, 3),
  c("No se", "Primaria completa or less", "Secundaria completa or more")
)
mydata <- ordinal_recode(
  variable = "p6a3",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p6a3. Hermano(a) 
##                   Sin nivel                     Inicial           Primaria completa         Secundaria completa Superior tecnica incompleta                        <NA> 
##                           1                           1                          14                          10                           1                        2721 
##     recoded
##      [-98,-1) [-1,2) [2,1e+06)
##   -1        0      1         0
##   0         0      1         0
##   1         0     14         0
##   2         0      0        10
##   3         0      0         1
## [1] "Frequency table after encoding"
## p6a3. Hermano(a) 
##   Primaria completa or less Secundaria completa or more                        <NA> 
##                          16                          11                        2721 
## [1] "Inspect value labels and relabel as necessary"
##                       No se   Primaria completa or less Secundaria completa or more 
##                           1                           2                           3
# p6b1 (Abuelo(a)): two substantive bands, "Inicial or less" (codes -1 and 0)
# and "Primaria completa or more" (code 1 and above).
# 999999 is passed as the missing code.
break_edu <- c(-98, -1, 1)
labels_edu <- setNames(
  c(1, 2, 3),
  c("No se", "Inicial or less", "Primaria completa or more")
)
mydata <- ordinal_recode(
  variable = "p6b1",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p6b1. Abuelo(a) 
##           Sin nivel             Inicial   Primaria completa Secundaria completa                <NA> 
##                  54                   4                  38                   1                2651 
##     recoded
##      [-98,-1) [-1,1) [1,1e+06)
##   -1        0     54         0
##   0         0      4         0
##   1         0      0        38
##   2         0      0         1
## [1] "Frequency table after encoding"
## p6b1. Abuelo(a) 
##           Inicial or less Primaria completa or more                      <NA> 
##                        58                        39                      2651 
## [1] "Inspect value labels and relabel as necessary"
##                     No se           Inicial or less Primaria completa or more 
##                         1                         2                         3
# Top code household composition variables with large and unusual numbers 

# p1 (total people in the household): pool values of 10 and above into a
# single "10 or more" category; 888 and 999999 are passed as missing codes.
mydata <- top_recode(
  variable = "p1",
  break_point = 10,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p1. Cuantas personas viven en total en el hogar?
##    1    2    3    4    5    6    7    8    9   10   12   16 <NA> 
##    2   24   98  185  239  201  140   68   31   24    3    1 1732

## [1] "Frequency table after encoding"
## p1. Cuantas personas viven en total en el hogar?
##          1          2          3          4          5          6          7          8          9 10 or more       <NA> 
##          2         24         98        185        239        201        140         68         31         28       1732

# p2c (siblings living with the child): pool values of 6 and above into a
# single "6 or more" category; 888 and 999999 are passed as missing codes.
mydata <- top_recode(
  variable = "p2c",
  break_point = 6,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p2c. Con cuantos hermanos o hermanas vive?
##    0    1    2    3    4    5    6    7    9   10 <NA> 
##  126  204  246  209  120   67   27   14    2    1 1732

## [1] "Frequency table after encoding"
## p2c. Con cuantos hermanos o hermanas vive?
##         0         1         2         3         4         5 6 or more      <NA> 
##       126       204       246       209       120        67        44      1732

# p2d (grandparents living with the child): pool values of 2 and above into
# a single "2 or more" category; 888 and 999999 are passed as missing codes.
mydata <- top_recode(
  variable = "p2d",
  break_point = 2,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p2d. Con cuantos abuelos o abuelas vive?
##    0    1    2    3 <NA> 
##  912   77   26    1 1732

## [1] "Frequency table after encoding"
## p2d. Con cuantos abuelos o abuelas vive?
##         0         1 2 or more      <NA> 
##       912        77        27      1732

# p2e (uncles/aunts living with the child): pool values of 1 and above into
# a single "1 or more" category; 888 and 999999 are passed as missing codes.
mydata <- top_recode(
  variable = "p2e",
  break_point = 1,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p2e. Con cuantos tios o tias vive?
##    0    1    2 <NA> 
##  997   13    6 1732

## [1] "Frequency table after encoding"
## p2e. Con cuantos tios o tias vive?
##         0 1 or more      <NA> 
##       997        19      1732

# p2f (nephews/nieces living with the child): pool values of 1 and above
# into a single "1 or more" category; 888 and 999999 are passed as missing
# codes.
mydata <- top_recode(
  variable = "p2f",
  break_point = 1,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p2f. Con cuantos sobrinos vive?
##    0    1    2    3 <NA> 
##  978   29    7    2 1732

## [1] "Frequency table after encoding"
## p2f. Con cuantos sobrinos vive?
##         0 1 or more      <NA> 
##       978        38      1732

# p2g (other household members living with the child): pool values of 1 and
# above into a single "1 or more" category; 888 and 999999 are passed as
# missing codes.
mydata <- top_recode(
  variable = "p2g",
  break_point = 1,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p2g. Con cuantos otros miembros del hogar vive el/la nino/a?
##    0    1    2    3    5 <NA> 
##  944   58    7    6    1 1732

## [1] "Frequency table after encoding"
## p2g. Con cuantos otros miembros del hogar vive el/la nino/a?
##         0 1 or more      <NA> 
##       944        72      1732

# sc_ave_3a ("Men Height (BT)"): pool values of 200 and above into a single
# "200 or more" category; 888 and 999999 are passed as missing codes.
mydata <- top_recode(
  variable = "sc_ave_3a",
  break_point = 200,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## sc_ave_3a. Men Height (BT)
##   51   55   56   60   65   70   75   80   85   86   89   90   98  100  105  109  110  112  115  118  120  123  125  128  130  135  137  140  145  150  151  152  153 
##    2    1    2    7    2    4    1   10    1    1    1    3    1   38    2    1    3    1    1    1   21    2    4    1   13    6    2   24    1  109    1    1    1 
##  155  156  157  158  159  160  161  162  163  165  168  170  174  175  176  178  180  181  185  186  187  190  195  200  204  210  215  230  250  258  260  270  272 
##    5    4    1    2    1  148    1    4    2   30    3  124    1   11    1    1  108    1    1    1    1   16    2   65    1    2    1    1    4    1    1    1    2 
## <NA> 
## 1931

## [1] "Frequency table after encoding"
## sc_ave_3a. Men Height (BT)
##          51          55          56          60          65          70          75          80          85          86          89          90          98         100 
##           2           1           2           7           2           4           1          10           1           1           1           3           1          38 
##         105         109         110         112         115         118         120         123         125         128         130         135         137         140 
##           2           1           3           1           1           1          21           2           4           1          13           6           2          24 
##         145         150         151         152         153         155         156         157         158         159         160         161         162         163 
##           1         109           1           1           1           5           4           1           2           1         148           1           4           2 
##         165         168         170         174         175         176         178         180         181         185         186         187         190         195 
##          30           3         124           1          11           1           1         108           1           1           1           1          16           2 
## 200 or more        <NA> 
##          79        1931

# sc_ave_3b ("Women Height (BT)"): pool values of 175 and above into a single
# "175 or more" category; 888 and 999999 are passed as missing codes.
mydata <- top_recode(
  variable = "sc_ave_3b",
  break_point = 175,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## sc_ave_3b. Women Height (BT)
##   51   52   54   55   56   58   60   64   70   74   76   80   85   89   90   95  100  104  105  110  114  115  117  118  120  123  125  126  127  128  129  130  134 
##    7    1    1    3    4    1    6    1    5    1    1   10    2    1   10    1   46    1    1   17    1    1    1    1   31    3    7    1    1    1    1   31    1 
##  135  136  137  139  140  142  143  144  145  147  149  150  151  152  153  154  155  156  157  158  159  160  162  163  164  165  166  168  169  170  175  177  180 
##    4    1    1    1   65    1    1    1   34    2    2  204    3    1    2    2   44    5    1    3    1   92    1    2    1   15    1    1    1   43    1    1   24 
##  181  185  189  190  193  198  200  202  205  230  245  249  250  272 <NA> 
##    2    2    1   12    1    1   20    1    1    1    1    1    4    2 1935

## [1] "Frequency table after encoding"
## sc_ave_3b. Women Height (BT)
##          51          52          54          55          56          58          60          64          70          74          76          80          85          89 
##           7           1           1           3           4           1           6           1           5           1           1          10           2           1 
##          90          95         100         104         105         110         114         115         117         118         120         123         125         126 
##          10           1          46           1           1          17           1           1           1           1          31           3           7           1 
##         127         128         129         130         134         135         136         137         139         140         142         143         144         145 
##           1           1           1          31           1           4           1           1           1          65           1           1           1          34 
##         147         149         150         151         152         153         154         155         156         157         158         159         160         162 
##           2           2         204           3           1           2           2          44           5           1           3           1          92           1 
##         163         164         165         166         168         169         170 175 or more        <NA> 
##           2           1          15           1           1           1          43          76        1935

# Top code high income to the 99.5 percentile

# p7_1 (Padre): top-code at the floored 99.5th percentile of the reported
# values, computed after dropping NA and the 999999 missing code.
percentile_99.5 <- floor(quantile(
  with(mydata, p7_1[!is.na(p7_1) & p7_1 != 999999]),
  probs = 0.995
))
mydata <- top_recode(
  variable = "p7_1",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## p7_1. Padre
##    0    4   10   30   50   60   70   72   75   80   90  100  105  120  125  130  140  150  160  175  180  200  210  220  240  250  255  280  300  325  350  400  500 
##    2    1    1    1    4    3    3    1    2    1    2   12    1    9    5    1    1   17    1    1    7   23    3    1    4   24    2    1   21    1    8   15    7 
##  600  700  750  800  810  900  960 1000 1200 1300 1500 1600 1900 2000 3000 <NA> 
##    6    2    1    3    1    4    1    7    5    1    2    1    1    1    1 2525

## [1] "Frequency table after encoding"
## p7_1. Padre
##            0            4           10           30           50           60           70           72           75           80           90          100 
##            2            1            1            1            4            3            3            1            2            1            2           12 
##          105          120          125          130          140          150          160          175          180          200          210          220 
##            1            9            5            1            1           17            1            1            7           23            3            1 
##          240          250          255          280          300          325          350          400          500          600          700          750 
##            4           24            2            1           21            1            8           15            7            6            2            1 
##          800          810          900          960         1000         1200         1300         1500         1600         1900 1988 or more         <NA> 
##            3            1            4            1            7            5            1            2            1            1            2         2525

# p7_2 (Madre): top-code at the floored 99.5th percentile of the reported
# values, computed after dropping NA and the 999999 missing code.
# NOTE(review): the recorded output shows the resulting top category holds a
# single respondent - confirm this is sufficient protection.
percentile_99.5 <- floor(quantile(
  with(mydata, p7_2[!is.na(p7_2) & p7_2 != 999999]),
  probs = 0.995
))
mydata <- top_recode(
  variable = "p7_2",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## p7_2. Madre
##    0   20   25   30   35   50   65   75   80  100  125  150  160  187  200  210  225  400  900 1000 <NA> 
##    2    1    1    1    1    3    1    3    2    7    2    6    2    1    7    1    1    1    1    1 2703

## [1] "Frequency table after encoding"
## p7_2. Madre
##           0          20          25          30          35          50          65          75          80         100         125         150         160         187 
##           2           1           1           1           1           3           1           3           2           7           2           6           2           1 
##         200         210         225         400         900 978 or more        <NA> 
##           7           1           1           1           1           1        2703

# p7c1 (Tio(a)): top-code at the floored 99.5th percentile of the reported
# values, computed after dropping NA and the 999999 missing code.
# NOTE(review): the recorded output shows only two non-missing values, so the
# top category holds a single respondent - confirm this is sufficient
# protection.
percentile_99.5 <- floor(quantile(
  with(mydata, p7c1[!is.na(p7c1) & p7c1 != 999999]),
  probs = 0.995
))
mydata <- top_recode(
  variable = "p7c1",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## p7c1. Tio(a) 
##  200  800 <NA> 
##    1    1 2746

## [1] "Frequency table after encoding"
## p7c1. Tio(a) 
##         200 797 or more        <NA> 
##           1           1        2746

# p49 (monthly spending on the children's education): top-code at the floored
# 99.5th percentile of the reported values, computed after dropping NA and
# the 999999 missing code.
percentile_99.5 <- floor(quantile(
  with(mydata, p49[!is.na(p49) & p49 != 999999]),
  probs = 0.995
))
mydata <- top_recode(
  variable = "p49",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## p49. Cuanto gasta cada mes en la educacion de todos sus hijos que viven en este hoga
##    0    5   10   12   15   20   25   28   30   35   40   45   46   50   55   60   62   70   80   90  100  120  135  140  150  160  170  180  200  210  220  230  250 
##   13    5   14    1    4   25    5    1   27    2   18    2    1   86    1   17    1   14   23    5  129   15    1    2   62    3    1    7  155    1    1    1   38 
##  280  300  350  380  400  450  500  520  600  650  700  800  900 1000 1200 1500 2000 3000 <NA> 
##    2  103   10    1   41    2   72    1   22    1    6   10    4   39    1    5   12    3 1732

## [1] "Frequency table after encoding"
## p49. Cuanto gasta cada mes en la educacion de todos sus hijos que viven en este hoga
##            0            5           10           12           15           20           25           28           30           35           40           45 
##           13            5           14            1            4           25            5            1           27            2           18            2 
##           46           50           55           60           62           70           80           90          100          120          135          140 
##            1           86            1           17            1           14           23            5          129           15            1            2 
##          150          160          170          180          200          210          220          230          250          280          300          350 
##           62            3            1            7          155            1            1            1           38            2          103           10 
##          380          400          450          500          520          600          650          700          800          900         1000         1200 
##            1           41            2           72            1           22            1            6           10            4           39            1 
##         1500 2000 or more         <NA> 
##            5           15         1732

# Top-code p7a1. The break point is the 99.5th percentile, rounded down, of the
# values that are neither NA nor the 999999 missing code.
percentile_99.5 <- floor(quantile(
  mydata$p7a1[!is.na(mydata$p7a1) & mydata$p7a1 != 999999],
  probs = 0.995
))
mydata <- top_recode(variable = "p7a1", break_point = percentile_99.5, missing = 999999)
## [1] "Frequency table before encoding"
## p7a1. Hermano(a) 
##    0   15   20   50   60  100  120  125  150  180  200  210  250  270  300  350  400  500  600  900 1000 1200 1350 1500 <NA> 
##    1    2    2    5    2    3    2    1    6    1   11    1    4    1    6    1    2    3    1    3    2    1    1    1 2685

## [1] "Frequency table after encoding"
## p7a1. Hermano(a) 
##            0           15           20           50           60          100          120          125          150          180          200          210 
##            1            2            2            5            2            3            2            1            6            1           11            1 
##          250          270          300          350          400          500          600          900         1000         1200         1350 1453 or more 
##            4            1            6            1            2            3            1            3            2            1            1            1 
##         <NA> 
##         2685

# Top-code p7a2. The break point is the 99.5th percentile, rounded down, of the
# values that are neither NA nor the 999999 missing code.
percentile_99.5 <- floor(quantile(
  mydata$p7a2[!is.na(mydata$p7a2) & mydata$p7a2 != 999999],
  probs = 0.995
))
mydata <- top_recode(variable = "p7a2", break_point = percentile_99.5, missing = 999999)
## [1] "Frequency table before encoding"
## p7a2. Hermano(a) 
##    0   15   50   75  125  150  160  200  250  300  350  500  800 1000 <NA> 
##    3    2    2    1    1    3    1    3    2    1    1    1    1    1 2725

## [1] "Frequency table after encoding"
## p7a2. Hermano(a) 
##           0          15          50          75         125         150         160         200         250         300         350         500         800 978 or more 
##           3           2           2           1           1           3           1           3           2           1           1           1           1           1 
##        <NA> 
##        2725

# Top-code p7a3. The break point is the 99.5th percentile, rounded down, of the
# values that are neither NA nor the 999999 missing code.
# NOTE(review): the frequency table below shows only 8 non-missing
# observations, each with a unique value -- confirm that top-coding alone is
# sufficient for this variable.
percentile_99.5 <- floor(quantile(
  mydata$p7a3[!is.na(mydata$p7a3) & mydata$p7a3 != 999999],
  probs = 0.995
))
mydata <- top_recode(variable = "p7a3", break_point = percentile_99.5, missing = 999999)
## [1] "Frequency table before encoding"
## p7a3. Hermano(a) 
##    0   75   90  150  200  500  800 1000 <NA> 
##    1    1    1    1    1    1    1    1 2740

## [1] "Frequency table after encoding"
## p7a3. Hermano(a) 
##           0          75          90         150         200         500         800 993 or more        <NA> 
##           1           1           1           1           1           1           1           1        2740

# Top-code p7a4. The break point is the 99.5th percentile, rounded down, of the
# values that are neither NA nor the 999999 missing code.
# NOTE(review): the frequency table below shows only 3 non-missing
# observations (0, 0, 200), so the percentile is interpolated from very few
# values -- confirm that a percentile-based top code is meaningful here.
percentile_99.5 <- floor(quantile(
  mydata$p7a4[!is.na(mydata$p7a4) & mydata$p7a4 != 999999],
  probs = 0.995
))
mydata <- top_recode(variable = "p7a4", break_point = percentile_99.5, missing = 999999)
## [1] "Frequency table before encoding"
## p7a4. Hermano(a) 
##    0  200 <NA> 
##    2    1 2745

## [1] "Frequency table after encoding"
## p7a4. Hermano(a) 
##           0 198 or more        <NA> 
##           2           1        2745

# Remove birthdate, as it contains identifying information.

mydata <- mydata[!(names(mydata) %in% "birthdate")]

Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values

# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)


# Names of the variables passed to the indirect-PII table review.
# Regular numbered/lettered families are generated with paste0(); irregular
# names are spelled out. Order is significant only in that it fixes the order
# of elements in the vector. Built inside local() so the helper vectors do not
# leak into the global environment.
indirect_PII <- local({
  # Suffix cycle shared by the act_sd_* and act_wed_* name families.
  abc_cycle <- c("", "a", "b", "c")
  # "2g", "3h", ..., "13r", "14s", "14t": shared tail of the p25_* item names.
  item_tail <- paste0(c(2:13, 14, 14), letters[7:20])

  c(
    "sexo", "i14", "telf_yesno", "gender_nino",
    paste0("dropout_reasons_", c(1:14, 99)),
    "p2a", "p2b",
    paste0("p3a", 1:10), paste0("p3b", 1:3), paste0("p3c", 1:2), paste0("p3d", 1:3),
    paste0("p26a", 1:10), paste0("p26c", 1:2), paste0("p26d", 1:3),
    "p4_1", "p4_2",
    paste0("p4a", 1:10), paste0("p4b", 1:3), paste0("p4c", 1:2), paste0("p4d", 1:3),
    paste0("p5_aa", 1:9),
    "p10_1", "p42",
    paste0("p44b_", c(1:7, 99)),
    "nivel",
    paste0("p1a_", 1:3),
    paste0("dout_reasons_", 2:14),
    "p16", "gender", "hazardous_work", "worst_forms", "child_labor",
    "juntos_dist_hogar", "juntos1", "juntos2", "juntos3", "juntos4",
    "juntos_rnu", "juntos_dist", "juntos", "juntos_ind",
    "pobn", "pobx", "D_distjuntos", "school_fixed_level",
    "D_liveswithmother", "D_liveswithfather",
    "p12c", "dout_reasons", "dout_reasons_1", "dout_decision", "genero",
    "p22a", "p22b",
    # act_sd_4, act_sd_4a, act_sd_4b, act_sd_4c, ..., act_sd_21c
    paste0("act_sd_", rep(4:21, each = 4), abc_cycle),
    "act_sd_22", "act_sd_22a", "act_sd_23", "act_sd_23a", "act_sd_24",
    "act_sd_1", "act_sd_2", "act_sd_3",
    # act_wed_4, act_wed_4a, act_wed_4b, act_wed_4c, ..., act_wed_21c
    paste0("act_wed_", rep(4:21, each = 4), abc_cycle),
    "act_wed_22", "act_wed_22a", "act_wed_22b", "act_wed_22c",
    "act_wed_23", "act_wed_23a", "act_wed_24",
    "act_wed_1", "act_wed_2", "act_wed_3",
    "p25a1", "p25a2", "p25a3", "p25b", "p25c", "p25d", "p25e",
    paste0("p25_1", letters[1:6]),
    paste0("p25_", item_tail),
    "p25_2a", "p25_2b", "p25_2c", "p25_2d", "p25_2e", "p25_2f",
    "p25_2g1", "p25_2h", "p25_2i",
    paste0("p27", letters[1:5]),
    "switcher_2016", "switcher_2015", "switcher_2014", "asissted_2014",
    "same_school2014", "same_school2013", "asissted_2013",
    "s5p15a_2015", "s5p12a_2015", "s5p12c_2015", "s5p16_2015",
    "s6p22a_2015", "s6p22b_2015",
    "s6p25p25a1_2015", "s6p25p25a2_2015", "s6p25p25a3_2015",
    "s6p25p25b_2015", "s6p25p25c_2015", "s6p25p25d_2015", "s6p25p25e_2015",
    paste0("s6p25_1p25_1", letters[1:6], "_2015"),
    paste0("s6p25_1p25_", item_tail, "_2015"),
    paste0("s6p25_2p25_2", letters[1:9], "_2015"),
    paste0("s7p27", letters[1:5], "_2015"),
    "info4a_2015", "info4b_2015",
    "hazardous_work_2015", "worst_forms_2015", "child_labor_2015"
  )
})

# Pass the listed variables to capture_tables() for review.
# NOTE(review): capture_tables() is not defined in this script -- presumably it
# comes from the sourced helper-functions file; confirm what it writes or prints.
capture_tables(indirect_PII)


# Recode variables that carry very specific values.

# p4_1: codes 1, 2 and 5 all receive the label "Otros"; codes 3 and 4 keep the
# labels shown for them in the "before" table below.
# NOTE(review): the printed value labels still list 1, 2 and 5 as distinct
# codes under the shared "Otros" label -- confirm that ordinal_recode() also
# merges the stored codes, otherwise the source categories stay distinguishable.
break_activity <- as.numeric(1:5)
labels_activity <- setNames(
  as.numeric(1:5),
  c(
    "Otros",
    "Otros",
    "Trabajo remunerado",
    "Quehaceres del hogar o trabajo no remunerado",
    "Otros"
  )
)
mydata <- ordinal_recode(
  variable = "p4_1",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## p4_1. Padre
##        Estudia y tiene un trabajo remunerado                           Trabajo remunerado Quehaceres del hogar o trabajo no remunerado 
##                                            4                                          220                                          648 
##         Infante pre-escolar (menor a 2 anos)                                         <NA> 
##                                            6                                         1870 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,1e+06)
##   2     0     4     0     0         0
##   3     0     0   220     0         0
##   4     0     0     0   648         0
##   5     0     0     0     0         6
## [1] "Frequency table after encoding"
## p4_1. Padre
##                                        Otros                           Trabajo remunerado Quehaceres del hogar o trabajo no remunerado 
##                                           10                                          220                                          648 
##                                         <NA> 
##                                         1870 
## [1] "Inspect value labels and relabel as necessary"
##                                        Otros                                        Otros                           Trabajo remunerado 
##                                            1                                            2                                            3 
## Quehaceres del hogar o trabajo no remunerado                                        Otros 
##                                            4                                            5
# p4_2: codes 1, 2 and 5 all receive the label "Otros"; codes 3 and 4 keep the
# labels shown for them in the "before" table below.
# NOTE(review): confirm that ordinal_recode() merges the stored codes that
# share the "Otros" label, not only their labels.
break_activity <- as.numeric(1:5)
labels_activity <- setNames(
  as.numeric(1:5),
  c(
    "Otros",
    "Otros",
    "Trabajo remunerado",
    "Quehaceres del hogar o trabajo no remunerado",
    "Otros"
  )
)
mydata <- ordinal_recode(
  variable = "p4_2",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## p4_2. Madre
##                                      Estudia        Estudia y tiene un trabajo remunerado                           Trabajo remunerado 
##                                            2                                            1                                           44 
## Quehaceres del hogar o trabajo no remunerado         Infante pre-escolar (menor a 2 anos)                                         <NA> 
##                                          903                                            9                                         1789 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,1e+06)
##   1     2     0     0     0         0
##   2     0     1     0     0         0
##   3     0     0    44     0         0
##   4     0     0     0   903         0
##   5     0     0     0     0         9
## [1] "Frequency table after encoding"
## p4_2. Madre
##                                        Otros                           Trabajo remunerado Quehaceres del hogar o trabajo no remunerado 
##                                           12                                           44                                          903 
##                                         <NA> 
##                                         1789 
## [1] "Inspect value labels and relabel as necessary"
##                                        Otros                                        Otros                           Trabajo remunerado 
##                                            1                                            2                                            3 
## Quehaceres del hogar o trabajo no remunerado                                        Otros 
##                                            4                                            5
# p4a1: codes 2 and 5 receive the label "Otros"; codes 1, 3 and 4 keep the
# labels shown for them in the "before" table below.
# NOTE(review): confirm that ordinal_recode() merges the stored codes that
# share the "Otros" label, not only their labels.
break_activity <- as.numeric(1:5)
labels_activity <- setNames(
  as.numeric(1:5),
  c(
    "Estudia",
    "Otros",
    "Trabajo remunerado",
    "Quehaceres del hogar o trabajo no remunerado",
    "Otros"
  )
)
mydata <- ordinal_recode(
  variable = "p4a1",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## p4a1. Hermano(a) 
##                                      Estudia        Estudia y tiene un trabajo remunerado                           Trabajo remunerado 
##                                          719                                           12                                           52 
## Quehaceres del hogar o trabajo no remunerado         Infante pre-escolar (menor a 2 anos)                                         <NA> 
##                                           84                                           23                                         1858 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,1e+06)
##   1   719     0     0     0         0
##   2     0    12     0     0         0
##   3     0     0    52     0         0
##   4     0     0     0    84         0
##   5     0     0     0     0        23
## [1] "Frequency table after encoding"
## p4a1. Hermano(a) 
##                                      Estudia                                        Otros                           Trabajo remunerado 
##                                          719                                           35                                           52 
## Quehaceres del hogar o trabajo no remunerado                                         <NA> 
##                                           84                                         1858 
## [1] "Inspect value labels and relabel as necessary"
##                                      Estudia                                        Otros                           Trabajo remunerado 
##                                            1                                            2                                            3 
## Quehaceres del hogar o trabajo no remunerado                                        Otros 
##                                            4                                            5
# p4a2: codes 2 and 3 receive the label "Otros"; codes 1, 4 and 5 are labelled
# individually.
# NOTE(review): the label for code 5 reads "menor de 2 anos" here, while the
# source label printed in the "before" table reads "menor a 2 anos" -- confirm
# which wording is intended. Also confirm that ordinal_recode() merges the
# stored codes that share the "Otros" label, not only their labels.
break_activity <- as.numeric(1:5)
labels_activity <- setNames(
  as.numeric(1:5),
  c(
    "Estudia",
    "Otros",
    "Otros",
    "Quehaceres del hogar o trabajo no remunerado",
    "Infante pre-escolar (menor de 2 anos)"
  )
)
mydata <- ordinal_recode(
  variable = "p4a2",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## p4a2. Hermano(a) 
##                                      Estudia        Estudia y tiene un trabajo remunerado                           Trabajo remunerado 
##                                          584                                            9                                           15 
## Quehaceres del hogar o trabajo no remunerado         Infante pre-escolar (menor a 2 anos)                                         <NA> 
##                                           34                                           44                                         2062 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,1e+06)
##   1   584     0     0     0         0
##   2     0     9     0     0         0
##   3     0     0    15     0         0
##   4     0     0     0    34         0
##   5     0     0     0     0        44
## [1] "Frequency table after encoding"
## p4a2. Hermano(a) 
##                                      Estudia                                        Otros Quehaceres del hogar o trabajo no remunerado 
##                                          584                                           24                                           34 
##        Infante pre-escolar (menor de 2 anos)                                         <NA> 
##                                           44                                         2062 
## [1] "Inspect value labels and relabel as necessary"
##                                      Estudia                                        Otros                                        Otros 
##                                            1                                            2                                            3 
## Quehaceres del hogar o trabajo no remunerado        Infante pre-escolar (menor de 2 anos) 
##                                            4                                            5
# p4a3: codes 2, 3 and 4 receive the label "Otros"; codes 1 and 5 are labelled
# individually.
# NOTE(review): the label for code 5 reads "menor de 2 anos" here, while the
# source label printed in the "before" table reads "menor a 2 anos" -- confirm
# which wording is intended. Also confirm that ordinal_recode() merges the
# stored codes that share the "Otros" label, not only their labels.
break_activity <- as.numeric(1:5)
labels_activity <- setNames(
  as.numeric(1:5),
  c(
    "Estudia",
    "Otros",
    "Otros",
    "Otros",
    "Infante pre-escolar (menor de 2 anos)"
  )
)
mydata <- ordinal_recode(
  variable = "p4a3",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## p4a3. Hermano(a) 
##                                      Estudia        Estudia y tiene un trabajo remunerado                           Trabajo remunerado 
##                                          361                                            1                                            7 
## Quehaceres del hogar o trabajo no remunerado         Infante pre-escolar (menor a 2 anos)                                         <NA> 
##                                           20                                           51                                         2308 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,1e+06)
##   1   361     0     0     0         0
##   2     0     1     0     0         0
##   3     0     0     7     0         0
##   4     0     0     0    20         0
##   5     0     0     0     0        51
## [1] "Frequency table after encoding"
## p4a3. Hermano(a) 
##                               Estudia                                 Otros Infante pre-escolar (menor de 2 anos)                                  <NA> 
##                                   361                                    28                                    51                                  2308 
## [1] "Inspect value labels and relabel as necessary"
##                               Estudia                                 Otros                                 Otros                                 Otros 
##                                     1                                     2                                     3                                     4 
## Infante pre-escolar (menor de 2 anos) 
##                                     5
# p4a4: codes 2, 3 and 4 receive the label "Otros"; codes 1 and 5 are labelled
# individually.
# NOTE(review): the label for code 5 reads "menor de 2 anos" here, while the
# source label printed in the "before" table reads "menor a 2 anos" -- confirm
# which wording is intended. Also confirm that ordinal_recode() merges the
# stored codes that share the "Otros" label, not only their labels.
break_activity <- as.numeric(1:5)
labels_activity <- setNames(
  as.numeric(1:5),
  c(
    "Estudia",
    "Otros",
    "Otros",
    "Otros",
    "Infante pre-escolar (menor de 2 anos)"
  )
)
mydata <- ordinal_recode(
  variable = "p4a4",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## p4a4. Hermano(a) 
##                                      Estudia        Estudia y tiene un trabajo remunerado                           Trabajo remunerado 
##                                          176                                            1                                            2 
## Quehaceres del hogar o trabajo no remunerado         Infante pre-escolar (menor a 2 anos)                                         <NA> 
##                                            9                                           43                                         2517 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,1e+06)
##   1   176     0     0     0         0
##   2     0     1     0     0         0
##   3     0     0     2     0         0
##   4     0     0     0     9         0
##   5     0     0     0     0        43
## [1] "Frequency table after encoding"
## p4a4. Hermano(a) 
##                               Estudia                                 Otros Infante pre-escolar (menor de 2 anos)                                  <NA> 
##                                   176                                    12                                    43                                  2517 
## [1] "Inspect value labels and relabel as necessary"
##                               Estudia                                 Otros                                 Otros                                 Otros 
##                                     1                                     2                                     3                                     4 
## Infante pre-escolar (menor de 2 anos) 
##                                     5
# p4a5: codes 2, 3 and 4 receive the label "Otros"; codes 1 and 5 are labelled
# individually.
# NOTE(review): the label for code 5 reads "menor de 2 anos" here, while the
# source label printed in the "before" table reads "menor a 2 anos" -- confirm
# which wording is intended. Also confirm that ordinal_recode() merges the
# stored codes that share the "Otros" label, not only their labels.
break_activity <- as.numeric(1:5)
labels_activity <- setNames(
  as.numeric(1:5),
  c(
    "Estudia",
    "Otros",
    "Otros",
    "Otros",
    "Infante pre-escolar (menor de 2 anos)"
  )
)
mydata <- ordinal_recode(
  variable = "p4a5",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## p4a5. Hermano(a) 
##                                      Estudia Quehaceres del hogar o trabajo no remunerado         Infante pre-escolar (menor a 2 anos) 
##                                           71                                            4                                           36 
##                                         <NA> 
##                                         2637 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,1e+06)
##   1    71     0     0     0         0
##   4     0     0     0     4         0
##   5     0     0     0     0        36
## [1] "Frequency table after encoding"
## p4a5. Hermano(a) 
##                               Estudia                                 Otros Infante pre-escolar (menor de 2 anos)                                  <NA> 
##                                    71                                     4                                    36                                  2637 
## [1] "Inspect value labels and relabel as necessary"
##                               Estudia                                 Otros                                 Otros                                 Otros 
##                                     1                                     2                                     3                                     4 
## Infante pre-escolar (menor de 2 anos) 
##                                     5
# p4b1: every code except 4 receives the label "Otros"; code 4 keeps the label
# shown for it in the "before" table below.
# NOTE(review): confirm that ordinal_recode() merges the stored codes that
# share the "Otros" label, not only their labels.
break_activity <- as.numeric(1:5)
labels_activity <- setNames(
  as.numeric(1:5),
  c(
    "Otros",
    "Otros",
    "Otros",
    "Quehaceres del hogar o trabajo no remunerado",
    "Otros"
  )
)
mydata <- ordinal_recode(
  variable = "p4b1",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## p4b1. Abuelo(a) 
##                                      Estudia Quehaceres del hogar o trabajo no remunerado                                         <NA> 
##                                            3                                          101                                         2644 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,1e+06)
##   1     3     0     0     0         0
##   4     0     0     0   101         0
## [1] "Frequency table after encoding"
## p4b1. Abuelo(a) 
##                                        Otros Quehaceres del hogar o trabajo no remunerado                                         <NA> 
##                                            3                                          101                                         2644 
## [1] "Inspect value labels and relabel as necessary"
##                                        Otros                                        Otros                                        Otros 
##                                            1                                            2                                            3 
## Quehaceres del hogar o trabajo no remunerado                                        Otros 
##                                            4                                            5

Matching and crosstabulations: Run automated PII check

# Two sets of categorical key variables; each set gets its own risk check.
selectedKeyVars <- c("sexo", "grado2015") ##!!! Replace with candidate categorical demo vars
selectedKeyVars2 <- c("i14", "p4_1")

# Build the sdcMicro object for the first key set and print its summary.
sdcInitial <- createSdcObj(keyVars = selectedKeyVars, dat = mydata)
sdcInitial
## The input dataset consists of 2748 rows and 1641 variables.
##   --> Categorical key variables: sexo, grado2015
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##  Key Variable Number of categories     Mean size            Size of smallest (>0)       
##          sexo                    3 (3)  1354.500 (1354.500)                  1319 (1319)
##     grado2015                    3 (3)  1354.500 (1354.500)                  1167 (1167)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 0 (0.000%)
##   - 3-anonymity: 0 (0.000%)
##   - 5-anonymity: 0 (0.000%)
## 
## ----------------------------------------------------------------------
# Build the sdcMicro object for the second key set and print its summary.
sdcInitial2 <- createSdcObj(keyVars = selectedKeyVars2, dat = mydata)
sdcInitial2
## The input dataset consists of 2748 rows and 1641 variables.
##   --> Categorical key variables: i14, p4_1
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##  Key Variable Number of categories     Mean size           Size of smallest (>0)      
##           i14                    3 (3)   508.000 (508.000)                   370 (370)
##          p4_1                    5 (5)   219.500 (219.500)                     4   (4)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 0 (0.000%)
##   - 3-anonymity: 0 (0.000%)
##   - 5-anonymity: 0 (0.000%)
## 
## ----------------------------------------------------------------------

Show values of the key variables for records that violate k-anonymity

# labelDataset() comes from the sourced helper file; it presumably applies
# value labels before records are listed - TODO confirm.
mydata <- labelDataset(mydata)

# First key set: flag rows whose entry in column 2 of the individual-risk
# matrix is below 2, then show their key-variable values.
notAnon <- sdcInitial@risk[["individual"]][, 2] < 2 # for 2-anonymity
mydata[notAnon, selectedKeyVars]
## # A tibble: 0 x 2
## # ... with 2 variables: sexo <dbl>, grado2015 <dbl>
# NOTE(review): sdcFinal is not referenced again in the part of the script
# reviewed here - confirm whether the suppressed data should reach the export.
sdcFinal <- localSuppression(obj = sdcInitial)

# Second key set: same check and listing.
notAnon2 <- sdcInitial2@risk[["individual"]][, 2] < 2 # for 2-anonymity
mydata[notAnon2, selectedKeyVars2]
## # A tibble: 0 x 2
## # ... with 2 variables: i14 <dbl+lbl>, p4_1 <dbl+lbl>
# NOTE(review): sdcFinal2 is likewise unused in the part of the script
# reviewed here.
sdcFinal2 <- localSuppression(obj = sdcInitial2)

Open-ends: review responses for any sensitive information, redact as necessary

# !!! Identify open-end variables here:
# The act_sd_* and act_wed_* names are generated rather than typed out; the
# suffix order 4..24 followed by 1..3 is kept so the vector is unchanged.
open_ends <- c(
  "dropout_reasons_otro",
  "rp_finance_2a",
  "p15a_prop",
  "p44c",
  "p51a",
  "centro_poblado",
  "referencia",
  "school_fixed",
  "p11b",
  "p13c1",
  "rs_finance_2a",
  "q48",
  paste0("act_sd_", c(4:24, 1:3), "o"),
  paste0("act_wed_", c(4:24, 1:3), "o"),
  "switcher_2016_otro",
  "switcher_2015_otro",
  "switcher_2014_otro",
  "p35a1",
  "s4p11b_2015",
  "s4p13c1_2015",
  "s5p18_2015"
)

# Export the responses of every listed open-end variable for manual review.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number

# Every variable listed in `open_ends` is removed from the dataset. The drop is
# driven by that vector in a single pass, so the review list and the removal
# list cannot drift apart. To keep some open-ends, narrow the selection, e.g.
# open_ends_to_drop <- setdiff(open_ends, c("var_to_keep")).
open_ends_to_drop <- open_ends
mydata <- mydata[!names(mydata) %in% open_ends_to_drop]

GPS data: Remove (the GPS variables are dropped rather than displaced)

# Remove both GPS-derived columns in one pass.
mydata <- mydata[!names(mydata) %in% c("geo_pointsaltitude", "geo_points1")]

Save processed data in Stata and SPSS format

# Write the public-use file in Stata format.
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))

# Reload the file just written, so the SPSS export starts from the data as
# stored in the .dta file.
mydata <- haven::read_dta(file = paste0(filename, "_PU.dta"))

# Strip a leading underscore from column names.
names(mydata) <- gsub("^_", "", names(mydata))
# Overwrite every cell that tests as missing with a plain NA.
# NOTE(review): presumably normalises special missing values after the Stata
# round trip - confirm.
mydata[is.na(mydata)] <- NA
# Rename two columns before the SPSS export.
# NOTE(review): presumably avoids name clashes in SPSS - confirm.
names(mydata) <- replace(names(mydata), names(mydata) == "ANEXO", "ANEXO1")
names(mydata) <- replace(names(mydata), names(mydata) == "COD_MOD", "cod_mod_spss")
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))


# Build the report title from the dataset file name.
title_var <- sprintf("DOL-ILAB SDC - %s", filename)