# Clear the global workspace before the run. `all.names = TRUE` makes ls()
# also list objects whose names start with a dot, so those are removed too.
# (The previous `all=t` passed the function t(), not TRUE, which ls() treats
# as FALSE, so dot-prefixed objects were left behind.)
rm(list = ls(all.names = TRUE))

# Set up filenames

# Name of the dataset being processed (given here without a file extension).
# !!!Update filename
filename <- "InDepthStudents2016_Rural_Raw_NOPII"

# Path of the helper-functions script that is source()d further down.
# !!!Update helper functions file
functions_vers <- "functions_1.7.R"

# Set up data and functions, and create the dictionary for dataset review

# Run the helper script named in `functions_vers` so that its definitions are
# available to the rest of this file.
source(file = functions_vers)

# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (population <100,000)
# Large Location: Location (population >100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary


# !!!Save the flagged dictionary in .csv format, add "DatasetReview" to its name, and continue processing the data with the subset of flagged variables

# Direct PII: variables to be removed

# !!!Include any Direct PII variables
# Columns to delete from `mydata`. Names listed here that do not occur in
# `mydata` have no effect, because the filter below only tests membership.
dropvars <- c(
  "nomest", "apest", "amest",
  "nompad", "app_pad", "nommad", "app_mad",
  "p1a1_fixed", "p1a2", "p1a2_fixed", "p1a3", "p1a3_fixed",
  "p1a4", "p1a4_fixed",
  "address", "referencia",
  "audio1_student", "audio2_student", "audio3_student",
  "text_audit",
  "cto_padre", "cto_padre_nom", "cto_padre_app1", "cto_padre_app2",
  "audio_random", "key"
)

# Keep every column whose name is not in `dropvars`.
mydata <- mydata[!(names(mydata) %in% dropvars)]

# Direct PII-team: Encode field team names

# Interviewer names, for example, may be useful for analysis of interviewer effects

# !!!Replace vector in "variables" field below with relevant variable names

# Remove the column named "i5" from `mydata`; every other column is kept.
# NOTE(review): presumably "i5" is the field-team variable for this section,
# and it is dropped here rather than encoded -- confirm against the flagged
# dictionary.
mydata <- mydata[!is.element(names(mydata), "i5")]

# Small locations: Encode locations with population <100,000 using random large numbers

# !!!Include relevant variables, but check their population size first to confirm they are <100,000

# Location variables to be recoded.
locvars <- c(
  "i8a", "i7", "i9a1", "cod_mod",
  "school_fixed_primary", "school_fixed_sec",
  "nom_dist", "district_fixed",
  "p12",
  "school2014_name1", "school2013_name1"
)

# `encode_location` is not defined in this file (presumably it comes from the
# sourced helper script -- verify). The recorded output below shows it printing
# a frequency table before and after encoding for each variable.
# NOTE(review): the data set is not passed as an argument, so the helper
# presumably reads `mydata` from the calling environment -- confirm.
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## i8a. Provincia
##     AREQUIPA       CAMANA     CASTILLA     CAYLLOMA   CONDESUYOS     LA UNION        CUSCO      ACOMAYO 
##           12            2           88           39           48          148           52           47 
##         ANTA        CALCA        CANAS      CANCHIS CHUMBIVILCAS      ESPINAR       PARURO  PAUCARTAMBO 
##          291          161          227            2          744           55          314          395 
## QUISPICANCHI     URUBAMBA         <NA> 
##           17          111           16 
## [1] "Frequency table after encoding"
## i8a. Provincia
##  841  842  843  844  845  846  847  848  849  850  851  852  853  854  855  856  857  858 <NA> 
##  227  744  161   52    2  291   48  395  314   55  148   88    2   39   17  111   12   47   16 
## [1] "Frequency table before encoding"
## i7. Distrito
##           AREQUIPA  ALTO SELVA ALEGRE     CERRO COLORADO      JACOBO HUNTER         PAUCARPATA 
##                  1                  1                  3                  2                  1 
##            SACHACA           SOCABAYA               YURA JOSE MARIA QUIMPER      SAMUEL PASTOR 
##                  1                  1                  2                  1                  1 
##            ANDAGUA            CHACHAS       CHILCAYMARCA              CHOCO          ORCOPAMPA 
##                  3                 20                  8                  1                 47 
##         PAMPACOLCA              TIPAN             VIRACO             CHIVAY             ACHOMA 
##                  5                  2                  2                  6                  1 
##           CAYLLOMA             SIBAYO              TAPAY        CHUQUIBAMBA           CAYARANI 
##                 29                  1                  2                  5                 38 
##               IRAY          SALAMANCA          COTAHUASI               ALCA        HUAYNACOTAS 
##                  3                  2                 14                 21                 21 
##         PAMPAMARCA              PUYCA          TOMEPAMPA              CUSCO             CCORCA 
##                 27                 54                 11                  5                 10 
##       SAN JERONIMO      SAN SEBASTIAN           SANTIAGO            WANCHAQ               ACOS 
##                 15                  1                 19                  2                  3 
##           RONDOCAN               ANTA          ANCAHUASI      CHINCHAYPUJIO         HUAROCONDO 
##                 44                 87                 97                 57                 34 
##            PUCYURA             ZURITE              CALCA              LAMAY              PISAC 
##                  1                 15                  1                 40                 61 
##       SAN SALVADOR            YANAOCA             CHECCA        KUNTURKANKI             LANGUI 
##                 59                  1                135                 80                  9 
##             QUEHUE            SICUANI        SANTO TOMAS         CAPACMARCA            CHAMACA 
##                  2                  2                 99                 33                116 
##        COLQUEMARCA           LIVITACA             LLUSCO            QUIÑOTA            VELILLE 
##                101                139                103                 57                 96 
##            ESPINAR          COPORAQUE             PARURO              ACCHA              CCAPI 
##                  6                 49                 16                 38                 21 
##             COLCHA         HUANOQUITE             OMACHA       PACCARITAMBO          PILLPINTO 
##                 18                 37                111                 21                  1 
##          YAURISQUE        PAUCARTAMBO             CAICAY        CHALLABAMBA         COLQUEPATA 
##                 51                  6                 42                 63                149 
##         HUANCARANI     ANDAHUAYLILLAS              LUCRE           URUBAMBA          CHINCHERO 
##                135                  1                 16                 29                 44 
##       HUAYLLABAMBA              MARAS      OLLANTAYTAMBO               <NA> 
##                  1                 32                  5                 16 
## [1] "Frequency table after encoding"
## i7. Distrito
##  644  645  646  647  648  649  650  651  652  653  654  655  656  657  658  659  660  661  662  663  664  665 
##   49    1   15   42   57   99    1   34    2    8  139   15    3  103   16    1   37   44    1  111   21   27 
##  666  667  668  669  670  671  672  673  674  675  676  677  678  679  680  681  682  683  684  685  686  687 
##    6    1    1   57  101    3   21  135   96   33    2   14    1   11    1   16  135    1    1   38   40   47 
##  688  689  690  691  692  693  694  695  696  697  698  699  700  701  702  703  704  705  706  707  708  709 
##   97   18   10    5   51    1    9   19  149    5    1   20   21    1   61    3    6   44   87   29    3    2 
##  710  711  712  713  714  715  716  717  718  719  720  721  722  723  724  725  726  727  728  729  730  731 
##   32    2    2    2    5   80    1   54   38  116    1    5   29    2    1    1    2   63   59    2   21    6 
## <NA> 
##   16 
## [1] "Frequency table before encoding"
## i9a1. Código modular
##  204800  204875  204909  205005  205047  205112  205120  205153  205682  205690  205773  205781  205815 
##      10       8       2       5       6       8      11       7       6       7       9       2       5 
##  206334  207373  207407  216341  219741  220285  226704  232207  232223  232231  232249  232264  232504 
##       3       1       1       8       2       3       6       9       9       6       2       7       3 
##  232512  232538  232546  232553  232561  232579  232587  232595  232603  232611  232645  232728  232777 
##       6       3       5       5       4       3       7       5       7       2       1       3       6 
##  233130  233296  233361  233676  233718  233734  233825  233882  233890  233908  233916  233924  233932 
##       4       5       4       3       7       5       3       1       3       6       8       9       3 
##  233940  233957  233965  233973  233981  233999  234021  234062  234096  234104  234112  234120  234138 
##       4       6       3       5       6       8       7       5       6       6       4       4       8 
##  234153  234161  234187  234195  234203  234229  234237  234351  234369  234377  234385  234401  234419 
##       6       8       5       1       4       5       6       1       9       5       8       7       8 
##  234427  234443  234450  234500  234583  234674  234682  234781  234831  234856  236158  236349  236422 
##       7       6       3       7       3       9       7       3       7       7       6       5      16 
##  236448  236463  236471  236489  236653  236661  236927  287409  287425  287466  309286  309294  309377 
##       4       8       1       7       1      31       9      10       6       3       1      12       1 
##  309419  309435  309567  310433  310441  312090  312215  312306  312421  312744  312868  313080  313239 
##       3       1       4       3       1       2       6      10       5       2       2       1       2 
##  313395  313460  313890  313908  313965  313981  314070  314187  314211  314237  314245  314252  314260 
##       9       1       3       8       6       6       2       5       4       4       4       6       6 
##  314278  314294  405258  405498  405704  405738  405746  405852  405894  405902  405928  405936  406009 
##      10       5       6       8       5       5       4       9       8       8       8       9       6 
##  406066  406082  406116  406124  406140  406215  406223  406264  406413  406595  406629  406645  406975 
##      10       7       6       8       3       5       6       4       8       8       9       5       6 
##  406983  407007  407049  408211  408245  408278  408286  408294  408328  408336  408393  408468  408476 
##      10       4       7       1       7       4       4       3       5       1       4       8       7 
##  408484  408492  408559  408567  408609  408666  408732  408773  408823  408856  408922  408955  408971 
##       5       5       4       8       6       8       3       4       3       8       7       4       5 
##  409003  409011  409029  409193  409227  409235  409243  409284  409292  409300  409318  409326  409359 
##       7      10       9       5       8       8       6       9       7       9       7       4       7 
##  409441  409565  409896  410464  410480  410514  410613  410670  410746  410779  410787  410803  473249 
##       9       4       2       1       7       5       1       4       7       9       5       7       7 
##  481283  486688  486928  489120  495069  495325  498782  499863  502922  504142  517581  517888  518084 
##       9       8       2       8      14      16       1       3       2      13      29       8      22 
##  518472  519496  519595  519678  525923  550392  551309  557587  579268  579276  579284  579292  579300 
##      10       6       6       6      21       7       3       9       9       1       1      17      20 
##  585885  587055  587147  587204  589200  589747  589804  591255  591602  592147  612051  612119  612291 
##       6      15       4      13       1       1       5      23      14       4       2       2       5 
##  612416  612507  612689  612747  612770  612804  615013  616110  617787  617829  621391  623017  623041 
##       2       9       1       1       1       4       5       6      17       6      15       4       2 
##  637215  637272  639542  647388  647412  647446  647628  655746  671628  672105  678961  679829  680058 
##       1       9       2      10       7      16       4       1       4       4       2       2       9 
##  680082  680124  699603  712562  712711  712778  723031  730655  731273  735498  736116  775700  776039 
##       1      18      12       2       2       2       3       6       2       5       2       5       3 
##  783423  783597  783621  783696  783704  783720  783787  783795  791319  791574  794438  796888  818674 
##      12       1       1      11       1      14      10      10      20       4       2       2       6 
##  818708  844159  844183  891408  891812  895482  899351  927871  930958  931055  931063  932236  932434 
##       3       2       4      14       1       1       3      10       1      15      18      12       2 
##  932491  932608  932848  933226  933283  933291  933317  933531  933598  933846 1031574 1117704 1120005 
##       4       8       7       6      10       1       6       1       2       6       4      10       1 
## 1201649 1201870 1260942 1266428 1271840 1273655 1314376 1320647 1321322 1321330 1321355 1321421 1327279 
##      15       6       2       6       1       2       2      10       6      11       9      11       2 
## 1327287 1336072 1343573 1343581 1344639 1345024 1347269 1347293 1347301 1347434 1347459 1347921 1347939 
##       9       3       9      12      13       7       1      13       8       1       8      19      11 
## 1347970 1352269 1364868 1369248 1372507 1374438 1377209 1377233 1377415 1379361 1379544 1380021 1380120 
##      14       1       7      14       7       1      11       1      14       1       4      20       4 
## 1386226 1388610 1388644 1388651 1389261 1389279 1390095 1390467 1390517 1390582 1390665 1390673 1392083 
##      10       2       5      11       9       5       2      11       6       1      17      16       7 
## 1392091 1392109 1392117 1392125 1392141 1392174 1392216 1392224 1392240 1392257 1396191 1396209 1396225 
##       6      10       7      13      16      14       2       7       3       8      19      11      27 
## 1396852 1396878 1396886 1398783 1398932 1401934 1401942 1401959 1402536 1408426 1412634 1412873 1415983 
##      18       4       9       1       1      11       8      17       7       2       5       3       4 
## 1418615 1423003 1442185 1452705 1458348 1459791 1459809 1523802 1523810 1523828 1540988 1540996 1541192 
##       4      12       5       7       9       9      12       9       8       6       7      10      10 
## 1625532 1625557 1625573 1630631 1637263 1659101 1666130 1719210 1723469    <NA> 
##       7       8       8       3       4       9       1       3       2      16 
## [1] "Frequency table after encoding"
## i9a1. Código modular
##  559  560  561  562  563  564  565  566  567  568  569  570  571  572  573  574  575  576  577  578  579  580 
##    2   17    4    6    6   10    1   16    9    1    2    8    6    6   10    7   15    1    1    7    4   22 
##  581  582  583  584  585  586  587  588  589  590  591  592  593  594  595  596  597  598  599  600  601  602 
##    1    3    9    9    6    9    5    2    6    1    4    7    9    3    3    7    7    1   12    9    3    8 
##  603  604  605  606  607  608  609  610  611  612  613  614  615  616  617  618  619  620  621  622  623  624 
##    2    4    5    9    7    9    4    4   14    5    6    6   15    9   18    5    3    6   13    1    1   10 
##  625  626  627  628  629  630  631  632  633  634  635  636  637  638  639  640  641  642  643  644  645  646 
##    7    9   10    1    1    8    5    3    2    4    5   14    6   16    8    3    2    6    1    4    7    7 
##  647  648  649  650  651  652  653  654  655  656  657  658  659  660  661  662  663  664  665  666  667  668 
##    3    7    1    4    8    6    8    2    3    3    3   10   11   29   10   10    8    6    2    7    5    7 
##  669  670  671  672  673  674  675  676  677  678  679  680  681  682  683  684  685  686  687  688  689  690 
##    6    2    1    1    4    8    8    2   27    6    8    5    6    5   14    4    6    2    9    9    7    7 
##  691  692  693  694  695  696  697  698  699  700  701  702  703  704  705  706  707  708  709  710  711  712 
##    6    9    3    9    3    7    3   20   31    5   10    8    9    6   11    8    5    5    9   12   11   14 
##  713  714  715  716  717  718  719  720  721  722  723  724  725  726  727  728  729  730  731  732  733  734 
##    4    5    8    6    8    5   11   15    1    4   10    2    8    2    8    1    4    5    1    8    9    1 
##  735  736  737  738  739  740  741  742  743  744  745  746  747  748  749  750  751  752  753  754  755  756 
##    5   18    2    7   11   12    3   19    6    7    7    1   11    5   10   12    8    2    2    4    1    7 
##  757  758  759  760  761  762  763  764  765  766  767  768  769  770  771  772  773  774  775  776  777  778 
##   14    9    5   17    6    9    6    2    5   12    7    2    8    2    5    1    7    4    7    8    4    5 
##  779  780  781  782  783  784  785  786  787  788  789  790  791  792  793  794  795  796  797  798  799  800 
##    9    2    6    7    7    7    8   12    8    1    7   20    3    6    8    6    4    6    5    1    3   10 
##  801  802  803  804  805  806  807  808  809  810  811  812  813  814  815  816  817  818  819  820  821  822 
##    4    6    1    2   16    1    4    9    1   14    6    8   14    9    3    1    2    2    1    2    4    5 
##  823  824  825  826  827  828  829  830  831  832  833  834  835  836  837  838  839  840  841  842  843  844 
##    9   13   10    5    7    6    4    2    4   16    3    1    8    9    4    2    1    6    1    1    1    1 
##  845  846  847  848  849  850  851  852  853  854  855  856  857  858  859  860  861  862  863  864  865  866 
##    2   21    2    8    1   19   11    1    1   13    9    6    6    3   10   10    4    1    3    6    2    1 
##  867  868  869  870  871  872  873  874  875  876  877  878  879  880  881  882  883  884  885  886  887  888 
##    2    7    5    3    4    1    5    6   10    9    1    8    7    5    4    1    2    3   10    7    7    4 
##  889  890  891  892  893  894  895  896  897  898  899  900  901  902  903  904  905  906  907  908  909  910 
##    3    6    3    5    3    8    3    7    9   15   16    6   17   23   13    2    5    5    6    1    6    9 
##  911  912  913  914  915  916  917  918  919  920  921  922  923  924  925  926  927  928  929  930  931  932 
##    3   11    7    6    6    8   11    8    8    9    9    1    6    1    2    3    7    4    3    8    1    2 
##  933  934  935  936  937  938  939  940  941  942  943  944  945  946  947  948  949  950  951  952  953  954 
##    6   10    2    5    1    5    8    7    9    4    7    4   18    6    4    4    5    4   20   10    4    1 
##  955  956  957  958  959  960  961  962  963  964  965  966  967  968  969  970  971  972  973  974  975  976 
##   11    7    2   17    4    7    3    5    2    6    8    7    4    2   12    4    5    7   10    2    4    4 
##  977  978  979  980  981  982  983 <NA> 
##   14    8   13    4    3    3    5   16 
## [1] "Frequency table before encoding"
## cod_mod. 
##         1031574 1117704 1120005 1201649 1201870 1260942 1266428 1271840 1273655 1314376 1320647 1321322 
##      16       4      10       1      15       6       2       6       1       2       2      10       6 
## 1321330 1321355 1321421 1327279 1327287 1336072 1343573 1343581 1344639 1345024 1347269 1347293 1347301 
##      11       9      11       2       9       3       9      12      13       7       1      13       8 
## 1347434 1347459 1347921 1347939 1347970 1352269 1364868 1369248 1372507 1374438 1377209 1377233 1377415 
##       1       8      19      11      14       1       7      14       7       1      11       1      14 
## 1379361 1379544 1380021 1380120 1386226 1388610 1388644 1388651 1389261 1389279 1390095 1390467 1390517 
##       1       4      20       4      10       2       5      11       9       5       2      11       6 
## 1390582 1390665 1390673 1392083 1392091 1392109 1392117 1392125 1392141 1392174 1392216 1392224 1392240 
##       1      17      16       7       6      10       7      13      16      14       2       7       3 
## 1392257 1396191 1396209 1396225 1396852 1396878 1396886 1398783 1398932 1401934 1401942 1401959 1402536 
##       8      19      11      27      18       4       9       1       1      11       8      17       7 
## 1408426 1412634 1412873 1415983 1418615 1423003 1442185 1452705 1458348 1459791 1459809 1523802 1523810 
##       2       5       3       4       4      12       5       7       9       9      12       9       8 
## 1523828 1540988 1540996 1541192 1625532 1625557 1625573 1630631 1637263 1659101 1666130 1719210 1723469 
##       6       7      10      10       7       8       8       3       4       9       1       3       2 
##  204800  204875  204909  205005  205047  205112  205120  205153  205682  205690  205773  205781  205815 
##      10       8       2       5       6       8      11       7       6       7       9       2       5 
##  206334  207373  207407  216341  219741  220285  226704  232207  232223  232231  232249  232264  232504 
##       3       1       1       8       2       3       6       9       9       6       2       7       3 
##  232512  232538  232546  232553  232561  232579  232587  232595  232603  232611  232645  232728  232777 
##       6       3       5       5       4       3       7       5       7       2       1       3       6 
##  233130  233296  233361  233676  233718  233734  233825  233882  233890  233908  233916  233924  233932 
##       4       5       4       3       7       5       3       1       3       6       8       9       3 
##  233940  233957  233965  233973  233981  233999  234021  234062  234096  234104  234112  234120  234138 
##       4       6       3       5       6       8       7       5       6       6       4       4       8 
##  234153  234161  234187  234195  234203  234229  234237  234351  234369  234377  234385  234401  234419 
##       6       8       5       1       4       5       6       1       9       5       8       7       8 
##  234427  234443  234450  234500  234583  234674  234682  234781  234831  234856  236158  236349  236422 
##       7       6       3       7       3       9       7       3       7       7       6       5      16 
##  236448  236463  236471  236489  236653  236661  236927  287409  287425  287466  309286  309294  309377 
##       4       8       1       7       1      31       9      10       6       3       1      12       1 
##  309419  309435  309567  310433  310441  312090  312215  312306  312421  312744  312868  313080  313239 
##       3       1       4       3       1       2       6      10       5       2       2       1       2 
##  313395  313460  313890  313908  313965  313981  314070  314187  314211  314237  314245  314252  314260 
##       9       1       3       8       6       6       2       5       4       4       4       6       6 
##  314278  314294  405258  405498  405704  405738  405746  405852  405894  405902  405928  405936  406009 
##      10       5       6       8       5       5       4       9       8       8       8       9       6 
##  406066  406082  406116  406124  406140  406215  406223  406264  406413  406595  406629  406645  406975 
##      10       7       6       8       3       5       6       4       8       8       9       5       6 
##  406983  407007  407049  408211  408245  408278  408286  408294  408328  408336  408393  408468  408476 
##      10       4       7       1       7       4       4       3       5       1       4       8       7 
##  408484  408492  408559  408567  408609  408666  408732  408773  408823  408856  408922  408955  408971 
##       5       5       4       8       6       8       3       4       3       8       7       4       5 
##  409003  409011  409029  409193  409227  409235  409243  409284  409292  409300  409318  409326  409359 
##       7      10       9       5       8       8       6       9       7       9       7       4       7 
##  409441  409565  409896  410464  410480  410514  410613  410670  410746  410779  410787  410803  473249 
##       9       4       2       1       7       5       1       4       7       9       5       7       7 
##  481283  486688  486928  489120  495069  495325  498782  499863  502922  504142  517581  517888  518084 
##       9       8       2       8      14      16       1       3       2      13      29       8      22 
##  518472  519496  519595  519678  525923  550392  551309  557587  579268  579276  579284  579292  579300 
##      10       6       6       6      21       7       3       9       9       1       1      17      20 
##  585885  587055  587147  587204  589200  589747  589804  591255  591602  592147  612051  612119  612291 
##       6      15       4      13       1       1       5      23      14       4       2       2       5 
##  612416  612507  612689  612747  612770  612804  615013  616110  617787  617829  621391  623017  623041 
##       2       9       1       1       1       4       5       6      17       6      15       4       2 
##  637215  637272  639542  647388  647412  647446  647628  655746  671628  672105  678961  679829  680058 
##       1       9       2      10       7      16       4       1       4       4       2       2       9 
##  680082  680124  699603  712562  712711  712778  723031  730655  731273  735498  736116  775700  776039 
##       1      18      12       2       2       2       3       6       2       5       2       5       3 
##  783423  783597  783621  783696  783704  783720  783787  783795  791319  791574  794438  796888  818674 
##      12       1       1      11       1      14      10      10      20       4       2       2       6 
##  818708  844159  844183  891408  891812  895482  899351  927871  930958  931055  931063  932236  932434 
##       3       2       4      14       1       1       3      10       1      15      18      12       2 
##  932491  932608  932848  933226  933283  933291  933317  933531  933598  933846 
##       4       8       7       6      10       1       6       1       2       6 
## [1] "Frequency table after encoding"
## cod_mod. 
## 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 
##   7   8   2   9   4  11   5   9   1   9   6   4   4   5   3  31   4  10   2   1  16   3   4   8   6  13   1 
## 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 
##  15   7   1   2   4   1  15   2   5   7  10   3   9   2  13   2   8  10   9   1  10   6  10  14   6   6   9 
## 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 
##   3   9   7   2   7   4   4   6  18   2   5   3   4   7   2   2   8   6   8  12   1   6   1  17   3   8   3 
## 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 
##   8   3   1   1   1  12   4  16  17   7  12   2   1   1   9   9   4   3   6   9   4   9   6   5   7   7   9 
## 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 
##   7   7   9   1   5   5   6   3   4   3   3   9   9   7  10   7   7   2  21  18   8   6   7  14   2   6   3 
## 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 
##   9   4   9   6   2   9   8   9   5   4   4   5  17   3   8   8   6   8   6   8   4   3   7   6   4   3  10 
## 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 
##   6   4   5   1  10   7  22   4   4   5   7   3   3   9  14   7   3  10  14   7   5   9  11   4  19  10   1 
## 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
##   4   3   1   5   7   5   7   1  16   1   3   5   7   4   2   2   4   6   4  20   2   6  14  23  11  10   7 
## 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 
##  11   1   8  10  16   2   9  13  11   3   9   7   2   5   4   8   1  20  11   1   4   2   1   8   7   8   8 
## 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 
##   5  16   8   2   2   6   9  12   2   7   9   5   2   3  12   8   4   2   6   8   8   1   5   9   7   1   4 
## 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 
##   6   8   3   7   3  10   5   9   8  19  10   6  15   6   1  11   1   3  20   1   4   7   6  14   4   1   1 
## 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 
##   5   2   7   7   1   6   3   3   8   8   8   7  14   1   6  10   4  11   6   7   5   1   6   6   4   9   6 
## 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 
##   2  16   8   9   1   2  27   1   5   4   3   2   5   7   8   9   2  11   6   8   2   8   5  10   1   6   6 
## 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 
##   6   4   5   5   1   5   5  12   1   3  13   5   7   8   6   1   5  29   6   6   5   8  11   4   7   7   5 
## 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 
##   2   1   6   1   3  13  10   2   2   4   6   2   8   1   4   6  10   3   4   1   5   1   3  10   1   7   6 
## 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 
##   6   9   1   6  18  15   2  12   1   7  17   1   1   8   5   5   2   2   2  14   4 
## [1] "Frequency table before encoding"
## school_fixed_primary. Seleccione la escuela primaria a donde realmente va el niño(a)
##        219741 312561 405217 408922 408971 409243 612291 647388 679829 
##   2757      1      1      2      1      1      1      1      1      3 
## [1] "Frequency table after encoding"
## school_fixed_primary. Seleccione la escuela primaria a donde realmente va el niño(a)
##  451  452  453  454  455  456  457  458  459  460 
##    1    1    1    1 2757    3    1    2    1    1 
## [1] "Frequency table before encoding"
## school_fixed_sec. Seleccione la escuela secundaria a donde realmente va el niño(a)
##         1253905 1345024 1347301 1347434 1347921 1379544 1380021 1392240 1395367 1401934 1402536 1452705 
##    2725       2       1       3       1       1       2       1       1       1       1       1       1 
## 1540996  236109  309567  579300  589804  612507  616110  621391  637272  680124  783720  791319  931055 
##       1       1       1       1       2       1       1       1       7       1       2       2       3 
##  931436  932608  933226  933317 
##       1       1       1       1 
## [1] "Frequency table after encoding"
## school_fixed_sec. Seleccione la escuela secundaria a donde realmente va el niño(a)
##  959  960  961  962  963  964  965  966  967  968  969  970  971  972  973  974  975  976  977  978  979  980 
##    1    1    1    1    1    1    1    3    1    1    1    2    1    2    1    1 2725    1    1    1    3    7 
##  981  982  983  984  985  986  987  988 
##    2    1    2    1    1    1    2    1 
## [1] "Frequency table before encoding"
## nom_dist. 
##                                 ACCHA             ACHOMA               ACOS               ALCA 
##                 16                 38                  1                  3                 21 
##  ALTO SELVA ALEGRE          ANCAHUASI            ANDAGUA     ANDAHUAYLILLAS               ANTA 
##                  1                 97                  3                  1                 87 
##           AREQUIPA             CAICAY              CALCA         CAPACMARCA           CAYARANI 
##                  1                 42                  1                 33                 38 
##           CAYLLOMA              CCAPI             CCORCA     CERRO COLORADO            CHACHAS 
##                 29                 21                 10                  3                 20 
##        CHALLABAMBA            CHAMACA             CHECCA       CHILCAYMARCA      CHINCHAYPUJIO 
##                 63                116                135                  8                 57 
##          CHINCHERO             CHIVAY              CHOCO        CHUQUIBAMBA             COLCHA 
##                 44                  6                  1                  5                 18 
##        COLQUEMARCA         COLQUEPATA          COPORAQUE          COTAHUASI              CUSCO 
##                101                149                 49                 14                  5 
##            ESPINAR         HUANCARANI         HUANOQUITE         HUAROCONDO       HUAYLLABAMBA 
##                  6                135                 37                 34                  1 
##        HUAYNACOTAS               IRAY      JACOBO HUNTER JOSE MARIA QUIMPER        KUNTURKANKI 
##                 21                  3                  2                  1                 80 
##              LAMAY             LANGUI           LIVITACA             LLUSCO              LUCRE 
##                 40                  9                139                103                 16 
##              MARAS      OLLANTAYTAMBO             OMACHA          ORCOPAMPA       PACCARITAMBO 
##                 32                  5                111                 47                 21 
##         PAMPACOLCA         PAMPAMARCA             PARURO         PAUCARPATA        PAUCARTAMBO 
##                  5                 27                 16                  1                  6 
##          PILLPINTO              PISAC            PUCYURA              PUYCA             QUEHUE 
##                  1                 61                  1                 54                  2 
##     QUI<U+FFFD>OTA           RONDOCAN            SACHACA          SALAMANCA      SAMUEL PASTOR 
##                 57                 44                  1                  2                  1 
##       SAN JERONIMO       SAN SALVADOR      SAN SEBASTIAN           SANTIAGO        SANTO TOMAS 
##                 15                 59                  1                 19                 99 
##             SIBAYO            SICUANI           SOCABAYA              TAPAY              TIPAN 
##                  1                  2                  1                  2                  2 
##          TOMEPAMPA           URUBAMBA            VELILLE             VIRACO            WANCHAQ 
##                 11                 29                 96                  2                  2 
##            YANAOCA          YAURISQUE               YURA             ZURITE 
##                  1                 51                  2                 15 
## [1] "Frequency table after encoding"
## nom_dist. 
## 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 
##   2  14   2  38  80   1  47   1 101 135  21  61   1  16  99  10   5  33  49  27  59   1  15   2   1 103  15 
## 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 
##   6  97  21  38  57   2   1   1  32  42 149  16   1   3   1  87  40  20   1   1   2   9   3   1  37  44   8 
## 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 
##   2  51  11  63   6   1   1  29 139  29  18  21  19   5   3   2  44   2  96   3  21  16   2   5 135   6   1 
## 720 721 722 723 724 725 726 727 
##  34  54  57 116 111   1   5   1 
## [1] "Frequency table before encoding"
## district_fixed. ¿Entonces en qué distrito vives?
##           Otro CERRO COLORADO        CHACHAS      ORCOPAMPA         VIRACO           ALCA    HUAYNACOTAS 
##             28              1              1              8              1              1              1 
##          PUYCA      TOMEPAMPA   SAN JERONIMO       SANTIAGO       RONDOCAN           ANTA      ANCAHUASI 
##              3              1              2              1              2              1              1 
##     HUAROCONDO         ZURITE          PISAC         CHECCA    SANTO TOMAS       LIVITACA         LLUSCO 
##              1              1              1              2              3              1              1 
##        ESPINAR         PARURO     HUANOQUITE   PACCARITAMBO         CAICAY     COLQUEPATA      CHINCHERO 
##              1              1              1              1              9              5              1 
##          MARAS           <NA> 
##              1           2687 
## [1] "Frequency table after encoding"
## district_fixed. ¿Entonces en qué distrito vives?
##  375  376  377  378  379  380  381  382  383  384  385  386  387  388  389  390  391  392  393  394  395  396 
##    1    3    1    1    1    9    2    1    1    2    1    8    5    1    1    1    3    1   28    1    1    2 
##  397  398  399  400  401  402  403 <NA> 
##    1    1    1    1    1    1    1 2687 
## [1] "Frequency table before encoding"
## p12. Seleccione la escuela a la que le gustaría asistir. Si la escuela no está en la 
##         1031574 1117704 1270214 1274398 1320647 1321421 1327287 1339472 1341585 1343573 1343581 1344639 
##    2281       1       3       1       1       4       4       1       1       1       4       3       4 
## 1345024 1347301 1347921 1347939 1347970 1364868 1364900 1369248 1370378 1371095 1372507 1374438 1379320 
##       2       2       1       6       8       4       2       1       1       1       3       2       1 
## 1380021 1380120 1386432 1388651 1389279 1389303 1390095 1390467 1390517 1392083 1392091 1392117 1392125 
##       1       1       1       1       2       1       1       1       3       2       3       1       4 
## 1392174 1392224 1392232 1392240 1392257 1393313 1396191 1396209 1396225 1396852 1396886 1398932 1401934 
##       3       9       1       3       3       1       1       4       9       3       6       1       6 
## 1401942 1401959 1402536 1412873 1415983 1423003 1452705 1458348 1470582 1523802 1523810 1523828 1540988 
##       2      10       3       1       1       3       5       2       1       4       5       3       7 
## 1540996 1625532 1625557 1625573 1637263 1659101  207407  233056  233130  236158  236174  236422  236430 
##       2       1       4       6       1       4       2       2       3       3       1       1       1 
##  236463  236646  236653  236661  236927  309286  309294  309419  309567  309641  309716  310433  310441 
##       2       3       1       9       1       1       9       3       3       5       1       1       1 
##  477828  489096  495069  495325  517581  518084  518241  518472  519678  525923  579243  579292  579300 
##       1       2       3       7      10       6       1       3       2      14       1       5       6 
##  579409  587055  587204  589804  591164  591255  591602  612051  612507  617787  617829  621391  637272 
##       1       2      13       1       1       3       1       2      10       6       7       7      15 
##  647446  680082  680124  699603  712778  730515  783696  783704  783720  783787  783795  785097  791319 
##       6       1      11       3       1       1       7       1       1       5       4       1      19 
##  891408  891788  894915  927814  927871  929638  930859  931063  931436  932236  932608  933283  933317 
##       7       1       1       1       2       1       1       6       1       3       3       3       4 
##  933556  933598  933846  934141 
##       3       1       5       2 
## [1] "Frequency table after encoding"
## p12. Seleccione la escuela a la que le gustaría asistir. Si la escuela no está en la 
##  488  489  490  491  492  493  494  495  496  497  498  499  500  501  502  503  504  505  506  507  508  509 
## 2281    1    1    5    1    6    3    1    2    6    1    5    1    4    1    3    1    3    1    3    7    7 
##  510  511  512  513  514  515  516  517  518  519  520  521  522  523  524  525  526  527  528  529  530  531 
##    1    1    2    3    1    1    1    1    5    1    3    6    1    1    1    2    3    2    4    3    1    2 
##  532  533  534  535  536  537  538  539  540  541  542  543  544  545  546  547  548  549  550  551  552  553 
##    1    4    4   14    4    1    6    1    3    1    1    2    6    3    9    3    1    1   15    1    2    1 
##  554  555  556  557  558  559  560  561  562  563  564  565  566  567  568  569  570  571  572  573  574  575 
##    4    1    3    3    4    2    1   11    6    8    1    4    1    4    3    5    1    3    1    2    2    1 
##  576  577  578  579  580  581  582  583  584  585  586  587  588  589  590  591  592  593  594  595  596  597 
##    2    1    4    1    3    1    6   10    7    1    1    3    3    2   13    1    1    3    1    3    2    3 
##  598  599  600  601  602  603  604  605  606  607  608  609  610  611  612  613  614  615  616  617  618  619 
##    2    4    6    1    1    3    1    1    1   10    1    1    5    9   10    7    1    7    4    2    1    2 
##  620  621  622  623  624  625  626  627  628  629  630  631  632  633  634 
##    3    9    1    1    1    2    5    7    6    3    9   19    1    3    2 
## [1] "Frequency table before encoding"
## school2014_name1. Seleccione la escuela a la que asistió en el año escolar del 2014. . Asegúrese q
##         1117944 1342294 1377209 1398858 1399443 1412634 1694637  204800  204875  204909  204925  205005 
##    2001       1       1       7       2       1       3       2       3       3       5       1       1 
##  205047  205153  205682  205690  205773  205781  205815  206334  216341  220285  226704  232207  232231 
##       6       6       6       6       1       2       3       3       4       1       1       3       2 
##  232249  232264  232504  232512  232538  232546  232553  232561  232579  232587  232595  232603  232611 
##       1       3       1       5       1       4       4       3       2       5       2       3       1 
##  232645  232728  232777  233015  233676  233718  233734  233825  233833  233890  233965  233973  233981 
##       2       3       6       1       2       2      10       9       1       1       2       2       2 
##  233999  234021  234039  234047  234062  234096  234112  234120  234138  234161  234187  234195  234203 
##       7       8       1       1       8       3       9      10       2       3       3       8       7 
##  234211  234229  234237  234369  234377  234385  234401  234427  234435  234443  234450  234476  234500 
##       3       4       1       9       6       2       4       1       2      11       4       1       8 
##  234575  234583  234674  234682  234781  234831  234856  287409  287417  287425  287466  287813  307579 
##       1       1       8       3       1       6       9       9       1       1       2       1       1 
##  312090  312215  312306  312421  312868  313239  313395  313460  313890  313908  313965  313973  313981 
##       3       4       4       6       4       1       1       1       7       4       2       1       1 
##  313999  314096  314187  314211  314237  314245  314252  314260  314278  314294  405001  405050  405258 
##       1       4       1       4       1       4       5       4       3       6       1       1       3 
##  405498  405704  405738  405746  405753  405852  405894  405902  405910  405936  406009  406041  406058 
##       1       1       5       3       2       1       1       3       1       4       3       1       1 
##  406066  406116  406124  406140  406223  406249  406264  406397  406413  406629  406637  406983  407007 
##       3       4       2       1       1       1       2       1       1       5       7       3       9 
##  407049  408229  408237  408245  408278  408286  408294  408336  408344  408393  408468  408476  408484 
##       4       1       2       2       2       1       2       1       7       3       5       6       4 
##  408492  408526  408542  408559  408583  408609  408666  408716  408732  408757  408773  408823  408856 
##       3       1       2       3       5       4       4       1       7       1       5       4       5 
##  408922  408955  408971  409003  409011  409029  409193  409235  409243  409284  409292  409300  409318 
##       7       4       5       1       5       1       2       5       3       3       1       6       1 
##  409359  409441  409565  409896  410480  410670  410738  410779  481283  499863  502922  517888  519496 
##       5       4       3       2       3       3       1       1       2       7       1       7       5 
##  519595  550392  551309  557587  585885  587089  587147  592634  612291  612416  612655  615013  623017 
##       4       5       2       5       4       1       3       1       1       1       1       2       6 
##  623041  637215  647388  647412  647628  671628  678839  678904  678961  680058  712562  712711  723031 
##       1       2       5       1       3       2       1       2       2       4       4       2       1 
##  730655  731273  731596  735480  735498  736033  736116  775700  783423  783597  796888  818674  818708 
##       2       8       4       1       1       1       1       5       5       1       3       3       2 
##  844159  844183  899351  930958  932434  932491  932848 
##       2       3       2      11       1       2       5 
## [1] "Frequency table after encoding"
## school2014_name1. Seleccione la escuela a la que asistió en el año escolar del 2014. . Asegúrese q
##  645  646  647  648  649  650  651  652  653  654  655  656  657  658  659  660  661  662  663  664  665  666 
##    3    1    5    1    1    2    1    4    2    2    6    1    1    4    1 2001    4    3    2    2    1    8 
##  667  668  669  670  671  672  673  674  675  676  677  678  679  680  681  682  683  684  685  686  687  688 
##    1    4    2    5    7    2    1    1    2    7    1    1    3    8    1    7    1   11    6    4    1    2 
##  689  690  691  692  693  694  695  696  697  698  699  700  701  702  703  704  705  706  707  708  709  710 
##    7    6    3    1    1    5    4    6    3    4    1    6    9    6    1    6    2    3    2    4    2    1 
##  711  712  713  714  715  716  717  718  719  720  721  722  723  724  725  726  727  728  729  730  731  732 
##    2    1    5    1    2    1    1    1    4    4    2    1    2    3    2    2    3    3    3    8    1    1 
##  733  734  735  736  737  738  739  740  741  742  743  744  745  746  747  748  749  750  751  752  753  754 
##    3    2    3    1    3    5    1    7    2    2    1    1    3   10    2    5    6    3    4    1    1    1 
##  755  756  757  758  759  760  761  762  763  764  765  766  767  768  769  770  771  772  773  774  775  776 
##    1    7    4    5    3    1    1    1    3    2    3    2    5    2    5    2    2    7    4    3    1    1 
##  777  778  779  780  781  782  783  784  785  786  787  788  789  790  791  792  793  794  795  796  797  798 
##    5    1    8    1    3    3    1    1    1    9    3    3    9    7    2    3    1    1    7    1    4    3 
##  799  800  801  802  803  804  805  806  807  808  809  810  811  812  813  814  815  816  817  818  819  820 
##    3    5    1    4    3    4    2    1    2    2    3    4    4    1    1    5    2    8    4    2    3    2 
##  821  822  823  824  825  826  827  828  829  830  831  832  833  834  835  836  837  838  839  840  841  842 
##    1    6    1    3    1   10    5    1    4    1    4    2    2    5   11    5    1    9    1    6    2    4 
##  843  844  845  846  847  848  849  850  851  852  853  854  855  856  857  858  859  860  861  862  863  864 
##    1    8    1    1    4    1    1    4    1    6    5    1    5    1    3    2    4    4    2    4    9    3 
##  865  866  867  868  869  870  871  872  873  874  875  876  877  878  879  880  881  882  883  884  885 
##    1    7    3    1    1    1    1    6    4    5    1    3    5    5    2    5    1    3    1    9    3 
## [1] "Frequency table before encoding"
## school2013_name1. Seleccione la escuela a la que asistió en el año escolar del 2013. Si la escuela
##         1117944 1342294 1377209 1398858 1399443 1412634 1423201 1694637  204800  204875  204909  204925 
##    1979       1       2       7       1       1       4       1       2       4       3       5       2 
##  205005  205047  205153  205682  205690  205773  205781  205815  205880  206326  206334  216341  219188 
##       1       6       6       6       5       1       3       3       1       1       3       4       1 
##  220285  226704  226894  232207  232231  232249  232264  232504  232512  232538  232546  232553  232561 
##       1       1       1       3       2       2       3       1       5       1       4       4       3 
##  232579  232587  232595  232603  232611  232645  232728  232777  233015  233668  233676  233718  233734 
##       2       5       2       4       1       2       3       6       1       1       2       2      10 
##  233759  233825  233833  233890  233965  233973  233981  233999  234021  234039  234047  234062  234096 
##       1       9       1       2       2       2       2       6       8       1       1       8       3 
##  234112  234120  234138  234161  234187  234195  234203  234211  234229  234237  234369  234377  234385 
##       9      10       2       3       3       8       7       3       4       1      10       6       2 
##  234401  234427  234435  234443  234450  234500  234575  234583  234674  234682  234781  234831  234856 
##       5       1       2      11       4       8       1       1       8       3       1       6       8 
##  287409  287417  287425  287466  287813  311878  312090  312215  312306  312421  312629  312868  313239 
##       9       1       1       2       1       1       3       4       4       6       1       4       1 
##  313395  313460  313890  313908  313965  313973  313981  313999  314096  314187  314211  314237  314245 
##       1       1       7       5       2       1       1       1       4       1       4       1       5 
##  314252  314260  314278  314294  405001  405050  405076  405258  405498  405704  405738  405746  405753 
##       5       4       3       6       1       1       1       4       1       1       4       3       2 
##  405894  405902  405936  406009  406033  406041  406066  406116  406124  406140  406223  406249  406264 
##       1       3       4       3       1       1       2       3       2       1       1       1       1 
##  406371  406413  406629  406637  406983  407007  407049  408237  408245  408278  408286  408294  408344 
##       2       1       6       8       4       7       2       1       1       2       1       2       7 
##  408351  408385  408393  408468  408476  408484  408492  408526  408542  408559  408583  408591  408609 
##       1       1       4       5       6       4       4       2       2       3       5       1       4 
##  408666  408716  408732  408757  408773  408823  408856  408922  408955  408971  409003  409011  409029 
##       5       1       7       1       4       4       5       7       4       5       1       6       1 
##  409193  409235  409243  409284  409292  409300  409318  409359  409441  409557  409565  409896  410480 
##       3       5       3       3       1       5       1       5       4       1       1       2       3 
##  410670  410704  410738  410779  481283  486688  499863  502922  517888  519496  519595  550392  551309 
##       2       1       1       1       2       1       7       1       7       5       5       5       2 
##  557587  585885  587089  587147  592634  612291  612416  612655  612804  615013  623017  623041  637215 
##       5       4       1       2       1       1       2       1       1       2       6       1       2 
##  647305  647388  647412  647628  671628  678839  678904  678961  680058  712562  712711  723031  730655 
##       1       5       1       3       2       1       1       2       4       4       1       1       2 
##  731273  731596  735480  735498  736033  736116  745612  775700  783423  783597  796888  818674  818708 
##       8       4       1       1       2       1       1       5       5       1       3       4       2 
##  818880  844159  844183  844266  899351  930958  932434  932491  932848  932871 
##       1       2       3       1       2      11       1       2       6       1 
## [1] "Frequency table after encoding"
## school2013_name1. Seleccione la escuela a la que asistió en el año escolar del 2013. Si la escuela
##  513  514  515  516  517  518  519  520  521  522  523  524  525  526  527  528  529  530  531  532  533  534 
##    1    4    4    5    3    1    1    2    2    5    9    1    5    5    1    2    3 1979    3    1    4    2 
##  535  536  537  538  539  540  541  542  543  544  545  546  547  548  549  550  551  552  553  554  555  556 
##    8    2    5    5    1    3    9    1    5    1    1    5    1    5    2    1    2    1    4    2    1    6 
##  557  558  559  560  561  562  563  564  565  566  567  568  569  570  571  572  573  574  575  576  577  578 
##    1    1    1    3    1    1   10    2    2    1    1    3    4    4    1    4    5    2    4    2    6    2 
##  579  580  581  582  583  584  585  586  587  588  589  590  591  592  593  594  595  596  597  598  599  600 
##    1    1    1    3    1    6    5    6    2    3    2    1    1    4    1    6   11    6    4    1    5    1 
##  601  602  603  604  605  606  607  608  609  610  611  612  613  614  615  616  617  618  619  620  621  622 
##    7    1    1    1    1    1    5    1   10    3   10    1    1    4    3    6    1    4    1    3    1    6 
##  623  624  625  626  627  628  629  630  631  632  633  634  635  636  637  638  639  640  641  642  643  644 
##    1    8    2    1    2    1    2    4    4    1    2    3    4    3    3    2    4    1    3    5    5    1 
##  645  646  647  648  649  650  651  652  653  654  655  656  657  658  659  660  661  662  663  664  665  666 
##    2    1    1    4    4   11    7    4    1    6    1    2    4    1    1    1    1    6    3    2    6    2 
##  667  668  669  670  671  672  673  674  675  676  677  678  679  680  681  682  683  684  685  686  687  688 
##    2    5    1    2    2    2    1    2    4    1    1    1    1    1    4    7    1    6    1    5    1    1 
##  689  690  691  692  693  694  695  696  697  698  699  700  701  702  703  704  705  706  707  708  709  710 
##    4    8    3    3    1    4    1    1    2    3    7    1    4    8    4    5    3    7    1    8    9    3 
##  711  712  713  714  715  716  717  718  719  720  721  722  723  724  725  726  727  728  729  730  731  732 
##    1    1    5    2    7    8    2    2    1    3    2    1    1    5    2    2    2    1    5    6    1    3 
##  733  734  735  736  737  738  739  740  741  742  743  744  745  746  747  748  749  750  751  752  753  754 
##    6    2    4    3    2    1    4    8    1    1    1    5    2    7    2    3    1    4    1    3    5    2 
##  755  756  757  758  759  760  761  762  763  764  765  766  767  768  769 
##    1    7    2    1    1    1    3    4    7    2    8    4    1    1    1
# !!!Drop the variables below: they contain identifying information

dropvars <- c(
  "i9a", "school2016", "school_fixed", "p11b",
  "school2014_name", "school2013_name",
  "district_fixed1", "centro_poblado"
)
# Keep only the columns whose name has no match in dropvars.
mydata <- mydata[is.na(match(names(mydata), dropvars))]

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Focus on variables with a "Lowest Freq" in dictionary of 30 or less.

# !!!Drop the variables below: they contain identifying information

dropvars <- c("i15", "i16", "i16a", "i18_fixed")
# Keep only the columns whose name has no match in dropvars.
mydata <- mydata[is.na(match(names(mydata), dropvars))]


# Top code days absent from school (5 or more)

# NOTE(review): the result is stored in `mydata2`, while the other steps in this
# part of the script read from and assign to `mydata` - confirm that the
# top-coded p12b actually reaches the dataset that is processed further.
# 888 and 999999 are passed as `missing`; presumably these are missing-value
# codes excluded from top-coding - TODO confirm against top_recode().
mydata2 <- top_recode ("p12b", break_point=5, missing=c(888, 999999)) # Topcode cases with 5 or more days absent from school.
## [1] "Frequency table before encoding"
## p12b. ¿En el mes pasado cuántos días faltaste a la escuela?
##    0    1    2    3    4    5    6    7    9   10 <NA> 
## 1709  501  289  127   33   49    5    5    1    5   45

## [1] "Frequency table after encoding"
## p12b. ¿En el mes pasado cuántos días faltaste a la escuela?
##         0         1         2         3         4 5 or more      <NA> 
##      1709       501       289       127        33        65        45

Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values

# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Names of the indirect-PII variables to tabulate. Regular families of names
# are generated instead of typed out one by one; the result is a plain
# character vector of 200 names. Helpers live inside local() so that only
# indirect_PII is created in the workspace.
indirect_PII <- local({
  # Numeric suffixes used by the dout2014_* and dout2013_* families.
  option_codes <- c(1:14, 98, 99)

  # Item-by-suffix families, item-major: a2, a2b, a2c, a2d, a3, ..., a11d.
  grid_names <- function(prefix) {
    paste0(prefix, rep(2:11, each = 4), c("", "b", "c", "d"))
  }

  c(
    "i10", "i12", "i23", "dropout", "school_fixed_level",
    "do_grade2015_fixed", "do_approved_grade2015", "do_2015_fixed",
    "dropout_approved_fixed",
    "p1a", paste0("p1a_", 1:3),
    # This family is listed without a "dout_reasons_1" entry.
    "dout_reasons", paste0("dout_reasons_", option_codes[-1]),
    "dout_decision", "p20", "same_school2015", "switcher_2016",
    "asissted_2014", "same_school2014", "switcher_2015",
    "dout2014", paste0("dout2014_", option_codes),
    "asissted_2013", "same_school2013", "switcher_2014",
    "dout2013", paste0("dout2013_", option_codes),
    grid_names("a"),
    grid_names("m"),
    "p22a", "p22b",
    "p25_note", paste0("p25a", 1:3), paste0("p25", letters[2:5]),
    "p25_1_note1", paste0("p25_1", letters[1:6]),
    # p25_2g, p25_3h, ..., p25_14s: number and letter advance together.
    paste0("p25_", 2:14, letters[7:19]), "p25_14t",
    "p25_2_note", paste0("p25_2", letters[1:6]),
    "p25_2g1", "p25_2h", "p25_2i",
    "p27_note", paste0("p27", letters[1:5])
  )
})

# Hand the variable list to capture_tables(). The helper is not defined in this
# part of the script (presumably it comes from the sourced functions file);
# NOTE(review): confirm what it tabulates and where the output is written.
capture_tables (indirect_PII)


# Recode the very specific p1a values "1 3" and "2 3" to "Otros".

mydata$p1a[mydata$p1a %in% c("1 3", "2 3")] <- "Otros"


# Regroup switcher_2016: six break points, one value label per resulting group.
# Groups 3, 4 and 6 share the label text "Otro".
break_activity <- c(-98, 1, 2, 3, 4, 99)
labels_activity <- setNames(
  c(1, 2, 3, 4, 5, 6),
  c(
    "No se",
    "Porque mi escuela anterior no tenia nivel secundario",
    "Otro",
    "Otro",
    "Porque mi nueva escuela es mejor que mi antigua escuela",
    "Otro"
  )
)
mydata <- ordinal_recode(
  variable = "switcher_2016",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## switcher_2016. ¿Por qué te cambiaron/ te cambiaste de escuela?
##                                                   No sé 
##                                                       1 
##    Porque mi escuela anterior no tenía nivel secundario 
##                                                     961 
##       Porque mi nueva escuela está más cerca de mi casa 
##                                                      21 
##           Porque me mudé a otro centro poblado/distrito 
##                                                      18 
## Porque mi nueva escuela es mejor que mi antigua escuela 
##                                                      43 
##                                                    Otro 
##                                                      31 
##                                                    <NA> 
##                                                    1694 
##      recoded
##       [-98,1) [1,2) [2,3) [3,4) [4,99) [99,1e+06)
##   -98       1     0     0     0      0          0
##   1         0   961     0     0      0          0
##   2         0     0    21     0      0          0
##   3         0     0     0    18      0          0
##   4         0     0     0     0     43          0
##   99        0     0     0     0      0         31
## [1] "Frequency table after encoding"
## switcher_2016. ¿Por qué te cambiaron/ te cambiaste de escuela?
##                                                   No se 
##                                                       1 
##    Porque mi escuela anterior no tenia nivel secundario 
##                                                     961 
##                                                    Otro 
##                                                      70 
## Porque mi nueva escuela es mejor que mi antigua escuela 
##                                                      43 
##                                                    <NA> 
##                                                    1694 
## [1] "Inspect value labels and relabel as necessary"
##                                                   No se 
##                                                       1 
##    Porque mi escuela anterior no tenia nivel secundario 
##                                                       2 
##                                                    Otro 
##                                                       3 
##                                                    Otro 
##                                                       4 
## Porque mi nueva escuela es mejor que mi antigua escuela 
##                                                       5 
##                                                    Otro 
##                                                       6
# switcher_2015: same collapsing as for switcher_2016 -- bins 3, 4 and 6 are
# all labelled "Otro". The bin boundaries come from break_activity, which is
# defined with the switcher_2016 recode.
labels_activity <- c(1, 2, 3, 4, 5, 6)
names(labels_activity) <- c(
  "No se",
  "Porque mi escuela anterior no tenia nivel secundario",
  "Otro",
  "Otro",
  "Porque mi nueva escuela es mejor que mi antigua escuela",
  "Otro"
)
mydata <- ordinal_recode(
  variable = "switcher_2015",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## switcher_2015. ¿Por qué te cambiaron/ te cambiaste de escuela?
##                                                   No sé 
##                                                       2 
##    Porque mi escuela anterior no tenía nivel secundario 
##                                                     971 
##       Porque mi nueva escuela está más cerca de mi casa 
##                                                      28 
##           Porque me mudé a otro centro poblado/distrito 
##                                                      23 
## Porque mi nueva escuela es mejor que mi antigua escuela 
##                                                      48 
##                                                    Otro 
##                                                      47 
##                                                    <NA> 
##                                                    1650 
##      recoded
##       [-98,1) [1,2) [2,3) [3,4) [4,99) [99,1e+06)
##   -98       2     0     0     0      0          0
##   1         0   971     0     0      0          0
##   2         0     0    28     0      0          0
##   3         0     0     0    23      0          0
##   4         0     0     0     0     48          0
##   99        0     0     0     0      0         47
## [1] "Frequency table after encoding"
## switcher_2015. ¿Por qué te cambiaron/ te cambiaste de escuela?
##                                                   No se 
##                                                       2 
##    Porque mi escuela anterior no tenia nivel secundario 
##                                                     971 
##                                                    Otro 
##                                                      98 
## Porque mi nueva escuela es mejor que mi antigua escuela 
##                                                      48 
##                                                    <NA> 
##                                                    1650 
## [1] "Inspect value labels and relabel as necessary"
##                                                   No se 
##                                                       1 
##    Porque mi escuela anterior no tenia nivel secundario 
##                                                       2 
##                                                    Otro 
##                                                       3 
##                                                    Otro 
##                                                       4 
## Porque mi nueva escuela es mejor que mi antigua escuela 
##                                                       5 
##                                                    Otro 
##                                                       6
# switcher_2014: unlike the 2016 and 2015 recodes, bin 3 ("... esta mas cerca
# de mi casa") keeps its own label here; only bins 4 and 6 become "Otro".
# The bin boundaries come from break_activity, defined with the switcher_2016
# recode.
labels_activity <- setNames(
  c(1, 2, 3, 4, 5, 6),
  c(
    "No se",
    "Porque mi escuela anterior no tenia nivel secundario",
    "Porque mi nueva escuela esta mas cerca de mi casa",
    "Otro",
    "Porque mi nueva escuela es mejor que mi antigua escuela",
    "Otro"
  )
)
mydata <- ordinal_recode(
  variable = "switcher_2014",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## switcher_2014. ¿Por qué te cambiaron/ te cambiaste de escuela?
##                                                   No sé 
##                                                       6 
##    Porque mi escuela anterior no tenía nivel secundario 
##                                                     960 
##       Porque mi nueva escuela está más cerca de mi casa 
##                                                      35 
##           Porque me mudé a otro centro poblado/distrito 
##                                                      29 
## Porque mi nueva escuela es mejor que mi antigua escuela 
##                                                      56 
##                                                    Otro 
##                                                      60 
##                                                    <NA> 
##                                                    1623 
##      recoded
##       [-98,1) [1,2) [2,3) [3,4) [4,99) [99,1e+06)
##   -98       6     0     0     0      0          0
##   1         0   960     0     0      0          0
##   2         0     0    35     0      0          0
##   3         0     0     0    29      0          0
##   4         0     0     0     0     56          0
##   99        0     0     0     0      0         60
## [1] "Frequency table after encoding"
## switcher_2014. ¿Por qué te cambiaron/ te cambiaste de escuela?
##                                                   No se 
##                                                       6 
##    Porque mi escuela anterior no tenia nivel secundario 
##                                                     960 
##       Porque mi nueva escuela esta mas cerca de mi casa 
##                                                      35 
##                                                    Otro 
##                                                      89 
## Porque mi nueva escuela es mejor que mi antigua escuela 
##                                                      56 
##                                                    <NA> 
##                                                    1623 
## [1] "Inspect value labels and relabel as necessary"
##                                                   No se 
##                                                       1 
##    Porque mi escuela anterior no tenia nivel secundario 
##                                                       2 
##       Porque mi nueva escuela esta mas cerca de mi casa 
##                                                       3 
##                                                    Otro 
##                                                       4 
## Porque mi nueva escuela es mejor que mi antigua escuela 
##                                                       5 
##                                                    Otro 
##                                                       6

Matching and crosstabulations: Run automated PII check

# selected categorical key variables: gender, occupation/education and age
# Categorical key variables (quasi-identifiers) for the k-anonymity check.
# NOTE(review): judging by the values printed further down, i17 looks like a
# birth year, grado a school grade and i12 the respondent's sex -- confirm
# against the data dictionary.
selectedKeyVars <- c("i17", "grado", "i12") ## !!! Replace with candidate categorical demo vars

# Build the sdcMicro object on those key variables and print its summary
# (category sizes and the number of records violating 2/3/5-anonymity).
sdcInitial <- createSdcObj(dat = mydata, keyVars = selectedKeyVars)
sdcInitial
## The input dataset consists of 2769 rows and 948 variables.
##   --> Categorical key variables: i17, grado, i12
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##  Key Variable Number of categories      Mean size            Size of smallest (>0)       
##           i17                   14 (14)   197.786  (197.786)                     1    (1)
##         grado                    4  (4)   692.250  (692.250)                    16   (16)
##           i12                    3  (3)  1376.500 (1376.500)                  1338 (1338)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 14 (0.506%)
##   - 3-anonymity: 18 (0.650%)
##   - 5-anonymity: 31 (1.120%)
## 
## ----------------------------------------------------------------------

Show the values of the key variables for records that violate k-anonymity

# Pass the data through the labelDataset helper before listing risky records.
# NOTE(review): presumably this re-applies variable/value labels -- confirm
# against the sourced helper functions file.
mydata <- labelDataset(mydata)

# Logical row mask: TRUE where the second column of the individual-risk table
# is below 2, i.e. the record violates 2-anonymity.
# NOTE(review): column 2 is assumed to be the sample frequency count "fk" of
# the key-variable combination -- confirm against the sdcMicro documentation.
notAnon <- sdcInitial@risk$individual[, 2] < 2

# Show the key-variable values of those records.
mydata[notAnon, selectedKeyVars]
## # A tibble: 14 x 3
##    i17   grado        i12
##    <chr> <chr>  <dbl+lbl>
##  1 1997  9     2 [Mujer] 
##  2 1997  10    1 [Hombre]
##  3 2001  8     2 [Mujer] 
##  4 1974  8     1 [Hombre]
##  5 2000  8     1 [Hombre]
##  6 2006  9     1 [Hombre]
##  7 2006  10    2 [Mujer] 
##  8 2000  8     2 [Mujer] 
##  9 2007  9     1 [Hombre]
## 10 1998  10    2 [Mujer] 
## 11 2001  8     1 [Hombre]
## 12 1999  8     2 [Mujer] 
## 13 1995  9     1 [Hombre]
## 14 1998  9     2 [Mujer]
# Run sdcMicro's local suppression with its default settings on the key
# variables of the initial SDC object.
sdcFinal <- localSuppression(sdcInitial)

# Print the key variables of the previously flagged records as they look after
# suppression, to see which cells were blanked.
extractManipData(sdcFinal)[notAnon, selectedKeyVars] # manipulated variables HH
## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first element will be used
##       i17 grado i12
## 153  <NA>     9   2
## 475  <NA>    10   1
## 483  <NA>     8   2
## 967  <NA>     8   1
## 999  <NA>     8   1
## 1220 <NA>     9   1
## 1360 <NA>    10   2
## 1574 <NA>     8   2
## 1672 <NA>     9   1
## 1698 <NA>    10   2
## 1754 <NA>     8   1
## 1975 <NA>     8   2
## 2228 <NA>     9   1
## 2650 <NA>     9   2
# Carry the local suppression over to the data set that is actually released.
# The extract printed above shows that localSuppression() blanked i17 for the
# flagged records and left grado and i12 untouched. The previous code set i12
# and grado to NA instead, which left the rare i17 values (one category has a
# single record) in the public file. Mirror exactly the cells that sdcFinal
# suppressed rather than hard-coding column names.
suppressed_data <- extractManipData(sdcFinal)
for (key_var in selectedKeyVars) {
  # Cells that are NA after suppression but still hold a value in mydata.
  newly_suppressed <- is.na(suppressed_data[[key_var]]) &
    !is.na(mydata[[key_var]])
  if (any(newly_suppressed)) {
    mydata[newly_suppressed, key_var] <- NA
  }
}

Open-ends: review responses for any sensitive information, redact as necessary

# !!! Identify open-end variables here: 
# Free-text (open-ended) variables to be exported for manual review.
# The a2..a11 and m2..m11 question blocks each contribute an "_o" and a "g"
# column; rbind() + as.vector() interleaves them as a2_o, a2g, a3_o, a3g, ...
open_ends <- c(
  "dout_reasons_1",
  "v108",
  "p13c1",
  "p35a", "p35a1",
  "p35b", "p35b1",
  "switcher_2016_otro",
  "switcher_2015_otro",
  "dout2014_otro",
  "switcher_2014_otro",
  "dout2013_otro",
  as.vector(rbind(paste0("a", 2:11, "_o"), paste0("a", 2:11, "g"))),
  as.vector(rbind(paste0("m", 2:11, "_o"), paste0("m", 2:11, "g"))),
  "q48",
  "p1a"
)

# Hand the list to the report_open helper.
# NOTE(review): presumably this writes the verbatim responses to
# "verbatims.csv" for review -- confirm against the sourced helper file.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number 
# !!! Remove, as they contain a lot of sensitive information and they are in Spanish.

# Drop the reviewed open-ended columns in a single step. The set is every
# open-end variable except p1a, which stays in the data (its very specific
# values were recoded to "Otros" earlier in the script).
mydata <- mydata[!names(mydata) %in% c(
  "dout_reasons_1", "v108", "p13c1",
  "p35a", "p35a1", "p35b", "p35b1",
  "switcher_2016_otro", "switcher_2015_otro", "switcher_2014_otro",
  "dout2014_otro", "dout2013_otro",
  paste0("a", 2:11, "_o"), paste0("a", 2:11, "g"),
  paste0("m", 2:11, "_o"), paste0("m", 2:11, "g"),
  "q48"
)]

GPS data: Displace

!!! No GPS data

Save processed data in Stata and SPSS format

Adds "_PU" (Public Use) to the end of the name

# Write the processed data next to the raw file name with a "_PU" suffix,
# first as a Stata .dta file and then as an SPSS .sav file.
haven::write_dta(mydata, sprintf("%s_PU.dta", filename))
haven::write_sav(mydata, sprintf("%s_PU.sav", filename))

# Report title, derived from the data set's file name
title_var <- paste("DOL-ILAB SDC -", filename)