# Start from a clean workspace by removing every object in the current
# environment. `all.names = TRUE` also lists objects whose names begin with a
# dot. The earlier form passed `all = t`, i.e. the base transpose function `t`
# rather than the logical `TRUE`, and relied on partial matching of the
# `all.names` argument.
rm(list = ls(all.names = TRUE))

Set up filenames

# Base name of the working dataset, without a file extension.
# !!!Update filename
filename <- "ECE_working_NOPII_Version"

# Name of the helper-functions script that this file sources.
# !!!Update helper functions file
functions_vers <- "functions_1.7.R"

Set up data and functions, and create the dictionary for dataset review

# Run the helper script named in `functions_vers`.
# NOTE(review): helpers called later in this file (encode_location,
# top_recode, labelDataset) are not defined here, and `mydata` is never
# assigned before its first use -- presumably both come from this script;
# confirm against the helper file.
source(functions_vers)

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location with population <100,000
# Large Location: Location with population >100,000
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

Direct PII: variables to be removed

# Columns holding direct identifiers; they are removed from the data outright.
# !!!Include any Direct PII variables
dropvars <- c(
  "dni",
  "nombres",
  "apellido_paterno",
  "apellido_materno",
  "paterno_ece16",
  "materno_ece16",
  "nombre1_ece16",
  "nombre2_ece16"
)

# Keep only the columns whose name is not listed in `dropvars`. A name in
# `dropvars` that does not occur in `mydata` is ignored without any message.
mydata <- mydata[!(names(mydata) %in% dropvars)]

Direct PII-team: Encode field team names

# !!!No Direct PII-team

Small locations: Encode locations with pop <100,000 using random large numbers

# !!!Include relevant variables, but check their population size first to
# confirm they are <100,000
locvars <- c("cod_mod")

# Re-code the listed location variables and store the result back in `mydata`.
# `encode_location` is not defined in this file (presumably it comes from the
# sourced helper script); 999999 is handed to it as the `missing` argument.
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## cod_mod. cod_mod7
## 207449 207472 207480 207506 209270 209304 209387 209510 209528 209536 209908 209916 209924 
##    231     92     42     69    157     64     94    224     41    217    183     29    247 
## 209940 209965 233056 233064 233130 233270 233288 236109 236117 236174 236182 236224 236232 
##    142    282    386    160    168    105     73    263    255    247     70    202     46 
## 236349 236364 236414 236778 236786 238667 238675 238808 238840 239798 239814 239822 240184 
##    194     42    125    120    109    128    268     53    405     57    181    104    243 
## 240259 240267 242271 242289 245647 245654 245662 245670 245688 245696 245704 262188 262196 
##    193    232    211    246    157    178    110    183    230    152     61    198    111 
## 273516 273524 275438 275479 275487 275545 275552 275719 290569 290585 290601 290619 290874 
##    226    183    419     84     59     73    370    240    391    212    217     68    195 
## 290890 302711 302885 302893 302943 302950 302968 304444 305656 309187 309229 309260 309302 
##    155     46    396    149    306    395    144    396     76    223    284    192     66 
## 309310 309336 309377 309385 309401 309435 309443 309468 309492 309500 309526 309633 309641 
##    107     67    120    114     43     90     46    122    119     53     36     59    124 
## 309682 309773 309781 309799 309807 309823 309856 309898 310185 310219 310441 310516 322479 
##     20    259    193    216     99    113     30     48     25     83     34    157     75 
## 323345 324772 325449 325456 325464 325472 325480 325498 325506 325548 325555 325563 325589 
##     38     76     58    159    211    326    190    199     26    163    100     36    142 
## 325597 325605 325613 325621 325639 325647 325654 325662 325670 325696 325704 325712 327379 
##    166    148    266    128     87    270     35    245    107     98    237    120    121 
## 327486 327650 329326 329573 329755 329805 329813 330464 334649 334656 334664 334672 334680 
##     80    129    159    193    160     75     67     31    108    198    223     15     12 
## 334706 334714 334722 334730 334748 334771 336495 336511 336537 336545 336578 336586 336594 
##    296     11    231     81    109     18    282     22     94    238     46    195     82 
## 336602 336610 336628 336636 336891 337436 337568 337592 337717 337733 337741 337766 340224 
##     19    312    284     16    200    145    243     17     22     59     13     49    178 
## 340281 340299 340315 340323 340331 340349 340356 340364 340372 340380 340398 340414 340422 
##     52    129    124     84     28    294     37     43     70     98     44    224    124 
## 340430 340448 340463 343566 355875 355883 355891 355990 356071 356089 356188 356329 356634 
##    175    145     11    121     68     34     11    429    147    193    238    168     46 
## 356758 356766 372508 372540 372565 372581 372599 372680 372698 372946 373761 373787 375758 
##    157     48    491    309    226    298    510    437     72     56    537    322     97 
## 390591 390609 390617 390690 390708 391060 391078 394288 394353 394445 394775 394825 395061 
##    517    367     80    167    299    188    141    110     97    100    402     71    244 
## 395079 395087 395103 395111 395152 395228 395236 395251 395269 411736 421248 421396 424507 
##    107    114    135     45    145    390    469    190    335    201    239     70    498 
## 424523 424564 424572 424580 424606 424770 427427 427690 427708 427716 427955 427971 432773 
##    203     55    119    256    234     57     69    219    145     42     12     27    134 
## 436642 437210 437228 437236 437244 437251 437269 437277 437285 437293 437319 437327 437335 
##     67     50    315    141    232    265    387    167    187     74     62     26     52 
## 437343 437350 437368 437400 437509 437517 437525 437533 437541 437707 437715 437723 437731 
##    269    128     24    102    105    185     91    106    129     96     66    109    142 
## 437749 437772 449819 449827 449868 452565 452573 452599 452623 452631 452649 452656 452722 
##    259     38     78     60    223    454    440    292    288     85    301    175     38 
## 452730 452748 452763 452771 452847 452854 452862 453597 453605 453647 466383 466722 466730 
##    121     68     82    149    188    259     29    333    231    193     28     69     62 
## 469205 469700 472209 472472 478479 481184 481242 481820 481853 481903 482042 488619 488635 
##    180     77     33     28    188     86     71    150     65     48     79    110     31 
## 489096 492504 492769 492876 493544 493635 493742 493841 494070 495150 495259 495424 495762 
##     71    136     97     97    115    215    117     61     62     31     91    135    133 
## 495812 496166 496265 497081 498782 498824 498881 498998 499228 499699 500124 500330 500348 
##    121     60     58      5     42    177     24    108     60    246     67     46     49 
## 500611 501411 501502 501601 501676 501700 501809 501908 502047 502104 502336 502435 502484 
##    132     77    191    128     28     99     40     18     86     36    147    201     73 
## 502633 504993 505149 507806 508168 508267 510396 515668 516963 517698 518241 519645 520064 
##     99    297    180     17     22    113      9     57     81     24    119    167    158 
## 520486 521179 522318 522862 523423 523431 523472 523621 524637 525725 533752 535195 535666 
##    298    108     80    144    111     17    163    272     83     71     67     67     28 
## 535724 535823 535930 536029 536128 536151 536326 536714 537761 542357 544957 545053 545251 
##    129    156    100    108     96    200    111     85    125     54    171    177     15 
## 545459 545509 546002 546986 547083 547463 547877 547976 550749 552612 553412 553420 553511 
##    178     85     94    186    121     40     71    175     12     89     70     26     31 
## 553529 554824 555847 555862 555946 556266 556332 556340 556449 556472 556571 564252 565119 
##     94    175     14     21    111     47     23     56     89     25     64     64     18 
## 565143 565176 565200 565234 565267 566141 566158 566166 566414 566422 566430 566448 566455 
##     82     98    155    140     84    158    104     38     76    109     58     58     94 
## 566463 566471 566489 566927 566950 567743 567750 568592 568618 568915 569053 569566 569988 
##     99    138     39     16      8     17     68     34    151     61    163     44     48 
## 570010 570044 570648 571620 572032 572768 573352 576363 576389 576439 576736 577346 577379 
##     92     64      7     30    164     47     11     37     19     39     42    237     29 
## 577387 577478 577486 577783 577817 577825 577833 577841 577858 577866 577874 577882 577890 
##    190    106    245     60     80    151     70    109     75    168     78     30    117 
## 577908 577916 578260 578278 578286 578294 578336 578351 578393 578401 578435 578443 578450 
##     52     65     32     35    152     51     25     20     20     16     27     58    133 
## 578468 578492 578500 578518 578526 578534 578542 578559 578773 578799 578823 579151 579177 
##     78     23    164    107     62    191     93     60     85     36     82    113     65 
## 579565 579573 579599 579607 579615 579623 579631 579649 579664 579672 579680 579698 579706 
##     36     88     34     39     12    112     44     40     32     27     44     71     10 
## 579714 579730 579748 579797 580514 580746 580753 580779 580803 580837 580928 580977 581710 
##     85     50     39    120    111     43     98    154     14     52     19     63     47 
## 581728 581736 581744 581777 581876 581884 581892 581900 581991 582114 582122 582148 582163 
##     41    126    164     52    160     54     37     61     57     26    117    387    179 
## 582171 582189 582254 582262 582304 582312 582387 582403 582411 582833 582866 582890 582932 
##    118    104    160     67    138    168     59    161     38     58     32     66    140 
## 582981 583013 583021 583088 583104 583203 583328 583443 583476 583500 583534 583567 583591 
##     16    118    124    118    137     57     54    185     89     85     96    110    107 
## 587485 589200 589234 591065 591131 591164 591198 591289 592816 594895 595298 596007 598581 
##     33    140     45     23    116     74     62     28     29     15     24      9     56 
## 598649 599159 599365 601492 603878 605469 605501 606392 606459 607143 607531 607549 607556 
##     50    132     65     26     79    246    139     16    145     59     67     41    123 
## 607697 609883 610683 612945 613638 614933 614966 614990 615690 615948 616185 616201 616433 
##    217    103     19     24     32     68     85     81     79    110     54     95     44 
## 616441 616466 616938 616961 617183 617191 617209 617217 617233 617290 618447 619361 622456 
##    160     90     39      9     56     24     57     32     29     56     22     18     63 
## 625830 626374 628842 632471 635284 635318 635334 635987 636019 636217 637132 637249 637306 
##    140     98    154     26    155     13    108     24    183     60     19     73     90 
## 639922 642801 642892 642926 643148 643163 643171 643221 643262 643668 643692 643783 643817 
##      1     76    198     52    127     31     86     48     93     48     31     48     24 
## 643841 643874 644690 644880 647057 647065 647172 649129 649483 649673 649731 649897 650002 
##     67    140     44     23    116    142     38    144     69     27    106     77     75 
## 650036 655746 655795 656447 656587 658716 659599 659623 659722 659896 659953 662940 662957 
##     14    114     61    155     71     21     75     39    225    101    162     66     93 
## 663005 663096 663112 663120 663138 663153 663534 663542 663559 663971 664292 664698 664706 
##     44     37     92     56     80    103     25     49     83    104     67    217    113 
## 664748 664912 664920 665265 665273 665281 665463 665471 665489 666438 667022 667394 668764 
##    186     31     96     32     80    155    126    121    219     14    155    115     96 
## 669341 671073 671438 672600 672618 672626 674374 675025 678581 678615 679670 681452 682229 
##     31    107     16     33     68    128     97     24     77     50     46     91    108 
## 682245 682252 682260 683946 685016 688283 690008 690024 690289 690297 691931 692434 692442 
##     26     12    177     14    122     80     52    121     78     27    272    126     78 
## 692459 692467 692707 693382 693499 693622 693630 693655 693663 694539 694547 694554 694562 
##     53     75     41    137    156     60     50    154     97     52    111     26     91 
## 694570 694588 694596 694604 695262 695270 695288 695296 695312 696948 697029 697045 701557 
##    118     65     79    140     74     13     33     68     50     36     16     49     11 
## 701755 703215 703223 703231 703249 703256 703744 703751 704072 704460 704593 705053 705160 
##     11    106     93    202    144    121    117     34     44     85     32     75     73 
## 705459 705509 705772 707646 709493 709527 709907 712885 712976 715961 716886 718718 719880 
##    116     26    113      8     73     60     29    133     46     80     91    101     23 
## 720235 723353 723486 723825 724815 725523 725739 725770 725861 727461 728055 728196 728337 
##     10     25     32     86     57     32     34     71     67     25     18     64    161 
## 728717 728907 730515 732321 732339 732347 732495 733147 735035 738542 738559 739557 742130 
##    185     25    114     69     56     51     79     46     59     14     38     91     20 
## 743179 743773 743781 743799 743807 743815 743831 744045 744565 744573 744870 744888 745448 
##     14     17     49     64    183     43     58     16     56     36     32     69     97 
## 745745 745752 749325 749358 749366 750083 750125 751230 753137 753178 756015 757922 757930 
##    119    124     39     46     54     44     32     57     65     23      9    105     53 
## 758078 759563 759571 759613 762773 762781 762849 762856 762864 762880 762906 762914 763169 
##     92     97     17     23     27    100     44    143    112    118    177    215     39 
## 763771 763789 764910 764928 764936 765297 765305 765313 765321 765362 765370 765396 765404 
##     36      7     78     31    209     55     18    112     79     64    153     73     21 
## 765412 766329 773788 773812 773846 774026 774455 774679 774703 774737 774760 774794 775320 
##     52    115     49     52     39     34     72    111     69     37    144     59    128 
## 775346 775874 775908 777243 777656 777680 777714 777995 778001 778027 778233 778738 778761 
##     85    145     42    112    101    125     84     49     73     27     63    151     55 
## 778795 779041 779868 780320 780759 781245 781278 781302 781336 781369 781930 782045 782078 
##    210    114     17     63     85     51     86     32    125    121    192     65     99 
## 782102 782664 782680 784512 785097 785873 785956 785964 787473 794412 794438 806653 806943 
##     63     89     98     58     26     47     69      8     36     59     39    220     16 
## 807297 811091 817650 818070 832253 832279 832287 832303 832311 832337 832345 869198 869222 
##     78    105      3     30     92     87    147     50     18    115     18     38    122 
## 869230 869248 870931 870949 870956 872515 874198 874206 874214 874222 876375 876383 876409 
##     44    121     81     41     22     79    225    108      9     16     20     24    171 
## 876417 876433 876441 876508 876524 876532 877308 878058 879791 879817 883967 884510 884528 
##    116    175     84    103     36     93     59     57    103     69    167     75     27 
## 884536 884544 884551 884569 884577 884585 884593 884601 884619 884627 884635 886218 886226 
##     24     42     71     93     22    112     43     44     93    117    110     24     90 
## 886234 886242 886259 886309 895482 895516 895607 895755 895813 897728 897819 899039 899062 
##     27     75    122     93     78     28     20     62     56     39     59     32     56 
## 899096 899112 899120 899187 899211 899237 899294 899328 899336 899369 899393 899419 900647 
##     31     83     47     58     27     27     20     14    100     47      9     20    141 
## 900670 900704 900738 900761 900795 900829 900852 900886 900910 900944 900977 901009 901033 
##     67    112     61     40    144     90     16     55     96     48    171    173     50 
## 901066 901082 901090 901124 906313 908848 913814 914085 915256 919308 919332 919365 
##     80     70    143     61     69     97     25    152     41     58     90     77 
##  [ reached getOption("max.print") -- omitted 24 entries ]
## [1] "Frequency table after encoding"
## cod_mod. cod_mod7
##  684  685  686  687  688  689  690  691  692  693  694  695  696  697  698  699  700  701  702 
##  129   96  114   56   48   86   89   36   34   33   11  171  246   79   63  121  272   90   92 
##  703  704  705  706  707  708  709  710  711  712  713  714  715  716  717  718  719  720  721 
##   83   58  114   39   16   93   17  116   41  240   60   85   16   71  259   49   23  160   69 
##  722  723  724  725  726  727  728  729  730  731  732  733  734  735  736  737  738  739  740 
##   71   58   75   44  191   22   76    8   81  103  202   57  147  140  144   11  223   28   64 
##  741  742  743  744  745  746  747  748  749  750  751  752  753  754  755  756  757  758  759 
##  237   46  217  402   36   75   14   25   55   79  239  111   62  107   60   44   41   95   96 
##  760  761  762  763  764  765  766  767  768  769  770  771  772  773  774  775  776  777  778 
##   59  437   29  244  113  126   80   44   20   29   76   52   99   58   29  247  246  158  188 
##  779  780  781  782  783  784  785  786  787  788  789  790  791  792  793  794  795  796  797 
##  219   16   61   55   27  118   22   28   56  175  160   44  101   79   32  110  312   36   32 
##  798  799  800  801  802  803  804  805  806  807  808  809  810  811  812  813  814  815  816 
##   83  183    9   56   57  111   62  141  121   25   26   58   48   18   80   48   40   71   82 
##  817  818  819  820  821  822  823  824  825  826  827  828  829  830  831  832  833  834  835 
##   67   36  181   20   26   12   40   64   53   26  105  122  157   60  155   18   71  178   70 
##  836  837  838  839  840  841  842  843  844  845  846  847  848  849  850  851  852  853  854 
##   42   42   11  177   46   43   38   73  183   78  132   92   36   99   61    9   67   62  164 
##  855  856  857  858  859  860  861  862  863  864  865  866  867  868  869  870  871  872  873 
##   50  157   67   37  160  108  177   45   78  187   78  106   42   60   26  454  111   26  202 
##  874  875  876  877  878  879  880  881  882  883  884  885  886  887  888  889  890  891  892 
##   93   38  179   78  125   43   54  129   31   61  173   77  163   56  225   47  190   72   93 
##  893  894  895  896  897  898  899  900  901  902  903  904  905  906  907  908  909  910  911 
##  168   30   67  128   89  419   94  387   98   74    9  167   84  256  150   41  125   11  117 
##  912  913  914  915  916  917  918  919  920  921  922  923  924  925  926  927  928  929  930 
##   93   28  175  142   37  145   24   99  127   76  298  120  199  119  110  163   56  161  152 
##  931  932  933  934  935  936  937  938  939  940  941  942  943  944  945  946  947  948  949 
##   45  104  193   97   47   32   44   57   66  108   99   31  333   12   66  232  109  272  138 
##  950  951  952  953  954  955  956  957  958  959  960  961  962  963  964  965  966  967  968 
##  145  118  171  226   98   69   58    5   73   60  296   56   60  517  121   81  157  160  106 
##  969  970  971  972  973  974  975  976  977  978  979  980  981  982  983  984  985  986  987 
##  156  117   39   36  114  247  387  284  108   53  259  180  133  113   86  255  198  282   26 
##  988  989  990  991  992  993  994  995  996  997  998  999 1000 1001 1002 1003 1004 1005 1006 
##  220   93  126   35   39   18  122  211   24  153  284   90   32   59  132  215  185   68  111 
## 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 
##   58  200  128  168   69  110  231   98   78   98  143  155  108   42  183   70   28  265  203 
## 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 
##   73  145   90  109   85   39  173   80  135  209   47  335   13   74   68  111  147  161  155 
## 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 
##   16   70   73   57   78  104   50  107   11   14   48   37   38  154   62   37  190  105   86 
## 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 
##  119   73   14   79   80  112   69  144  124   53   55   71  162   64   72   97  109   58   24 
## 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 
##  200  194  151   60  113   27  120   27   89   54  126   91  155  121   20   93   75   30   63 
## 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 
##   84   44  192   93   56   17  298  108   27   25  299   91  121  140  238  120   41  147   26 
## 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 
##  112   24   85   37  145   71  230  144   24  155   59  108  211  137   91  133  154    7  121 
## 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 
##   16   46   24  125  142   43   80   91  140   22   22   67   46  116   56   16   52   97   52 
## 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 
##   18   65   78   40   13  269   98   69   75   33   38   92   67  138   36   77   52   23   88 
## 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 
##   83   80   87   19   24  167   94  163  115  193  510   23   68  223   46  223   92  301  124 
## 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 
##  149   54  117   63   69   27  116   39   71   18  396  107  215   53  119  107   66  175   58 
## 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 
##   50   68   59   50  491   44   49  243  370   66  190  191  245    1  186   23  259   74  498 
## 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 
##   49   39   57   81  405  122  217   30  326   63  124  109   46   15   17   60   64  315   38 
## 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 
##  164   46  141   38   14   52  201   70  192   43   84   27   85  225   61  104  136  144   80 
## 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 
##  100   23   57   17  124   76   52  122   75   75  396  224  226   28  117   22  390   50  121 
## 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 
##  142  129   44   77  128  118   34  263   81   34   15  100   39  149   64  133   97  188   24 
## 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 
##   75   69   90   59  110  151   51  217   16   59   96   65   67  322   83   46  198   90   49 
## 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 
##  219   61   25   18  292  297  167   24  125   31  193  106  116   16   36  114   21  100  178 
## 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 
##  113   77  168  110  224  386    7   87  268  282  168   14   50  395   32   51   47  175   16 
## 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 
##   34   20   34   85   31   42  155  123  121   50  195  102  115  143   80  429  166  157  101 
## 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 
##   65   18   31  107   23   65  175   57   16   55  144  177   25   31  367   94  180   31  195 
## 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 
##  118   78   92   31   47   23   52   84   62   39  128   24   57  156   64   25  124  103  133 
## 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 
##   52    3   32   67  193   85   75   69   97   96  245  100   49   44  115  237    8   32   27 
## 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 
##  101  309  103   49   32   39   15  100  140   15   98   61   59   54   94   10   77  440  246 
## 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 
##   49   77   84   21   31   96  135   20  217   67   17  105  188   71   20    9  113   67   63 
## 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 
##   56  115   32  183    9   16  152   91  145   38   17   34  171  145  185   27   64  469   24 
## 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 
##  112   14   32   46  112   14   29   24   26   65   19   67   68   25  121   65   64  210  212 
## 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 
##   29   85   36  178   91   32  103   82   62   47  110   41   62   14   63   51   27   85   48 
## 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 
##   12  160   94  294  243   58  120   71  112   25  159   93  144   52   96  114   12   99   34 
## 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 
##   22   47   56  109   27  266   33   85  154  167  231   29   20  104   11   80   33  142   65 
## 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 
##   79   82   73   60   43   30  112   13   32  306   59   44  134  148   89  118  106   43  139 
## 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 
##   73   97  216   17   50  128  111  105  140   21   52  140    9   68   36  119  164  177   69 
## 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 
##   30  198   75   20   70  151  111   44   19  201   70  270   90   49  185   60   97   48   85 
## 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 
##   86   57  129   82  231   54   61   48   19  141  117   32  537   27   18   39   56   10   59 
## 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 
##  164  158  193  121   48   97   59   26   79  128   86   79   26   39   42   71   28   40  137 
## 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 
##   67  159   28  183    8   19  133   35  391   85   31  238 
##  [ reached getOption("max.print") -- omitted 24 entries ]

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Discard any fractional part of age so that only whole numbers remain.
mydata[["edad"]] <- trunc(mydata[["edad"]])

# Top-code age at the break point of 19; the frequency table printed after the
# call shows higher ages collapsed into "19 or more". `top_recode` is not
# defined in this file; 888 and 999999 are passed as its `missing` argument.
mydata <- top_recode("edad", break_point = 19, missing = c(888, 999999))
## [1] "Frequency table before encoding"
## edad. 
##     3     7    11    12    13    14    15    16    17    18    19    21    25    26    29    30 
##     1     2    50  8537 71499 16906  5665  1590   325    36    11     1     1     1     1     1 
##    34    38    40    42    45    46    54  <NA> 
##     1     2     2     1     1     1     1    25

## [1] "Frequency table after encoding"
## edad. 19
##          3          7         11         12         13         14         15         16 
##          1          2         50       8537      71499      16906       5665       1590 
##         17         18 19 or more       <NA> 
##        325         36         25         25

Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values

# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)
# !!!No Indirect PII - Categorical

Matching and crosstabulations: Run automated PII check

# Key variables for the disclosure-risk check, chosen after inspecting the
# dictionary: sex, school grade and age.
# !!! Replace with candidate categorical demo vars
selectedKeyVars <- c("sexo_2016", "grado", "edad")

# Build the sdcMicro object with those columns as categorical key variables.
sdcInitial <- createSdcObj(dat = mydata, keyVars = selectedKeyVars)

# Print the object's summary, including the k-anonymity violation counts.
sdcInitial
## The input dataset consists of 104661 rows and 43 variables.
##   --> Categorical key variables: sexo_2016, grado, edad
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##  Key Variable Number of categories      Mean size             Size of smallest (>0)        
##     sexo_2016                    3  (3) 52318.000 (52318.000)                 51121 (51121)
##         grado                    2  (2) 52330.500 (52330.500)                  4392  (4392)
##          edad                   12 (12)  9512.364  (9512.364)                     1     (1)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 2 (0.002%)
##   - 3-anonymity: 6 (0.006%)
##   - 5-anonymity: 9 (0.009%)
## 
## ----------------------------------------------------------------------

Show values of key variable of records that violate k-anonymity

# `labelDataset` is not defined in this file; its result replaces `mydata`.
mydata <- labelDataset(mydata)

# Logical mask over records: TRUE where column 2 of the individual risk table
# is below 2 (presumably that column is the sample frequency count `fk` --
# confirm), i.e. the record violates 2-anonymity.
notAnon <- sdcInitial@risk$individual[, 2] < 2 # for 2-anonymity

# Show the key-variable values of the flagged records.
mydata[notAnon, selectedKeyVars]
## # A tibble: 2 x 3
##   sexo_2016 grado                            edad
##       <dbl> <chr>                       <dbl+lbl>
## 1         1 Segundo de secundaria 19 [19 or more]
## 2         0 Segundo de secundaria 19 [19 or more]
# Apply sdcMicro's local suppression to the initial object, using the
# function's default settings.
sdcFinal <- localSuppression(obj = sdcInitial)
## Warning in nextSdcObjX(obj): No previous states are saved because your data set has more than 100 000 observations.
# Recombining anonymized variables: pull the manipulated data out of the
# suppressed object and print the key variables of the flagged records only.

extractManipData(sdcFinal)[notAnon, selectedKeyVars] # manipulated variables HH
## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first element will
## be used
##       sexo_2016                 grado edad
## 55776         1 Segundo de secundaria   NA
## 91814         0 Segundo de secundaria   NA
# Blank out `sexo_2016` for the records flagged as violating 2-anonymity.
# NOTE(review): the local-suppression output printed just before this step
# shows `edad` set to NA for these records, whereas this line suppresses
# `sexo_2016` -- confirm which variable is meant to be suppressed.
mydata[notAnon, "sexo_2016"] <- NA

# Rebuild and print the sdcMicro object to re-check the violation counts.
createSdcObj(dat = mydata, keyVars = selectedKeyVars)
## The input dataset consists of 104661 rows and 43 variables.
##   --> Categorical key variables: sexo_2016, grado, edad
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##  Key Variable Number of categories      Mean size             Size of smallest (>0)        
##     sexo_2016                    3  (3) 52317.000 (52317.000)                 51120 (51120)
##         grado                    2  (2) 52330.500 (52330.500)                  4392  (4392)
##          edad                   12 (12)  9512.364  (9512.364)                     1     (1)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 0 (0.000%)
##   - 3-anonymity: 6 (0.006%)
##   - 5-anonymity: 9 (0.009%)
## 
## ----------------------------------------------------------------------

Open-ends: review responses for any sensitive information, redact as necessary

# !!!No Open-ends

GPS data: Displace

# !!!No GPS data

Save processed data in Stata and SPSS format

# Clean column names ahead of the Stata/SPSS export: every column whose name
# begins with "_" has ALL underscores removed from its name (not only the
# leading one); other names are left alone.
# NOTE(review): presumably done because a leading underscore is not accepted
# as a variable name by one of the export formats -- confirm. Removing interior
# underscores as well could make two names collide.
#
# `seq_along()` replaces `1:length(...)`, which yields c(1, 0) when there are
# no columns and then fails on an NA condition. `isTRUE()` likewise guards
# against a missing name producing an NA condition.
range <- seq_along(names(mydata))

for (i in range) {
  if (isTRUE(startsWith(names(mydata)[i], "_"))) {
    names(mydata)[i] <- gsub("_", "", names(mydata)[i], fixed = TRUE)
  }
}

# Write the processed data twice, as "<filename>_PU.dta" (Stata) and
# "<filename>_PU.sav" (SPSS), relative to the current working directory.
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))

# Report title, built from the fixed prefix plus the dataset base name.
title_var <- paste0("DOL-ILAB SDC - ", filename)