# Start from a clean workspace. `all.names = TRUE` is spelled out in full:
# the previous `all=t` handed the base function t() to ls() instead of a
# logical and relied on partial argument matching.
rm(list = ls(all.names = TRUE))

# Base name reused below for the exported data files and the report title.
filename <- "ECE_working_NOPII_Version" # !!!Update filename
# Helper script evaluated by source(); presumably it provides the helper
# functions called later in this file (encode_location, top_recode, ...).
functions_vers <- "functions_1.7.R" # !!!Update helper functions file
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location (>100,000)
# Weight: weightVar
# Household ID: hhId,
# Open-ends: Review responses for any sensitive information, redact as necessary
# !!!Include any Direct PII variables
# Columns flagged as Direct PII; they are removed from the dataset outright.
dropvars <- c(
  "dni",
  "nombres",
  "apellido_paterno",
  "apellido_materno",
  "paterno_ece16",
  "materno_ece16",
  "nombre1_ece16",
  "nombre2_ece16"
)
# Keep every column whose name has no match in dropvars.
mydata <- mydata[is.na(match(names(mydata), dropvars))]
# !!!No Direct PII-team
# !!!Include relevant variables, but check their population size first to confirm they are <100,000
# Location variable(s) whose original codes are replaced by encoded values.
locvars <- "cod_mod"
# encode_location() is not defined in this file (presumably it comes from the
# sourced helper script). The output below shows that it prints frequency
# tables before and after encoding.
mydata <- encode_location(
  variables = locvars,
  missing = 999999
)
## [1] "Frequency table before encoding"
## cod_mod. cod_mod7
## 207449 207472 207480 207506 209270 209304 209387 209510 209528 209536 209908 209916 209924
## 231 92 42 69 157 64 94 224 41 217 183 29 247
## 209940 209965 233056 233064 233130 233270 233288 236109 236117 236174 236182 236224 236232
## 142 282 386 160 168 105 73 263 255 247 70 202 46
## 236349 236364 236414 236778 236786 238667 238675 238808 238840 239798 239814 239822 240184
## 194 42 125 120 109 128 268 53 405 57 181 104 243
## 240259 240267 242271 242289 245647 245654 245662 245670 245688 245696 245704 262188 262196
## 193 232 211 246 157 178 110 183 230 152 61 198 111
## 273516 273524 275438 275479 275487 275545 275552 275719 290569 290585 290601 290619 290874
## 226 183 419 84 59 73 370 240 391 212 217 68 195
## 290890 302711 302885 302893 302943 302950 302968 304444 305656 309187 309229 309260 309302
## 155 46 396 149 306 395 144 396 76 223 284 192 66
## 309310 309336 309377 309385 309401 309435 309443 309468 309492 309500 309526 309633 309641
## 107 67 120 114 43 90 46 122 119 53 36 59 124
## 309682 309773 309781 309799 309807 309823 309856 309898 310185 310219 310441 310516 322479
## 20 259 193 216 99 113 30 48 25 83 34 157 75
## 323345 324772 325449 325456 325464 325472 325480 325498 325506 325548 325555 325563 325589
## 38 76 58 159 211 326 190 199 26 163 100 36 142
## 325597 325605 325613 325621 325639 325647 325654 325662 325670 325696 325704 325712 327379
## 166 148 266 128 87 270 35 245 107 98 237 120 121
## 327486 327650 329326 329573 329755 329805 329813 330464 334649 334656 334664 334672 334680
## 80 129 159 193 160 75 67 31 108 198 223 15 12
## 334706 334714 334722 334730 334748 334771 336495 336511 336537 336545 336578 336586 336594
## 296 11 231 81 109 18 282 22 94 238 46 195 82
## 336602 336610 336628 336636 336891 337436 337568 337592 337717 337733 337741 337766 340224
## 19 312 284 16 200 145 243 17 22 59 13 49 178
## 340281 340299 340315 340323 340331 340349 340356 340364 340372 340380 340398 340414 340422
## 52 129 124 84 28 294 37 43 70 98 44 224 124
## 340430 340448 340463 343566 355875 355883 355891 355990 356071 356089 356188 356329 356634
## 175 145 11 121 68 34 11 429 147 193 238 168 46
## 356758 356766 372508 372540 372565 372581 372599 372680 372698 372946 373761 373787 375758
## 157 48 491 309 226 298 510 437 72 56 537 322 97
## 390591 390609 390617 390690 390708 391060 391078 394288 394353 394445 394775 394825 395061
## 517 367 80 167 299 188 141 110 97 100 402 71 244
## 395079 395087 395103 395111 395152 395228 395236 395251 395269 411736 421248 421396 424507
## 107 114 135 45 145 390 469 190 335 201 239 70 498
## 424523 424564 424572 424580 424606 424770 427427 427690 427708 427716 427955 427971 432773
## 203 55 119 256 234 57 69 219 145 42 12 27 134
## 436642 437210 437228 437236 437244 437251 437269 437277 437285 437293 437319 437327 437335
## 67 50 315 141 232 265 387 167 187 74 62 26 52
## 437343 437350 437368 437400 437509 437517 437525 437533 437541 437707 437715 437723 437731
## 269 128 24 102 105 185 91 106 129 96 66 109 142
## 437749 437772 449819 449827 449868 452565 452573 452599 452623 452631 452649 452656 452722
## 259 38 78 60 223 454 440 292 288 85 301 175 38
## 452730 452748 452763 452771 452847 452854 452862 453597 453605 453647 466383 466722 466730
## 121 68 82 149 188 259 29 333 231 193 28 69 62
## 469205 469700 472209 472472 478479 481184 481242 481820 481853 481903 482042 488619 488635
## 180 77 33 28 188 86 71 150 65 48 79 110 31
## 489096 492504 492769 492876 493544 493635 493742 493841 494070 495150 495259 495424 495762
## 71 136 97 97 115 215 117 61 62 31 91 135 133
## 495812 496166 496265 497081 498782 498824 498881 498998 499228 499699 500124 500330 500348
## 121 60 58 5 42 177 24 108 60 246 67 46 49
## 500611 501411 501502 501601 501676 501700 501809 501908 502047 502104 502336 502435 502484
## 132 77 191 128 28 99 40 18 86 36 147 201 73
## 502633 504993 505149 507806 508168 508267 510396 515668 516963 517698 518241 519645 520064
## 99 297 180 17 22 113 9 57 81 24 119 167 158
## 520486 521179 522318 522862 523423 523431 523472 523621 524637 525725 533752 535195 535666
## 298 108 80 144 111 17 163 272 83 71 67 67 28
## 535724 535823 535930 536029 536128 536151 536326 536714 537761 542357 544957 545053 545251
## 129 156 100 108 96 200 111 85 125 54 171 177 15
## 545459 545509 546002 546986 547083 547463 547877 547976 550749 552612 553412 553420 553511
## 178 85 94 186 121 40 71 175 12 89 70 26 31
## 553529 554824 555847 555862 555946 556266 556332 556340 556449 556472 556571 564252 565119
## 94 175 14 21 111 47 23 56 89 25 64 64 18
## 565143 565176 565200 565234 565267 566141 566158 566166 566414 566422 566430 566448 566455
## 82 98 155 140 84 158 104 38 76 109 58 58 94
## 566463 566471 566489 566927 566950 567743 567750 568592 568618 568915 569053 569566 569988
## 99 138 39 16 8 17 68 34 151 61 163 44 48
## 570010 570044 570648 571620 572032 572768 573352 576363 576389 576439 576736 577346 577379
## 92 64 7 30 164 47 11 37 19 39 42 237 29
## 577387 577478 577486 577783 577817 577825 577833 577841 577858 577866 577874 577882 577890
## 190 106 245 60 80 151 70 109 75 168 78 30 117
## 577908 577916 578260 578278 578286 578294 578336 578351 578393 578401 578435 578443 578450
## 52 65 32 35 152 51 25 20 20 16 27 58 133
## 578468 578492 578500 578518 578526 578534 578542 578559 578773 578799 578823 579151 579177
## 78 23 164 107 62 191 93 60 85 36 82 113 65
## 579565 579573 579599 579607 579615 579623 579631 579649 579664 579672 579680 579698 579706
## 36 88 34 39 12 112 44 40 32 27 44 71 10
## 579714 579730 579748 579797 580514 580746 580753 580779 580803 580837 580928 580977 581710
## 85 50 39 120 111 43 98 154 14 52 19 63 47
## 581728 581736 581744 581777 581876 581884 581892 581900 581991 582114 582122 582148 582163
## 41 126 164 52 160 54 37 61 57 26 117 387 179
## 582171 582189 582254 582262 582304 582312 582387 582403 582411 582833 582866 582890 582932
## 118 104 160 67 138 168 59 161 38 58 32 66 140
## 582981 583013 583021 583088 583104 583203 583328 583443 583476 583500 583534 583567 583591
## 16 118 124 118 137 57 54 185 89 85 96 110 107
## 587485 589200 589234 591065 591131 591164 591198 591289 592816 594895 595298 596007 598581
## 33 140 45 23 116 74 62 28 29 15 24 9 56
## 598649 599159 599365 601492 603878 605469 605501 606392 606459 607143 607531 607549 607556
## 50 132 65 26 79 246 139 16 145 59 67 41 123
## 607697 609883 610683 612945 613638 614933 614966 614990 615690 615948 616185 616201 616433
## 217 103 19 24 32 68 85 81 79 110 54 95 44
## 616441 616466 616938 616961 617183 617191 617209 617217 617233 617290 618447 619361 622456
## 160 90 39 9 56 24 57 32 29 56 22 18 63
## 625830 626374 628842 632471 635284 635318 635334 635987 636019 636217 637132 637249 637306
## 140 98 154 26 155 13 108 24 183 60 19 73 90
## 639922 642801 642892 642926 643148 643163 643171 643221 643262 643668 643692 643783 643817
## 1 76 198 52 127 31 86 48 93 48 31 48 24
## 643841 643874 644690 644880 647057 647065 647172 649129 649483 649673 649731 649897 650002
## 67 140 44 23 116 142 38 144 69 27 106 77 75
## 650036 655746 655795 656447 656587 658716 659599 659623 659722 659896 659953 662940 662957
## 14 114 61 155 71 21 75 39 225 101 162 66 93
## 663005 663096 663112 663120 663138 663153 663534 663542 663559 663971 664292 664698 664706
## 44 37 92 56 80 103 25 49 83 104 67 217 113
## 664748 664912 664920 665265 665273 665281 665463 665471 665489 666438 667022 667394 668764
## 186 31 96 32 80 155 126 121 219 14 155 115 96
## 669341 671073 671438 672600 672618 672626 674374 675025 678581 678615 679670 681452 682229
## 31 107 16 33 68 128 97 24 77 50 46 91 108
## 682245 682252 682260 683946 685016 688283 690008 690024 690289 690297 691931 692434 692442
## 26 12 177 14 122 80 52 121 78 27 272 126 78
## 692459 692467 692707 693382 693499 693622 693630 693655 693663 694539 694547 694554 694562
## 53 75 41 137 156 60 50 154 97 52 111 26 91
## 694570 694588 694596 694604 695262 695270 695288 695296 695312 696948 697029 697045 701557
## 118 65 79 140 74 13 33 68 50 36 16 49 11
## 701755 703215 703223 703231 703249 703256 703744 703751 704072 704460 704593 705053 705160
## 11 106 93 202 144 121 117 34 44 85 32 75 73
## 705459 705509 705772 707646 709493 709527 709907 712885 712976 715961 716886 718718 719880
## 116 26 113 8 73 60 29 133 46 80 91 101 23
## 720235 723353 723486 723825 724815 725523 725739 725770 725861 727461 728055 728196 728337
## 10 25 32 86 57 32 34 71 67 25 18 64 161
## 728717 728907 730515 732321 732339 732347 732495 733147 735035 738542 738559 739557 742130
## 185 25 114 69 56 51 79 46 59 14 38 91 20
## 743179 743773 743781 743799 743807 743815 743831 744045 744565 744573 744870 744888 745448
## 14 17 49 64 183 43 58 16 56 36 32 69 97
## 745745 745752 749325 749358 749366 750083 750125 751230 753137 753178 756015 757922 757930
## 119 124 39 46 54 44 32 57 65 23 9 105 53
## 758078 759563 759571 759613 762773 762781 762849 762856 762864 762880 762906 762914 763169
## 92 97 17 23 27 100 44 143 112 118 177 215 39
## 763771 763789 764910 764928 764936 765297 765305 765313 765321 765362 765370 765396 765404
## 36 7 78 31 209 55 18 112 79 64 153 73 21
## 765412 766329 773788 773812 773846 774026 774455 774679 774703 774737 774760 774794 775320
## 52 115 49 52 39 34 72 111 69 37 144 59 128
## 775346 775874 775908 777243 777656 777680 777714 777995 778001 778027 778233 778738 778761
## 85 145 42 112 101 125 84 49 73 27 63 151 55
## 778795 779041 779868 780320 780759 781245 781278 781302 781336 781369 781930 782045 782078
## 210 114 17 63 85 51 86 32 125 121 192 65 99
## 782102 782664 782680 784512 785097 785873 785956 785964 787473 794412 794438 806653 806943
## 63 89 98 58 26 47 69 8 36 59 39 220 16
## 807297 811091 817650 818070 832253 832279 832287 832303 832311 832337 832345 869198 869222
## 78 105 3 30 92 87 147 50 18 115 18 38 122
## 869230 869248 870931 870949 870956 872515 874198 874206 874214 874222 876375 876383 876409
## 44 121 81 41 22 79 225 108 9 16 20 24 171
## 876417 876433 876441 876508 876524 876532 877308 878058 879791 879817 883967 884510 884528
## 116 175 84 103 36 93 59 57 103 69 167 75 27
## 884536 884544 884551 884569 884577 884585 884593 884601 884619 884627 884635 886218 886226
## 24 42 71 93 22 112 43 44 93 117 110 24 90
## 886234 886242 886259 886309 895482 895516 895607 895755 895813 897728 897819 899039 899062
## 27 75 122 93 78 28 20 62 56 39 59 32 56
## 899096 899112 899120 899187 899211 899237 899294 899328 899336 899369 899393 899419 900647
## 31 83 47 58 27 27 20 14 100 47 9 20 141
## 900670 900704 900738 900761 900795 900829 900852 900886 900910 900944 900977 901009 901033
## 67 112 61 40 144 90 16 55 96 48 171 173 50
## 901066 901082 901090 901124 906313 908848 913814 914085 915256 919308 919332 919365
## 80 70 143 61 69 97 25 152 41 58 90 77
## [ reached getOption("max.print") -- omitted 24 entries ]
## [1] "Frequency table after encoding"
## cod_mod. cod_mod7
## 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702
## 129 96 114 56 48 86 89 36 34 33 11 171 246 79 63 121 272 90 92
## 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721
## 83 58 114 39 16 93 17 116 41 240 60 85 16 71 259 49 23 160 69
## 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740
## 71 58 75 44 191 22 76 8 81 103 202 57 147 140 144 11 223 28 64
## 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759
## 237 46 217 402 36 75 14 25 55 79 239 111 62 107 60 44 41 95 96
## 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778
## 59 437 29 244 113 126 80 44 20 29 76 52 99 58 29 247 246 158 188
## 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797
## 219 16 61 55 27 118 22 28 56 175 160 44 101 79 32 110 312 36 32
## 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816
## 83 183 9 56 57 111 62 141 121 25 26 58 48 18 80 48 40 71 82
## 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835
## 67 36 181 20 26 12 40 64 53 26 105 122 157 60 155 18 71 178 70
## 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854
## 42 42 11 177 46 43 38 73 183 78 132 92 36 99 61 9 67 62 164
## 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873
## 50 157 67 37 160 108 177 45 78 187 78 106 42 60 26 454 111 26 202
## 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892
## 93 38 179 78 125 43 54 129 31 61 173 77 163 56 225 47 190 72 93
## 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911
## 168 30 67 128 89 419 94 387 98 74 9 167 84 256 150 41 125 11 117
## 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930
## 93 28 175 142 37 145 24 99 127 76 298 120 199 119 110 163 56 161 152
## 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949
## 45 104 193 97 47 32 44 57 66 108 99 31 333 12 66 232 109 272 138
## 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968
## 145 118 171 226 98 69 58 5 73 60 296 56 60 517 121 81 157 160 106
## 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987
## 156 117 39 36 114 247 387 284 108 53 259 180 133 113 86 255 198 282 26
## 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006
## 220 93 126 35 39 18 122 211 24 153 284 90 32 59 132 215 185 68 111
## 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025
## 58 200 128 168 69 110 231 98 78 98 143 155 108 42 183 70 28 265 203
## 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044
## 73 145 90 109 85 39 173 80 135 209 47 335 13 74 68 111 147 161 155
## 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063
## 16 70 73 57 78 104 50 107 11 14 48 37 38 154 62 37 190 105 86
## 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082
## 119 73 14 79 80 112 69 144 124 53 55 71 162 64 72 97 109 58 24
## 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101
## 200 194 151 60 113 27 120 27 89 54 126 91 155 121 20 93 75 30 63
## 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120
## 84 44 192 93 56 17 298 108 27 25 299 91 121 140 238 120 41 147 26
## 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139
## 112 24 85 37 145 71 230 144 24 155 59 108 211 137 91 133 154 7 121
## 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158
## 16 46 24 125 142 43 80 91 140 22 22 67 46 116 56 16 52 97 52
## 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177
## 18 65 78 40 13 269 98 69 75 33 38 92 67 138 36 77 52 23 88
## 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196
## 83 80 87 19 24 167 94 163 115 193 510 23 68 223 46 223 92 301 124
## 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215
## 149 54 117 63 69 27 116 39 71 18 396 107 215 53 119 107 66 175 58
## 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234
## 50 68 59 50 491 44 49 243 370 66 190 191 245 1 186 23 259 74 498
## 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253
## 49 39 57 81 405 122 217 30 326 63 124 109 46 15 17 60 64 315 38
## 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272
## 164 46 141 38 14 52 201 70 192 43 84 27 85 225 61 104 136 144 80
## 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291
## 100 23 57 17 124 76 52 122 75 75 396 224 226 28 117 22 390 50 121
## 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310
## 142 129 44 77 128 118 34 263 81 34 15 100 39 149 64 133 97 188 24
## 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329
## 75 69 90 59 110 151 51 217 16 59 96 65 67 322 83 46 198 90 49
## 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348
## 219 61 25 18 292 297 167 24 125 31 193 106 116 16 36 114 21 100 178
## 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367
## 113 77 168 110 224 386 7 87 268 282 168 14 50 395 32 51 47 175 16
## 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386
## 34 20 34 85 31 42 155 123 121 50 195 102 115 143 80 429 166 157 101
## 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405
## 65 18 31 107 23 65 175 57 16 55 144 177 25 31 367 94 180 31 195
## 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424
## 118 78 92 31 47 23 52 84 62 39 128 24 57 156 64 25 124 103 133
## 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443
## 52 3 32 67 193 85 75 69 97 96 245 100 49 44 115 237 8 32 27
## 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462
## 101 309 103 49 32 39 15 100 140 15 98 61 59 54 94 10 77 440 246
## 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481
## 49 77 84 21 31 96 135 20 217 67 17 105 188 71 20 9 113 67 63
## 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500
## 56 115 32 183 9 16 152 91 145 38 17 34 171 145 185 27 64 469 24
## 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519
## 112 14 32 46 112 14 29 24 26 65 19 67 68 25 121 65 64 210 212
## 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538
## 29 85 36 178 91 32 103 82 62 47 110 41 62 14 63 51 27 85 48
## 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557
## 12 160 94 294 243 58 120 71 112 25 159 93 144 52 96 114 12 99 34
## 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576
## 22 47 56 109 27 266 33 85 154 167 231 29 20 104 11 80 33 142 65
## 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595
## 79 82 73 60 43 30 112 13 32 306 59 44 134 148 89 118 106 43 139
## 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614
## 73 97 216 17 50 128 111 105 140 21 52 140 9 68 36 119 164 177 69
## 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633
## 30 198 75 20 70 151 111 44 19 201 70 270 90 49 185 60 97 48 85
## 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652
## 86 57 129 82 231 54 61 48 19 141 117 32 537 27 18 39 56 10 59
## 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671
## 164 158 193 121 48 97 59 26 79 128 86 79 26 39 42 71 28 40 137
## 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683
## 67 159 28 183 8 19 133 35 391 85 31 238
## [ reached getOption("max.print") -- omitted 24 entries ]
# Discard the fractional part of edad so only whole values remain.
mydata[["edad"]] <- trunc(mydata[["edad"]])
# Top-code edad at 19; the output below shows values from 19 upward collapsed
# into a single "19 or more" category. top_recode() is defined outside this
# file (presumably in the sourced helper script).
mydata <- top_recode(
  "edad",
  break_point = 19,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## edad.
## 3 7 11 12 13 14 15 16 17 18 19 21 25 26 29 30
## 1 2 50 8537 71499 16906 5665 1590 325 36 11 1 1 1 1 1
## 34 38 40 42 45 46 54 <NA>
## 1 2 2 1 1 1 1 25
## [1] "Frequency table after encoding"
## edad. 19
## 3 7 11 12 13 14 15 16
## 1 2 50 8537 71499 16906 5665 1590
## 17 18 19 or more <NA>
## 325 36 25 25
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)
# !!!No Indirect PII - Categorical
# Key variables chosen after inspecting the dictionary; these are the
# categorical quasi-identifiers handed to sdcMicro.
selectedKeyVars <- c("sexo_2016", "grado", "edad") ##!!! Replace with candidate categorical demo vars
# Build the sdcMicro object on the selected key variables, then print its
# summary (category sizes and k-anonymity violations).
sdcInitial <- createSdcObj(
  dat = mydata,
  keyVars = selectedKeyVars
)
sdcInitial
## The input dataset consists of 104661 rows and 43 variables.
## --> Categorical key variables: sexo_2016, grado, edad
## ----------------------------------------------------------------------
## Information on categorical key variables:
##
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
## Key Variable Number of categories Mean size Size of smallest (>0)
## sexo_2016 3 (3) 52318.000 (52318.000) 51121 (51121)
## grado 2 (2) 52330.500 (52330.500) 4392 (4392)
## edad 12 (12) 9512.364 (9512.364) 1 (1)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
##
## Number of observations violating
## - 2-anonymity: 2 (0.002%)
## - 3-anonymity: 6 (0.006%)
## - 5-anonymity: 9 (0.009%)
##
## ----------------------------------------------------------------------
# Show the key-variable values of records that violate k-anonymity
# labelDataset() is defined outside this file (presumably in the sourced
# helper script); its result replaces mydata.
mydata <- labelDataset(mydata)
# Logical row mask: TRUE where column 2 of the individual-risk matrix is
# below 2, i.e. the record violates 2-anonymity.
# NOTE(review): column 2 is presumably the sample frequency count (fk) -- confirm.
notAnon <- slot(sdcInitial, "risk")[["individual"]][, 2] < 2 # for 2-anonymity
# Display the key-variable values of the flagged records.
mydata[notAnon, selectedKeyVars]
## # A tibble: 2 x 3
## sexo_2016 grado edad
## <dbl> <chr> <dbl+lbl>
## 1 1 Segundo de secundaria 19 [19 or more]
## 2 0 Segundo de secundaria 19 [19 or more]
# Run sdcMicro's local suppression on the key variables.
sdcFinal <- localSuppression(obj = sdcInitial)
## Warning in nextSdcObjX(obj): No previous states are saved because your data set has more than 100 000 observations.
# Inspect what local suppression did to the records flagged in notAnon.
extractManipData(sdcFinal)[notAnon, selectedKeyVars] # manipulated variables HH
## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first element will
## be used
## sexo_2016 grado edad
## 55776 1 Segundo de secundaria NA
## 91814 0 Segundo de secundaria NA
# Suppress sexo_2016 by hand for the flagged records in the working dataset.
# NOTE(review): the output above shows localSuppression() blanking edad for
# these rows, whereas this line blanks sexo_2016, and sdcFinal is not copied
# back into mydata -- confirm this is the intended suppression.
mydata[notAnon, "sexo_2016"] <- NA
# Rebuild the sdcMicro summary on the edited data to re-check k-anonymity.
createSdcObj(dat = mydata, keyVars = selectedKeyVars)
## The input dataset consists of 104661 rows and 43 variables.
## --> Categorical key variables: sexo_2016, grado, edad
## ----------------------------------------------------------------------
## Information on categorical key variables:
##
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
## Key Variable Number of categories Mean size Size of smallest (>0)
## sexo_2016 3 (3) 52317.000 (52317.000) 51120 (51120)
## grado 2 (2) 52330.500 (52330.500) 4392 (4392)
## edad 12 (12) 9512.364 (9512.364) 1 (1)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
##
## Number of observations violating
## - 2-anonymity: 0 (0.000%)
## - 3-anonymity: 6 (0.006%)
## - 5-anonymity: 9 (0.009%)
##
## ----------------------------------------------------------------------
# !!!No Open-ends
# !!!No GPS data
# Clean up column names that begin with "_" before the data are exported.
# seq_along() replaces 1:length(...): for a dataset with no columns the old
# form produced c(1, 0) and the loop failed on a missing column name.
# `range` and `i` remain top-level objects, exactly as before.
range <- seq_along(names(mydata))
for (i in range) {
  if (startsWith(names(mydata)[i], "_")) {
    # NOTE(review): this removes every underscore in the affected name, not
    # only the leading one (e.g. "_a_b" becomes "ab"). Behaviour is kept as
    # is; confirm whether only the leading underscore should be stripped.
    names(mydata)[i] <- gsub("_", "", names(mydata)[i], fixed = TRUE)
  }
}
# Export the processed dataset as Stata (.dta) and SPSS (.sav) files; both
# file names are built from `filename` plus the "_PU" suffix.
haven::write_dta(
  data = mydata,
  path = paste0(filename, "_PU.dta")
)
haven::write_sav(
  data = mydata,
  path = paste0(filename, "_PU.sav")
)
# Report title assembled from the dataset file name.
title_var <- paste0("DOL-ILAB SDC - ", filename)