# Start from a clean workspace: remove every object in the current environment,
# including hidden (dot-prefixed) ones.
# `all.names = TRUE` is spelled out in full on purpose: a bare lowercase `t` is
# base R's transpose function, not a logical flag, and `all` only reaches
# `all.names` through partial argument matching.
# NOTE: this clears objects only; it does not detach packages or reset options().
rm(list = ls(all.names = TRUE))

Set up filenames

# !!!Update both values below for each new dataset.
# Base name of the raw dataset; it is also the stem of the "_PU" output files.
filename <- "SAP2016_Rural_Raw_NOPII"
# Helper-functions script that gets source()d before any processing.
functions_vers <- "functions_1.7.R"

Set up data and functions, and create the dictionary for dataset review

# Load the helper functions used throughout this script (encode_*, capture_tables,
# report_open). `mydata` is not created anywhere in this script, so it is
# presumably loaded by this helper file as well -- TODO confirm.
source(file = functions_vers)

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location with population <100,000
# Large Location: Location with population >100,000
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

Direct PII: variables to be removed

# !!!List every Direct PII variable that must be removed from the dataset.
# (Presumably respondent names, surnames and national ID numbers -- confirm
# against dictionary.csv.)
dropvars <- c(
  "A_NOM", "C_NOM",
  "A_APEPAT", "C_APEPAT",
  "A_APEMAT", "C_APEMAT",
  "J_DNI", "L_DNI"
)
# Keep only the columns whose names are NOT in the drop list.
mydata <- mydata[!(names(mydata) %in% dropvars)]

Direct PII-team: Encode field team names

# !!!Replace the value passed to "variables" with the field-team variables of
# this dataset. encode_direct_PII_team() is not defined in this script
# (presumably it comes from the sourced helper file); it prints frequency
# tables before and after encoding, and its result replaces `mydata`.
mydata <- encode_direct_PII_team(variables = "ENCUES")
## [1] "Frequency table before encoding"
## ENCUES. Código del Encuestador
## No indica         3         4        10        20        21        22        30 
##       124         1         1       714       810         1        21       749 
##        32        40        41        42        43        46        50        60 
##         1       569         3         1         1         3       491       484 
##        66        70        80        90       101       580 
##         1        12        16        19         1         1 
## [1] "Frequency table after encoding"
## ENCUES. Código del Encuestador
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20  21 
## 124   1   1 714 810   1  21 749   1 569   3   1   1   3 491 484   1  12  16  19   1 
##  22 
##   1
# Drop DIGITA outright instead of encoding it
# (presumably the data-entry operator code -- confirm against dictionary.csv).
dropvars <- "DIGITA"
mydata <- mydata[!(names(mydata) %in% dropvars)]

Small locations: Encode locations with pop <100,000 using random large numbers

# !!!List the small-location variables here, but first check their population
# size to confirm each is <100,000.
locvars <- c(
  "CODLOC",
  "CODMOD"
)
# encode_location() is not defined in this script (presumably it comes from the
# sourced helper file); it prints before/after frequency tables per variable.
# 999999 is passed as its `missing` argument (presumably the code that marks
# missing values -- confirm).
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## CODLOC. Código de Local
##  68599  68603  68735  68924  69985  70074  70088  70111  70149  73044  73181  73435 
##      1      1      1      1      4      2      1      2      1      4      1      1 
##  73553  73572  73591  73609 130308 142655 147484 147714 150122 150202 150259 150532 
##      1      1      1      2      5      3      1      1      1      5      1      5 
## 150565 150768 150773 150792 150966 150971 151027 151188 151193 151664 152215 152239 
##      1      2      3     10      6     10      7      7      3      4      1      2 
## 152263 152588 152734 153818 153875 153955 154035 155054 157010 157053 157072 157227 
##      4      3      2      7      7      2      1      1      3     11      5      4 
## 157487 157500 157581 157595 157604 157618 157656 157661 157703 157717 157722 157736 
##      7      1      4      3      2      1      2      2      1      1      1      3 
## 157835 157840 157864 157982 158161 158175 158180 158255 158340 158359 158383 158401 
##      8      5      3      1      3      3      4      2      7     20      2      4 
## 158458 158482 158496 158509 158646 158707 158745 159491 159556 159815 164968 165091 
##      6      4      2     10      2      1      1      6      1      3      1      2 
## 165185 165246 165345 165473 165604 165717 165736 165741 165798 165802 165840 166076 
##      4      1      3      8      9      4      1     10      1      6      1      4 
## 166118 166627 166830 167170 167194 167212 167226 167231 167269 167311 167410 167537 
##      3      2      1     22      1      4     12      1      3      1      1      5 
## 167561 167575 167580 167617 167641 167679 169150 170200 170479 170506 170610 343357 
##      2      2      1      7      6      4      4      1      2      2      1      3 
## 505991 515508 517084 517102 531928 538208 538779 563151 582376 601493 603468 603581 
##      2      1      1      2      3      2      2      1      1      5      4      1 
## 603717 605146 606061 609248 672305   <NA> 
##      1      2      1      4      1   3565 
## [1] "Frequency table after encoding"
## CODLOC. Código de Local
##  949  950  951  952  953  954  955  956  957  958  959  960  961  962  963  964  965 
##    3    4    2    1    1    1    7    7   12    2    2    1    2    1   20    2    3 
##  966  967  968  969  970  971  972  973  974  975  976  977  978  979  980  981  982 
##    1    2    4    4    4    3    4    7    1    9    1    1    1    3    5    1    3 
##  983  984  985  986  987  988  989  990  991  992  993  994  995  996  997  998  999 
##    1    1    1    6   10   11    2    6    3    4    1    2    1    7    7    1    6 
## 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 
##    1    1    1   10    1    7    1    2    2    3    2    3    4    1    5    2    8 
## 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 
##    2    1    5    6    1    4    1    2    1    3    2    2    1   22    4    4    1 
## 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 
##    2    3    2    2    1    4    1    2    1    3    4    1    4    1    3    2    1 
## 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 
##    2    4    5    3   10    1    1    4    1    1    7    3    6    1    3    2    1 
## 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 
##    1    4    2    1    4    5    3    1    5    1    5    1    2    1    8    1   10 
## 1085 <NA> 
##    1 3565 
## [1] "Frequency table before encoding"
## CODMOD. Código Modular
##  204800  204875  204909  205005  205047  205112  205120  205153  205682  205690 
##      11       5       2       8       6       9      13      18       9       8 
##  205773  205781  205815  206334  207373  207407  216341  219741  220285  226704 
##       9       2       7       3       1       1       7       4       3       9 
##  232207  232223  232231  232249  232264  232504  232512  232538  232546  232553 
##       9      17      15       2      11       3      21       3       7       5 
##  232561  232579  232587  232595  232603  232611  232645  232728  232777  233130 
##       4       3       7       4       9       3       1       3       6       4 
##  233296  233361  233676  233718  233734  233825  233882  233890  233908  233916 
##       6       4       3      11       5      29       1      29       6      34 
##  233924  233932  233940  233957  233965  233973  233981  233999  234021  234062 
##      19       3       3      15       3      22       9      12      15       8 
##  234096  234104  234112  234120  234138  234153  234161  234187  234195  234203 
##      17      16       4      10      12      12      15      10       2       4 
##  234229  234237  234351  234369  234377  234385  234401  234419  234427  234443 
##       4      14       1      33       5       8       7      21      18      10 
##  234450  234500  234583  234674  234682  234781  234831  234856  236158  236349 
##       3      14       6      16       8       4       7       7       7       5 
##  236422  236448  236463  236471  236489  236653  236661  236927  287409  287425 
##      20       6       8       1       7       2      38      19       8       8 
##  287466  309294  309377  309435  309567  310441  312090  312215  312306  312421 
##       3      11       1       1       4       1       2       5      12       3 
##  312744  312868  313395  313460  313890  313908  313965  313981  314070  314187 
##       2       2       7       1       4       7       6      12       3       3 
##  314211  314237  314245  314252  314260  314278  314294  405258  405498  405704 
##       4       5       3       6       7       7       6       7      13       5 
##  405738  405746  405837  405852  405894  405902  405928  405936  406009  406066 
##       8       5       1      18      11      11      15      15       6      10 
##  406082  406116  406124  406140  406215  406223  406264  406413  406595  406629 
##      11       6       5       3      17       4       4      17      22      19 
##  406645  406975  406983  407007  407049  408211  408245  408278  408286  408294 
##      22       9      23       8      27       1       8       5       5       3 
##  408328  408336  408393  408468  408476  408484  408492  408559  408567  408609 
##      20       1       5      18      12       6      10       2      13      11 
##  408666  408732  408773  408823  408856  408922  408955  408971  409003  409011 
##      16       5       5       3       9      11       6       7      24      16 
##  409029  409193  409227  409235  409243  409284  409292  409300  409318  409326 
##      37       2      20       9       5      17      15      10      17       8 
##  409359  409441  409565  410464  410480  410514  410613  410746  410779  410787 
##      13      16      23       1      14       5       1       7      10       5 
##  410803  473249  481283  486688  486928  489120  495069  495325  498782  499863 
##      10      19      24      15       2      11      14      16       1       4 
##  502922  504142  517581  517888  518084  518472  519496  519595  519678  525923 
##       2      17      30       8      21      10      12       6       6      26 
##  550392  551309  557587  579268  579276  579284  579292  579300  585885  587055 
##       7       3      16       9       1       1      25      41       6      19 
##  587147  589200  589747  589804  591255  591602  592147  612051  612119  612291 
##       3       1       1       6      31      36       4       2       2       5 
##  612507  612689  612804  615013  616110  617787  617829  621391  623017  623041 
##       9       2       5       4       6      20       5      14       4       3 
##  637272  639542  647388  647446  647628  655746  671628  672105  679829  680058 
##       8       4      16      17       6       1       8       5       3      11 
##  680124  699603  712562  712778  723031  730655  731273  735498  736116  775700 
##      22      12       1       2       2       7      13       4       2       5 
##  776039  783423  783597  783621  783696  783704  783720  783787  783795  791319 
##       4      16       1       1      15       1      24      10      14      21 
##  791574  794438  796888  818674  818708  844159  844183  891408  891812  895482 
##       5       2       4       6       4       3       4      14       1       1 
##  927871  930958  931055  931063  931436  932236  932434  932491  932608  932848 
##      18       9      25      23       1      21       1       4      17      13 
##  933226  933283  933291  933317  933531  933598  933846 1031574 1117704 1120005 
##      16      14       1      10       1       3       9       4      15       1 
## 1201649 1201870 1260942 1266428 1271840 1273655 1314376 1320647 1321322 1321330 
##      17      14       2       5       1       2       2      11       7      17 
## 1321355 1321421 1327279 1327287 1336072 1343573 1343581 1344639 1345024 1347269 
##      20      21       2      10       4      15      18      17       9       1 
## 1347293 1347301 1347434 1347459 1347921 1347939 1347970 1352269 1364868 1369248 
##      19      14       1      15      28      20      15       1       7      24 
## 1372507 1374438 1377209 1377233 1377415 1379361 1379544 1380021 1380120 1386226 
##       7       3      24       1      29       1       5      28       4       8 
## 1388610 1388644 1388651 1389261 1389279 1390095 1390467 1390517 1390582 1390665 
##       1       8      11      10       6       2      24       7       1      23 
## 1390673 1392083 1392091 1392109 1392117 1392125 1392141 1392174 1392216 1392224 
##      22      19       5      18       8      14      36      21       2       9 
## 1392240 1392257 1396191 1396209 1396225 1396852 1396878 1396886 1398783 1398932 
##       3      12      30      24      37      22       4      17       1       1 
## 1401934 1401942 1401959 1402536 1408426 1412634 1412873 1415983 1418615 1423003 
##      18       8      29       7       2       8       4       1       5      22 
## 1442185 1458348 1459791 1459809 1523802 1523810 1523828 1540988 1540996 1541192 
##       6      10      10      20      17      11      15      11      16      15 
## 1625532 1625557 1625573 1630631 1637263 1659101 1666130 1718626 1719210 1723469 
##       7       8       9       3       4      13       1       4       6       3 
## [1] "Frequency table after encoding"
## CODMOD. Código Modular
## 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 
##  17   6  21  15   1   5   6   1  15  12   9   3   2  30   1   3   5   6   7   6  16 
## 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 
##   9   6  11  12  16   2  13   1   3   1  14   1  17   5   4  31  16   9   2   7   1 
## 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 
##  28  13   8   1   3   1  14  10  11   5  24   1   4  11  19  37  11   7  10   8   6 
## 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 
##   5   8  18  11   3  25   4   5  14  12   4  12   3   8   7  11  12   3  19   3  11 
## 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 
##   8  30   3  11   6   1   5  10  29  21  10  11  17   4   2   7  17   1   1   8   4 
## 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 
##  23  36   4  10  16   4  18  14   1   1   2   3   1  24   3   4  25   8   9   8   3 
## 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 
##   9  10  29  19   7   3   8   3   7  23   6   1   5   7  16  29  19   6   3  14   4 
## 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 
##   9  19  38  18  20  24  18   9   9   9   2  12   2   5  24  14  17  29  20   6  17 
## 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
##  13  22  20   1  11   9  22   5  17   2   2   5  22   1  11   6  23   1   1   7   2 
## 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 
##   1  15   2   4   4   6   4  16   2  15   4   1   5   7  21   5   6   9   1   1   1 
## 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 
##  16   1   7   2   6  11   1   2   1   2   2  37   2  15  28   3  13   4   8   6  20 
## 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 
##   9  13  33   7   1   9  10  15   2  14   1   6   8  24  21  14  18   3  18  11   2 
## 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 
##   2   2   5   7   8   3   4   5   8  14   7   5   5  13   9   6   5   4  15   4  21 
## 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 
##   7   2   6  34  10   3  10   4  24   1  18   1   3   4  10   3   1   4  16  12   5 
## 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 
##   1  26   5   7  16   2  15  17   6   6   8  21  20   1   7   7   3  22  20  10   4 
## 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 
##   3  36   4   4  10  17   5   4  16   7   8  22   8   3   8   4   4   5  14  22   3 
## 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 
##   8   4   4   5   3   7   3   5  10   5  19   8   8   1  10   3   4   5  12   1   9 
## 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 
##  16  17   4  15   3  17   5  17  15   1   6  27  15  17  11   5  17   6   7   1   9 
## 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 
##   2  23  15  18  41   7   1   1   1  19  11  15   6   4  15   2  13  15  14   1   2 
## 737 738 739 740 741 742 743 744 745 746 747 
##  24   7  12  10  22   7   3  21  20   9   8
# Drop NOMESC outright instead of encoding it
# (presumably the school name -- confirm against dictionary.csv).
dropvars <- "NOMESC"
mydata <- mydata[!(names(mydata) %in% dropvars)]

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Focus on variables whose "Lowest Freq" in the dictionary is 30 or less.

# Delete birthdates: remove the D_/F_ day (DD) and month (MM) variables.
dropvars <- c("D_DD", "D_MM", "F_DD", "F_MM")
mydata <- mydata[!(names(mydata) %in% dropvars)]

Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values

# !!!List the relevant variables below (Indirect PII - Categorical, plus any
# Ordinal ones not processed yet).
indirect_PII <- c("TURNO", "G_SEXO", "I_SEXO", "M38B", "M40")

# capture_tables() is not defined in this script (presumably it comes from the
# sourced helper file); its return value is not stored here.
capture_tables(indirect_PII)

# Recode those with very specific values. 
# !!!No very specific values

Matching and crosstabulations: Run automated PII check

# !!!Insufficient demographic data

Open-ends: review responses for any sensitive information, redact as necessary

# !!!List the open-ended (free-text) variables to review here.
open_ends <- "M16B"

# report_open() is not defined in this script (presumably it comes from the
# sourced helper file); its return value is not stored here.
report_open(list_open_ends = open_ends)

# !!!Remove M16B: the responses contain a lot of sensitive information and
# they are in Spanish.
mydata <- mydata[!(names(mydata) %in% "M16B")]

GPS data: Displace

# !!!No GPS data

Save processed data in Stata and SPSS format

# Write the processed dataset next to the script as <filename>_PU.dta (Stata)
# and <filename>_PU.sav (SPSS).
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))

# Build the report title from the dataset name so it always tracks `filename`.
title_var <- paste("DOL-ILAB SDC - ", filename, sep = "")