# Start from a clean workspace so objects left over from a previous run
# cannot leak into this one. `all.names = TRUE` also clears dot-prefixed
# (hidden) objects. The earlier `all=t` passed base::t (the transpose
# function) instead of a logical flag and relied on partial argument matching.
rm(list = ls(all.names = TRUE))

filename <- "SAP2016_Rural_Raw_NOPII" # !!!Update filename
functions_vers <- "functions_1.7.R" # !!!Update helper functions file

# Load the helper functions used throughout the rest of the script.
# NOTE(review): `mydata` is never created in this script; presumably the
# sourced file loads it based on `filename` -- confirm.
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary
# !!!Include any Direct PII variables
# Every column of `mydata` whose name appears in `dropvars` is removed;
# names that are not present in `mydata` are simply ignored.
dropvars <- c(
  "A_NOM", "C_NOM",
  "A_APEPAT", "C_APEPAT",
  "A_APEMAT", "C_APEMAT",
  "J_DNI", "L_DNI"
)
mydata <- mydata[is.na(match(names(mydata), dropvars))]
# !!!Replace vector in "variables" field below with relevant variable names
# Pass the field-team identifier column(s) to the helper and keep its result
# as the new `mydata`. The recorded output below shows the original ENCUES
# codes replaced by consecutive integers.
mydata <- encode_direct_PII_team(variables = "ENCUES")
## [1] "Frequency table before encoding"
## ENCUES. Código del Encuestador
## No indica 3 4 10 20 21 22 30
## 124 1 1 714 810 1 21 749
## 32 40 41 42 43 46 50 60
## 1 569 3 1 1 3 491 484
## 66 70 80 90 101 580
## 1 12 16 19 1 1
## [1] "Frequency table after encoding"
## ENCUES. Código del Encuestador
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
## 124 1 1 714 810 1 21 749 1 569 3 1 1 3 491 484 1 12 16 19 1
## 22
## 1
# Remove the DIGITA column from `mydata` (no effect if it is absent).
# NOTE(review): presumably a data-entry operator code -- confirm in dictionary.
dropvars <- "DIGITA"
mydata <- mydata[is.na(match(names(mydata), dropvars))]
# !!!Include relevant variables, but check their population size first to confirm they are <100,000
# Location identifiers handed to the encoding helper; its return value
# replaces `mydata`. 999999 is passed as the helper's `missing` argument.
locvars <- c(
  "CODLOC",
  "CODMOD"
)
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## CODLOC. Código de Local
## 68599 68603 68735 68924 69985 70074 70088 70111 70149 73044 73181 73435
## 1 1 1 1 4 2 1 2 1 4 1 1
## 73553 73572 73591 73609 130308 142655 147484 147714 150122 150202 150259 150532
## 1 1 1 2 5 3 1 1 1 5 1 5
## 150565 150768 150773 150792 150966 150971 151027 151188 151193 151664 152215 152239
## 1 2 3 10 6 10 7 7 3 4 1 2
## 152263 152588 152734 153818 153875 153955 154035 155054 157010 157053 157072 157227
## 4 3 2 7 7 2 1 1 3 11 5 4
## 157487 157500 157581 157595 157604 157618 157656 157661 157703 157717 157722 157736
## 7 1 4 3 2 1 2 2 1 1 1 3
## 157835 157840 157864 157982 158161 158175 158180 158255 158340 158359 158383 158401
## 8 5 3 1 3 3 4 2 7 20 2 4
## 158458 158482 158496 158509 158646 158707 158745 159491 159556 159815 164968 165091
## 6 4 2 10 2 1 1 6 1 3 1 2
## 165185 165246 165345 165473 165604 165717 165736 165741 165798 165802 165840 166076
## 4 1 3 8 9 4 1 10 1 6 1 4
## 166118 166627 166830 167170 167194 167212 167226 167231 167269 167311 167410 167537
## 3 2 1 22 1 4 12 1 3 1 1 5
## 167561 167575 167580 167617 167641 167679 169150 170200 170479 170506 170610 343357
## 2 2 1 7 6 4 4 1 2 2 1 3
## 505991 515508 517084 517102 531928 538208 538779 563151 582376 601493 603468 603581
## 2 1 1 2 3 2 2 1 1 5 4 1
## 603717 605146 606061 609248 672305 <NA>
## 1 2 1 4 1 3565
## [1] "Frequency table after encoding"
## CODLOC. Código de Local
## 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965
## 3 4 2 1 1 1 7 7 12 2 2 1 2 1 20 2 3
## 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982
## 1 2 4 4 4 3 4 7 1 9 1 1 1 3 5 1 3
## 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999
## 1 1 1 6 10 11 2 6 3 4 1 2 1 7 7 1 6
## 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016
## 1 1 1 10 1 7 1 2 2 3 2 3 4 1 5 2 8
## 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033
## 2 1 5 6 1 4 1 2 1 3 2 2 1 22 4 4 1
## 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050
## 2 3 2 2 1 4 1 2 1 3 4 1 4 1 3 2 1
## 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067
## 2 4 5 3 10 1 1 4 1 1 7 3 6 1 3 2 1
## 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084
## 1 4 2 1 4 5 3 1 5 1 5 1 2 1 8 1 10
## 1085 <NA>
## 1 3565
## [1] "Frequency table before encoding"
## CODMOD. Código Modular
## 204800 204875 204909 205005 205047 205112 205120 205153 205682 205690
## 11 5 2 8 6 9 13 18 9 8
## 205773 205781 205815 206334 207373 207407 216341 219741 220285 226704
## 9 2 7 3 1 1 7 4 3 9
## 232207 232223 232231 232249 232264 232504 232512 232538 232546 232553
## 9 17 15 2 11 3 21 3 7 5
## 232561 232579 232587 232595 232603 232611 232645 232728 232777 233130
## 4 3 7 4 9 3 1 3 6 4
## 233296 233361 233676 233718 233734 233825 233882 233890 233908 233916
## 6 4 3 11 5 29 1 29 6 34
## 233924 233932 233940 233957 233965 233973 233981 233999 234021 234062
## 19 3 3 15 3 22 9 12 15 8
## 234096 234104 234112 234120 234138 234153 234161 234187 234195 234203
## 17 16 4 10 12 12 15 10 2 4
## 234229 234237 234351 234369 234377 234385 234401 234419 234427 234443
## 4 14 1 33 5 8 7 21 18 10
## 234450 234500 234583 234674 234682 234781 234831 234856 236158 236349
## 3 14 6 16 8 4 7 7 7 5
## 236422 236448 236463 236471 236489 236653 236661 236927 287409 287425
## 20 6 8 1 7 2 38 19 8 8
## 287466 309294 309377 309435 309567 310441 312090 312215 312306 312421
## 3 11 1 1 4 1 2 5 12 3
## 312744 312868 313395 313460 313890 313908 313965 313981 314070 314187
## 2 2 7 1 4 7 6 12 3 3
## 314211 314237 314245 314252 314260 314278 314294 405258 405498 405704
## 4 5 3 6 7 7 6 7 13 5
## 405738 405746 405837 405852 405894 405902 405928 405936 406009 406066
## 8 5 1 18 11 11 15 15 6 10
## 406082 406116 406124 406140 406215 406223 406264 406413 406595 406629
## 11 6 5 3 17 4 4 17 22 19
## 406645 406975 406983 407007 407049 408211 408245 408278 408286 408294
## 22 9 23 8 27 1 8 5 5 3
## 408328 408336 408393 408468 408476 408484 408492 408559 408567 408609
## 20 1 5 18 12 6 10 2 13 11
## 408666 408732 408773 408823 408856 408922 408955 408971 409003 409011
## 16 5 5 3 9 11 6 7 24 16
## 409029 409193 409227 409235 409243 409284 409292 409300 409318 409326
## 37 2 20 9 5 17 15 10 17 8
## 409359 409441 409565 410464 410480 410514 410613 410746 410779 410787
## 13 16 23 1 14 5 1 7 10 5
## 410803 473249 481283 486688 486928 489120 495069 495325 498782 499863
## 10 19 24 15 2 11 14 16 1 4
## 502922 504142 517581 517888 518084 518472 519496 519595 519678 525923
## 2 17 30 8 21 10 12 6 6 26
## 550392 551309 557587 579268 579276 579284 579292 579300 585885 587055
## 7 3 16 9 1 1 25 41 6 19
## 587147 589200 589747 589804 591255 591602 592147 612051 612119 612291
## 3 1 1 6 31 36 4 2 2 5
## 612507 612689 612804 615013 616110 617787 617829 621391 623017 623041
## 9 2 5 4 6 20 5 14 4 3
## 637272 639542 647388 647446 647628 655746 671628 672105 679829 680058
## 8 4 16 17 6 1 8 5 3 11
## 680124 699603 712562 712778 723031 730655 731273 735498 736116 775700
## 22 12 1 2 2 7 13 4 2 5
## 776039 783423 783597 783621 783696 783704 783720 783787 783795 791319
## 4 16 1 1 15 1 24 10 14 21
## 791574 794438 796888 818674 818708 844159 844183 891408 891812 895482
## 5 2 4 6 4 3 4 14 1 1
## 927871 930958 931055 931063 931436 932236 932434 932491 932608 932848
## 18 9 25 23 1 21 1 4 17 13
## 933226 933283 933291 933317 933531 933598 933846 1031574 1117704 1120005
## 16 14 1 10 1 3 9 4 15 1
## 1201649 1201870 1260942 1266428 1271840 1273655 1314376 1320647 1321322 1321330
## 17 14 2 5 1 2 2 11 7 17
## 1321355 1321421 1327279 1327287 1336072 1343573 1343581 1344639 1345024 1347269
## 20 21 2 10 4 15 18 17 9 1
## 1347293 1347301 1347434 1347459 1347921 1347939 1347970 1352269 1364868 1369248
## 19 14 1 15 28 20 15 1 7 24
## 1372507 1374438 1377209 1377233 1377415 1379361 1379544 1380021 1380120 1386226
## 7 3 24 1 29 1 5 28 4 8
## 1388610 1388644 1388651 1389261 1389279 1390095 1390467 1390517 1390582 1390665
## 1 8 11 10 6 2 24 7 1 23
## 1390673 1392083 1392091 1392109 1392117 1392125 1392141 1392174 1392216 1392224
## 22 19 5 18 8 14 36 21 2 9
## 1392240 1392257 1396191 1396209 1396225 1396852 1396878 1396886 1398783 1398932
## 3 12 30 24 37 22 4 17 1 1
## 1401934 1401942 1401959 1402536 1408426 1412634 1412873 1415983 1418615 1423003
## 18 8 29 7 2 8 4 1 5 22
## 1442185 1458348 1459791 1459809 1523802 1523810 1523828 1540988 1540996 1541192
## 6 10 10 20 17 11 15 11 16 15
## 1625532 1625557 1625573 1630631 1637263 1659101 1666130 1718626 1719210 1723469
## 7 8 9 3 4 13 1 4 6 3
## [1] "Frequency table after encoding"
## CODMOD. Código Modular
## 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
## 17 6 21 15 1 5 6 1 15 12 9 3 2 30 1 3 5 6 7 6 16
## 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
## 9 6 11 12 16 2 13 1 3 1 14 1 17 5 4 31 16 9 2 7 1
## 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400
## 28 13 8 1 3 1 14 10 11 5 24 1 4 11 19 37 11 7 10 8 6
## 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421
## 5 8 18 11 3 25 4 5 14 12 4 12 3 8 7 11 12 3 19 3 11
## 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442
## 8 30 3 11 6 1 5 10 29 21 10 11 17 4 2 7 17 1 1 8 4
## 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463
## 23 36 4 10 16 4 18 14 1 1 2 3 1 24 3 4 25 8 9 8 3
## 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484
## 9 10 29 19 7 3 8 3 7 23 6 1 5 7 16 29 19 6 3 14 4
## 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505
## 9 19 38 18 20 24 18 9 9 9 2 12 2 5 24 14 17 29 20 6 17
## 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
## 13 22 20 1 11 9 22 5 17 2 2 5 22 1 11 6 23 1 1 7 2
## 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547
## 1 15 2 4 4 6 4 16 2 15 4 1 5 7 21 5 6 9 1 1 1
## 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568
## 16 1 7 2 6 11 1 2 1 2 2 37 2 15 28 3 13 4 8 6 20
## 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589
## 9 13 33 7 1 9 10 15 2 14 1 6 8 24 21 14 18 3 18 11 2
## 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610
## 2 2 5 7 8 3 4 5 8 14 7 5 5 13 9 6 5 4 15 4 21
## 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631
## 7 2 6 34 10 3 10 4 24 1 18 1 3 4 10 3 1 4 16 12 5
## 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652
## 1 26 5 7 16 2 15 17 6 6 8 21 20 1 7 7 3 22 20 10 4
## 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673
## 3 36 4 4 10 17 5 4 16 7 8 22 8 3 8 4 4 5 14 22 3
## 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694
## 8 4 4 5 3 7 3 5 10 5 19 8 8 1 10 3 4 5 12 1 9
## 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715
## 16 17 4 15 3 17 5 17 15 1 6 27 15 17 11 5 17 6 7 1 9
## 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736
## 2 23 15 18 41 7 1 1 1 19 11 15 6 4 15 2 13 15 14 1 2
## 737 738 739 740 741 742 743 744 745 746 747
## 24 7 12 10 22 7 3 21 20 9 8
# Remove the NOMESC column from `mydata` (no effect if it is absent).
# NOTE(review): presumably the school name -- confirm in dictionary.
dropvars <- "NOMESC"
mydata <- mydata[is.na(match(names(mydata), dropvars))]
# Focus on variables with a "Lowest Freq" in dictionary of 30 or less.
# Delete birthdates: the day/month columns listed below are removed from
# `mydata`.
dropvars <- c("D_DD", "D_MM", "F_DD", "F_MM")
mydata <- mydata[is.na(match(names(mydata), dropvars))]
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)
# Variables flagged as indirect PII; the vector is handed to the
# `capture_tables` helper for review.
indirect_PII <- c("TURNO", "G_SEXO", "I_SEXO", "M38B", "M40")
capture_tables(indirect_PII)
# Recode those with very specific values.
# !!!No very specific values
# !!!Insufficient demographic data
# !!! Identify open-end variables here:
open_ends <- "M16B"
report_open(list_open_ends = open_ends)
# !!!Remove, as they contain a lot of sensitive information and they are in Spanish.
# The M16B column is removed from `mydata` after it has been reported above.
mydata <- mydata[is.na(match(names(mydata), "M16B"))]
# !!!No GPS data
# Export the processed data twice, as Stata (.dta) and SPSS (.sav) files.
# Both file names are `filename` with a "_PU" suffix, written relative to the
# current working directory.
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))
# Build the report title from the dataset name set at the top of the script.
title_var <- sprintf("DOL-ILAB SDC - %s", filename)