# Start from a clean workspace: remove every object in the current
# environment, including hidden (dot-prefixed) names.
# `TRUE` is spelled out because `t` is base R's transpose function, not a
# logical value, and `all.names` is written in full instead of relying on
# partial argument matching of `all`.
rm(list = ls(all.names = TRUE))

Set up filenames

# Project-specific settings; both values must be updated for each dataset.

# !!!Update filename
filename <- "IDT_raw_parents_NOPII"

# !!!Update helper functions file
functions_vers <- "functions_1.7.R"

Set up data and functions, and create the dictionary for dataset review

# Run the helper script named in `functions_vers`. The encode_* helpers
# called later in this script are not defined here, so they are presumably
# provided by that file (as is `mydata`) - confirm against the helper script.
source(file = functions_vers)

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

Direct PII: variables to be removed

# !!!Include any Direct PII variables

# Columns flagged as direct PII. Every column of `mydata` whose name appears
# in this vector is removed below; listed names that do not exist in the data
# simply match nothing.
dropvars <- c(
  "prompt_student_id", "student_id", "student_name",
  "no_guardian_name", "grado2016_admin_name", "sap_househead_name",
  "consent_signature", "consent_signature_paper",
  "hh_name1", "hh_name2", "hh_lastname1", "hh_lastname2", "hh_dni",
  "i31", "p60", "reference", "i32", "random_audio_hh",
  # p27* series
  "p27a1", "p27a2", "p27a3", "p27a4", "p27a5", "p27a6", "p27a7", "p27a8",
  "p27c1", "p27c2",
  "p27d1", "p27d2", "p27d3", "p27d4",
  "p27ma1", "p27ma2", "p27ma3", "p27ma4", "p27ma5", "p27ma6",
  "p27mc1", "p27mc2", "p27mc3", "p27mc4", "p27mc5", "p27mc6",
  "p27md1",
  "address"
)

# Keep only the columns whose names are not in the drop list.
mydata <- mydata[!(names(mydata) %in% dropvars)]

Direct PII-team: Encode field team names

# !!!Replace vector in "variables" field below with relevant variable names

# Column "i4" is removed outright rather than encoded.
dropvars <- "i4"
mydata <- mydata[!(names(mydata) %in% dropvars)]

# Column "i5" is encoded by the helper, which prints frequency tables before
# and after encoding. Only the column name is passed, so the helper
# presumably reads the global `mydata` - confirm against its definition.
mydata <- encode_direct_PII_team(variables = "i5")
## [1] "Frequency table before encoding"
## i5. Seleccione el nombre del encuestador
## Missing-MINEDU 
##           2735 
## [1] "Frequency table after encoding"
## i5. Seleccione el nombre del encuestador
##    1 
## 2735

Small locations: Encode locations with population <100,000 using random large numbers

# !!!Include relevant variables, but check their population size first to confirm they are <100,000

# Location columns to be replaced by codes; the helper prints a frequency
# table for each column before and after encoding.
locvars <- c(
  "cod_mod2016_admin",
  "district",
  "p63"
)

# `missing = 999999` is forwarded unchanged to the helper; its exact meaning
# is defined there (presumably the code that marks missing values - confirm).
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## cod_mod2016_admin. 
##         1007160 1008440 1008929 1008960 1009844 1010040 1010149 1010180 1034016 
##       1       1      16       2       4       6       2      10       1       3 
## 1039676 1041516 1041557 1041631 1045111 1045434 1045715 1045798 1046226 1048990 
##       1       1       2      15       5       3       1      10       1       3 
## 1049493 1053628 1053669 1054154 1054196 1054238 1054352 1054394 1054436 1056902 
##       1       8       5       8       3       4       4       1       1       4 
## 1063023 1063106 1063148 1063221 1063304 1064989 1066026 1068238 1069954 1070077 
##       1       8      12       5       8       3       7       3       3      10 
## 1070390 1071919 1072040 1072727 1073212 1074301 1075779 1080068 1080258 1082874 
##       4       9       1       2       1       5       1       6       3       1 
## 1083633 1083674 1083716 1083815 1084508 1084987 1085851 1085976 1088400 1099654 
##       2       2       4       2      11       1       3       2       5       1 
## 1194265 1194380 1194810 1195189 1195577 1196526 1223023 1238229 1240720 1241454 
##      11      11       4       7       1       3       6       5       9       1 
## 1242908 1247832 1248392 1248509 1254192 1258334 1258649 1261742 1264340 1264670 
##       1       2      11       1       1       1       3       3       1       2 
## 1266840 1272822 1278662 1279124 1309392 1309574 1313444 1330315 1332220 1346675 
##       2       2       1      10       2       8       3       1       6       1 
## 1349448 1351410 1354091 1362318 1375211 1376870 1381342 1381599 1381896 1382829 
##       1       1       1       2       2       1       4       8       2       4 
## 1385251 1392893 1393453 1398148 1401801 1411438 1420694 1423615 1431667 1438027 
##       1       1       2       7       1       2       3      10       1       2 
## 1438035 1453232 1464668 1469675 1473511 1474600 1474964 1475011 1475201 1475284 
##       2       1       1       1       1       2       1       9      11       7 
## 1476258 1476464 1481514 1481720 1482975 1483627 1487339 1489822 1492255 1493964 
##       1       3       1       1       1       1       1       1       1       1 
## 1495365 1495407 1496314 1496355 1497007 1497551 1499748 1499961 1500354 1501188 
##      10       5       1       2       4       1       1       1       4       2 
## 1501451 1505494 1507094 1507250 1507276 1507532 1509108 1509496 1512789 1515360 
##      11      14      12      12       1      12       1       2       1       1 
## 1520279 1520287 1528520 1529981 1536994 1541879 1573328 1575323 1640556 1641521 
##       1       4       1       1       1       1       1       1       2      10 
## 1661271 1666882 1697234 1699933 1701002  207795  207894  207985  208058  208348 
##      10       1       3       1       1       4       1       2       4       2 
##  208538  208546  208561  208587  208694  208736  209304  209387  209510  209528 
##       3       3       7       1       1       5       3       4       1       1 
##  209536  209908  209916  209924  209940  209965  209973  210260  215632  245647 
##       6      20       6       2       6       9       4       1       8       2 
##  245654  245662  245670  245688  245696  245704  305656  314500  317131  317214 
##       1       7       6      10       1       1       2       1       3       2 
##  317289  317305  317313  317370  317453  317479  317560  317610  317941  318063 
##       1       1       3       1       2       2       5       1       1       4 
##  318089  318287  318352  318782  318949  319004  319020  319061  319145  319160 
##       4       1       3       1       1       1       1       1       2       2 
##  319285  320655  322479  322685  322974  323345  323378  325449  325464  325472 
##       4       1       6       1       2       3       4       1       5      11 
##  325480  325548  325555  325563  325589  325613  325647  325670  325704  328039 
##       1       1       2      11       5       1       9       6       8       2 
##  328047  328260  328468  328518  328526  329029  329128  329151  329573  329755 
##       3       2       1       3       3       2       1       1       7       4 
##  329805  330464  333666  334649  334656  334664  334672  334680  334706  334722 
##       2       2       5       4       2       8      10       1       8       3 
##  334730  334748  334847  334920  334987  335042  335091  335224  336495  336537 
##       1      10       2       1       9       4      10       1       2       4 
##  336545  336560  336586  336594  336610  336628  336636  337436  337568  337592 
##       3       5       5       2       3       7       5       7       5       4 
##  337733  337741  337766  338129  338228  338301  338343  338517  338640  338665 
##       2       1       1       1       2       3       2       6       5       3 
##  338848  339051  339317  339432  339499  339606  339804  340224  340281  340299 
##       1       7       1       1       1       5       1       8       1       2 
##  340315  340349  340372  340380  340398  340414  340422  340463  343566  432773 
##      10       7       1       6       1       3       2       1       2       3 
##  432906  433227  433235  433276  433490  433540  433680  433821  433961  434019 
##       1       4       1       6       6       4       4       8       5       4 
##  434076  434159  434191  434282  434464  434480  434498  434506  434548  434597 
##       2       3       3       3       3       4       3       2       3       2 
##  434829  436170  436212  436287  436303  436360  436444  436451  436493  436543 
##       5       1       8       1       2       5       5       6       4       2 
##  436584  436634  436642  436725  436766  436782  437210  437228  437236  437244 
##       1       4       1       1       5       1       7      27      10       8 
##  437251  437269  437277  437285  437319  437335  437343  437350  437400  437509 
##       2       2      12       8      13       4       1       6       4       2 
##  437525  437707  437715  437723  437731  437749  437772  449868  466730  468488 
##       1       3       6       2       3       1       1       7      16       2 
##  468611  469205  469700  481853  481903  482042  488619  488635  493239  493544 
##       2       2       8       2       9       1       9       9       1      12 
##  495259  495812  496166  496844  497024  499699  500124  500348  500611  501411 
##       4       4      13       1       2      10       1      10       2       1 
##  501502  501601  501676  501809  502435  502633  504993  505149  508903  510305 
##       4      10       6       4       1      10       1       1       5       1 
##  510800  513614  516674  519645  520486  521179  522318  522862  523423  523464 
##       1       2       3       4       2       4       1       1       2       4 
##  523621  523662  523761  526301  534321  535823  536029  536128  536151  536326 
##       2       1       5       5       1       4       3       3       1      16 
##  546002  555847  555862  555946  556266  556472  556548  556571  565119  565143 
##      19       1       7       3       2       2       2      12       3       2 
##  565200  565234  565267  566141  566158  566414  566430  566455  566463  566471 
##       2       9       2      21       2       4       5       4       3      14 
##  567743  567750  567768  578260  578278  578286  578336  578351  578401  578443 
##       1       9       1       3       2      10       1       3       9       2 
##  578518  578526  578534  578542  581710  581728  581736  581744  581777  581876 
##      12       9      15       8       2       2       8       3       2       5 
##  581892  581900  581991  582114  582122  582148  582163  582254  582304  582312 
##       2       6       3       1       1       1       3       3       4       5 
##  582387  582403  582411  582833  582866  582890  582932  582981  583013  583088 
##      14      11       9      12       4      14       2       3       9       2 
##  583328  583476  583567  583591  583922  598581  599159  599365  601492  603878 
##       4       6       2      20       4       7       2      11       2       2 
##  605469  605501  607424  607556  607697  628404  628602  628842  629261  629295 
##       7       1       1       3       1       2       2       3       2       2 
##  632299  632471  639922  642801  642892  643692  643783  643817  644880  647172 
##       3       1       4       3       3       2       2       7       3       3 
##  649129  649947  650002  650036  652081  656447  659623  659698  659722  659896 
##       1       5       1       9       1       1       1       3       5      12 
##  659953  662940  662957  663005  663013  663096  663112  663120  663138  663526 
##       4       2       1       3       1       5      10       1       1       1 
##  663534  663542  663559  663682  663971  664292  664508  664698  664748  664920 
##       4       3       7       4      10       1       1      10       9       3 
##  665489  691931  692434  692442  693499  693655  694547  694562  694570  694588 
##       1       5       5       1      12       2       2       1       9      11 
##  694596  694604  697557  703215  703223  703256  703736  703744  703751  704072 
##       1       6       1       8       1       1       2       9       7       1 
##  704312  704445  704460  705053  705129  705475  705772  725770  725861  728055 
##       2       1       1       8       5       1       1       7       4       2 
##  728196  728717  732321  732347  732495  739367  743773  743807  743815  743831 
##       4      12       1       1       6       1      13       1       5      13 
##  744540  744557  744573  751230  759399  759613  762120  762468  762773  762856 
##       3       6       2       1       3      11       1       1       8       5 
##  762864  762906  762914  763151  764134  764779  764936  765297  765305  765313 
##       1       4       6       6       1       6       6       5      10       2 
##  765321  765396  765412  765859  772970  773788  774026  774455  774679  774703 
##       1       5       4       3       1       6      12       2       1       2 
##  775312  775833  775874  777110  777144  777656  777680  777995  778027  778233 
##       3       2       2       1       1      18      10       8       1      12 
##  778738  778795  779041  779868  780759  780767  780791  781278  781302  781351 
##       2       1       1       1       1       1       2       5       9       4 
##  781369  781385  781831  781930  782102  820407  821082  824003  824813  825752 
##      17       1       3       1       6       4       9       1       3       1 
##  828962  832253  832279  832287  832303  832311  832337  834853  835058  846048 
##       1       1       6       2       5       7       1       2       7       4 
##  847087  855791  869198  870931  871160  872515  874198  874214  875476  879791 
##       1       2      10       4       1      12       1       1       1      13 
##  879817  883884  884510  884528  884544  884551  884593  884627  885517  900704 
##       1       1       2       2       4       3       2       1       1       1 
##  900761  900852  900910  900977  901033  901066  901082  901413  901587  915256 
##       5       3       6       2       1       1       2       1       1       2 
## [1] "Frequency table after encoding"
## cod_mod2016_admin. 
## 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 
##  10  11   2   1   5   4   3   4   1   5   2   1   2   3  12   8   1   1   8   3  10 
## 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 
##   1   8   2   1   1   1   3  10   1   4   3   6  10   1   7   7   5  14   1   1  20 
## 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 
##   1  10   2   4   3   4  12  15   8   4   1   1  13   6  10   1   1   2  19   4   2 
## 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 
##   1   2   3   9   4   1   3   1   1  11   2   9   1   2   2   1   3   1   1   2   1 
## 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 
##   1   6   2  12   2   2   3   1   1   3   4  10   2   3   1   1   4  21   6   2   2 
## 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 
##   1   9   3   1   2   4   5   5   2   7   2   2   5   2   3   1   3  10   1   3   1 
## 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 
##   1   1   1   7   1   8  13  12   9   1   4   1   1   3   1   4   9  11   3  11   6 
## 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 
##   5   1   1   2   1   2   7  12  10   1   2   2   1  15  12  11   2  12   1   6   5 
## 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 
##   7   4   3   2   2   1   1   4   1   3   2  10   5   9   4   3   1   1  11   2   1 
## 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 
##   2   1   2   2   1   2   4   2   4   9   4   1   2   1   1   1   8   1   6   4   7 
## 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 
##   1   2   1   2   8   3   1   1   9   2   2   2   1   5   1   2  12   5   1   8   4 
## 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 
##   6  20   6   1   7   1   1   5   3   1   2   9   1   2   2   1   1  12   1  10   2 
## 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 
##   2   1   1   1  27   1   2   7   1   2   1   1   5   1   1   1  10   4   1   3   1 
## 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 
##   1   1   1   3   4   3   6   3   9   3   2   4   6   1   1   4  12   5  13   1   1 
## 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 
##   8   8   7   5   3   4   1   1   1   7   4   6   5  10   2   2   1   1   1   1  11 
## 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 
##   1   2   8  18   1   3   2   8   4   5   4  10   2   2   2   9   2   3   1   6   1 
## 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 
##   3   1   1   3   4   4  16   1   3   9   2   3   3   1  11   2   7   4  10   1  12 
## 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 
##   2   2  17   3   2   4   1   1   2   2   1   3   1   1   1   2   2   9   8   9   1 
## 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 
##   4   2   2   1   3   4   9   9   2   2   5   1   1   2  12   4   2   4   1   1   6 
## 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 
##   1  16   1   5   5   1   1   1   2   4   1  10   2   5   9   4  13   3   4   1   1 
## 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 
##   6   5   3   3   2   1   2   2   8  10   2   3   3   5   2  12   3   3   3  14   3 
## 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 
##   3   4   5   1   2   5  10   7   4   1   3   2   4   6   2   2  10   8   4   3   1 
## 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 
##   5   9   1   5   3  12   1   1   1  16   2   5  14  14   2   3   1   2   3   4   7 
## 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 
##   1   3  10  11   1   2  10  10   3   1   6   1   2   8   5   2   9   1   1  11   2 
## 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 
##   6   1   1  10   4   1   2   1   8   3   6   1   4   1   6   1   1   7   4   3  11 
## 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 
##   1   5   7   4   1   1  11   2   6   1   2   2   1   1   4   1   4   3   1   1   2 
## 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 
##   5   4   2   1   4   5   2   1   1   5   6   2   4   6   5   6   4   5   1   1   1 
## 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 
##   1   2   6   1   3   2   6   2   6   1   1   1   2   5   3   1   1   8   7   1   4 
## 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 
##   1   2   3   4   2   7   2   5   5   1   3   1   1   5   1  10   1   1   3   1   2 
## 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 
##   1   7   3   5   3   5   6   1   8   1   3   9   2   1   1   1   3   2   1   1   1 
## 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 
##   1   1   1   1   6   1   1   1  12   2   1   1   5   2   4   3   7   7   7  10   1 
## 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 
##   8   9   1   6   3   1   8   8  13   1   2   4   2   2   4   3   1   3   2   1   3 
## 917 918 919 920 921 922 923 924 
##   6   1   1   1   2   1   2   4 
## [1] "Frequency table before encoding"
## district. Distrito del hogar
##                     Callao                 Bellavista Carmen de la Legua Reynoso 
##                        196                         14                          5 
##                   La Perla                 Ventanilla               Lima Cercado 
##                         17                        105                         93 
##                      Ancón                        Ate                      Breña 
##                         40                        221                         11 
##                 Carabayllo                 Chaclacayo                 Chorrillos 
##                         68                         19                         39 
##                Cieneguilla                      Comas                El Agustino 
##                         34                        232                        122 
##              Independencia                Jesús María                  La Molina 
##                         64                          5                          8 
##                La Victoria                      Lince                 Los Olivos 
##                         24                          2                         51 
##       Lurigancho - Chosica                      Lurin          Magdalena del Mar 
##                        132                         24                         11 
##               Pueblo Libre                 Miraflores                 Pachacamac 
##                         11                          2                         29 
##              Puente Piedra              Punta Hermosa                Punta Negra 
##                         88                          9                          1 
##                      Rímac                San Bartolo                  San Borja 
##                         86                          1                         10 
##                 San Isidro     San Juan de Lurigancho     San Juan de Miraflores 
##                          1                        253                         99 
##                   San Luis       San Martín de Porres                 San Miguel 
##                          3                        215                         24 
##                Santa Anita                 Santa Rosa          Santiago de Surco 
##                         50                          1                         19 
##                  Surquillo          Villa El Salvador    Villa María del Triunfo 
##                         13                        127                        155 
##                       <NA> 
##                          1 
## [1] "Frequency table after encoding"
## district. Distrito del hogar
##  345  346  347  348  349  350  351  352  353  354  355  356  357  358  359  360  361 
##  122    2  232   14   34   19    1   24   24    5  196  127   10   40  253   68   13 
##  362  363  364  365  366  367  368  369  370  371  372  373  374  375  376  377  378 
##  105   39   51  215   88   11    3   19   11   86    9  221   24    1   64  132   93 
##  379  380  381  382  383  384  385  386  387  388  389 <NA> 
##    1    1   99    2   50   29    5   17  155   11    8    1 
## [1] "Frequency table before encoding"
## p63. ¿En qué escuela está matriculado/a o está pensando matricular a ${student_name}
##         1007160 1007491 1008440 1008929 1008960 1009844 1010180 1034016 1039676 
##     634       1       1       3       3       1       8       1       3       1 
## 1041557 1041631 1045111 1045434 1045715 1045756 1045798 1048990 1049493 1053628 
##       2       8       1       3       1       1       3       1       2       5 
## 1053669 1054071 1054154 1054196 1054238 1054311 1054352 1054436 1056902 1063106 
##       2       1       4       1       1       1       2       1       1       5 
## 1063148 1063262 1063304 1063346 1064989 1066026 1068238 1069954 1070077 1070119 
##       4       1       4       2       2       5       1       3       4       1 
## 1070150 1071919 1072040 1072230 1072727 1074301 1074509 1075779 1083633 1083716 
##       1       2       1       1       1       3       1       1       1       5 
## 1083815 1084508 1085851 1085976 1097567 1194265 1194380 1194810 1194901 1195189 
##       2       9       2       2       1       4       6       2       1       1 
## 1195577 1196203 1199009 1223023 1238229 1240183 1240720 1241983 1242270 1242361 
##       1       1       1       3       1       1       7       1       1       1 
## 1242437 1242908 1243781 1247832 1248392 1257450 1257567 1258649 1263813 1264183 
##       2       1       1       3       9       1       1       2       1       1 
## 1264381 1264670 1266758 1266840 1267038 1267079 1272822 1278662 1279124 1309392 
##       1       2       1       1       1       1       2       1       3       1 
## 1309574 1311984 1312362 1312693 1320100 1324508 1324839 1332220 1335348 1335546 
##       3       1       1       1       1       1       1       1       1       1 
## 1341148 1343367 1345206 1346675 1349430 1352954 1354091 1357607 1362318 1369487 
##       1       1       1       1       1       1       1       1       2       1 
## 1369677 1372655 1375211 1376870 1381144 1381375 1381599 1381896 1382829 1383413 
##       1       1       2       1       1       2       4       2       1       1 
## 1384221 1385251 1392810 1392893 1398148 1399898 1402064 1406057 1412790 1420694 
##       1       1       1       1       3       1       2       1       1       4 
## 1423615 1426592 1431667 1437375 1438035 1459262 1464668 1469675 1472836 1474600 
##       5       1       1       1       3       1       1       1       1       1 
## 1475011 1475201 1475284 1475607 1475920 1482975 1483239 1483361 1484039 1485945 
##       7       8       6       1       1       1       1       1       1       1 
## 1486257 1487339 1488659 1489822 1493204 1495365 1495407 1495720 1496256 1496355 
##       1       1       1       4       1       2       1       1       1       2 
## 1497007 1497551 1497825 1498203 1501188 1501451 1505494 1507094 1507250 1507532 
##       3       1       1       1       1       7      11       9       7       7 
## 1507557 1507870 1509181 1509835 1510353 1512409 1515352 1519149 1520287 1524883 
##       1       1       2       1       1       1       1       1       2       1 
## 1524966 1528520 1529023 1531359 1534809 1535103 1535392 1549021 1575323 1595347 
##       1       1       1       1       1       1       1       1       1       1 
## 1632660 1638972 1640556 1641521 1654078 1661271 1662386 1664390 1664507 1664895 
##       1       1       1       2       6       6       2       1       2       1 
## 1669647 1677632 1691989 1697051 1697234 1699800 1699933 1700996  205880  206086 
##       1       1       1       1       2       1       1       1       1       1 
##  207845  208371  208538  209205  209304  209387  209510  209528  209536  209908 
##       2       4       1       1       4       6       2       2       8      18 
##  209916  209924  209940  209965  209973  210260  211458  215590  215707  215897 
##       5       4       3      11       3       1       1       1       3       1 
##  219410  245647  245654  245662  245670  245688  245696  262279  268029  275610 
##       1       2       1       6       6      11       1       1       1       1 
##  282632  286427  305656  314500  317206  317453  317495  317941  318048  318584 
##       1       1       1       1       1       2       1       2       1       1 
##  318931  318949  319145  319244  320655  322479  322529  322875  323345  323451 
##       1       2       1       2       1       2       1       2       1       4 
##  323733  324236  325449  325456  325464  325472  325480  325498  325555  325563 
##       1       1       1       1       1       6       1       1       2       2 
##  325589  325613  325647  325670  325696  325704  327650  328237  328252  328336 
##       5       1       5       5       1       8       1       2       1       1 
##  328351  328385  328872  329573  329755  329805  334649  334656  334664  334672 
##       1       1       1       4       2       4       6       4       8       6 
##  334680  334706  334722  334730  334748  334821  334847  336297  336495  336545 
##       1       9       3       1       7       3       1       2       5       5 
##  336560  336578  336586  336594  336602  336610  336628  336636  336990  337295 
##       4       1       6       5       2       3       7       2       1       1 
##  337436  337568  337592  337733  337741  337766  338640  339499  340224  340281 
##       7       3       4       2       3       2       1       1       4       1 
##  340299  340315  340349  340372  340380  340398  340414  340448  340463  343566 
##       4      10       8       4       3       1       4       1       1       5 
##  381814  411702  427690  432773  433037  433219  433235  433326  434035  434076 
##       1       1       1       2       1       1       2       1       1       1 
##  434159  434233  434282  434373  434381  434399  436261  436311  436352  436378 
##       1       4       2       1       1       1       2       4       1       1 
##  436386  436444  436543  436592  436642  436667  436725  436758  436782  437210 
##       2       1       1       1       2       1       1       1       2       4 
##  437228  437236  437244  437251  437269  437277  437285  437319  437327  437335 
##      30       6       9       2       4      10      11       7       2       2 
##  437350  437400  437442  437475  437509  437707  437715  437723  437731  437749 
##       9       2       1       1       3       4       5       2       4       2 
##  437772  449512  449819  449868  466730  469205  469700  478404  478420  481903 
##       2       1       1       7      10       2       4       1       2       3 
##  482042  488619  488635  493239  493544  494633  494732  495259  495812  496166 
##       1       8       1       2       6       1       1       4       4       5 
##  496265  496281  496521  496653  497081  498824  499699  500348  500611  501411 
##       2       1       1       1       3       2       6       3       4       2 
##  501502  501601  501676  501809  501957  502435  502484  504993  505149  510800 
##       2       3       4       2       5       1       1       2       1       1 
##  516773  519645  520486  521179  522318  523423  523621  523761  523860  525451 
##       1       1      10       4       2       2       2       1       1       1 
##  525857  526400  535823  536029  536128  536151  536326  541011  541649  542829 
##       1       1       7       2       4       2      10       1       1       1 
##  543116  546002  555599  555862  555946  556241  556266  556290  556472  556571 
##       1       9       1       1       4       1       1       1       4       9 
##  561662  565200  565234  565267  566141  566158  566422  566430  566448  566455 
##       2       1       6       1      16       3       1       1       1       4 
##  566463  566471  567743  567750  578260  578278  578286  578310  578336  578351 
##       1      11       1       3       2       1       4       1       1       1 
##  578401  578443  578518  578526  578534  578542  581710  581728  581736  581744 
##       3       3       8       9       7       6       5       1       6       6 
##  581876  581892  581900  582114  582148  582163  582171  582254  582304  582312 
##       2       1       2       2       1       6       1       1       2       8 
##  582387  582403  582411  582833  582866  582890  582981  583013  583088  583328 
##       4      10       4       7       3      12       2       6       2       3 
##  583476  583534  583567  583591  587121  598581  599159  599365  601492  601708 
##       3       1       1       9       2       6       2       7       5       1 
##  603878  605501  607424  607556  628826  628842  629261  639732  639922  642801 
##       2       1       1       5       1       2       1       3       8       3 
##  642892  642926  643692  643783  643874  644690  646646  646711  647172  647792 
##       1       2       2       3       1       1       2       2       1       4 
##  649129  649673  650002  650036  652081  659698  659722  659896  659953  662734 
##       1       1       1       4       1       2       2       9       3       1 
##  662742  662940  662957  663005  663096  663112  663138  663534  663542  663559 
##       1       2       4       1       2       7       1       2       1       7 
##  663609  663971  664292  664490  664698  664748  664912  664920  674564  689836 
##       1       5       1       1      15       5       1       1       1       1 
##  690008  692434  693465  693499  694315  694562  694570  694588  694604  697557 
##       3       4       1       5       1       1       5       4       5       2 
##  703215  703223  703231  703256  703413  703744  703751  704072  704460  705053 
##       3       1       1       1       1      11       6       1       2       6 
##  705160  705475  722918  725770  725861  728196  728717  732321  732339  732347 
##       1       1       1      10       3       2       5       4       1       1 
##  732461  732495  743773  743799  743831  744573  745448  753319  759613  762468 
##       2       6       6       1       8       1       1       1       7       1 
##  762773  762856  762864  762906  762914  763151  763169  764134  764936  765164 
##       8       1       1       4       6       1       2       1      12       1 
##  765297  765305  765313  765396  765412  773788  773846  773903  774026  774455 
##       3       9       2       7       6       2       2       1       2       1 
##  774679  774703  775346  775833  775874  777110  777334  777656  777680  777714 
##       2       1       1       1       2       1       1       6      16       1 
##  777995  778233  778738  779041  779868  780783  781278  781302  781369  781930 
##       5       6       3       1       1       1       4       3      11       2 
##  782078  782102  782615  824813  826479  828962  829176  829325  831305  832253 
##       1       5       2       1       1       1       1       2       2       1 
##  832279  832287  832311  832337  834960  834994  835033  846014  869198  870345 
##       8       4       1       5       4       1       1       1       1       2 
##  870444  871012  872515  874214  875443  879791  879817  883454  883884  884510 
##       1       1       9       1       1      10       1       1       1       2 
##  884528  884544  884551  884593  884601  884650  884825  885277  885392  900647 
##       3       3       3       3       1       1       1       1       4       1 
##  900761  900910  900977  901413  922054      99 
##       3       5       2       1       1     225 
## [1] "Frequency table after encoding"
## p63. ¿En qué escuela está matriculado/a o está pensando matricular a ${student_name}
## 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 
##   2   2   1   1   1   3   4   1   1   2   1   1   1   4   1   1   2   1   2   1   1 
## 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 
##   1   2   3   2   1   1   1   1   1   1   1   1   2   6   1   2   2   1   1   4   2 
## 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 
##   5   1   2   8   4   1   1   1   3   2   4   3   1   1   2  11   1  11   7   1   8 
## 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 
##   1   1   1   1   2   5   7   1   1   3   9   1   4   1   1   1   5  11   5   1   2 
## 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 
##   2   4   1   2   3   3   1   4   1   3   3   1   1   1   1   1   1   1   3   1   1 
## 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 
##   1   1   1   3   4   8   3   1   1   4   1   2   9   5   7   1   6   4   2   1   2 
## 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 
##   1   2   1   1   1   1   1   2   1   1   2   1   2   1   5   6   2   1   2   2   1 
## 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 
##   1   1   9   8   7   4   4   1   1   1   1   1   3   1   1   1   1   2   2   2   1 
## 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 
##   2   1   1  11   2   3   1   1   2   3   1   5   1   1   1   4   2   1   1   1   7 
## 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 
##   1  10   2   1   2   1   1   1   1   1   1   1   2  16   6   1   1   1   1   1   3 
## 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 
##  10   4   7   1   1   9   3  15   6   1   4   5   1   1   1   1   1   1   5   1   1 
## 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 
##   1   3   1   3   4   1   2   4   1   1   9   6   2   2  10   4   1  12   1   5   5 
## 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 
##   1   2   1   2   2   3   6   3   5  10   2   5   1   2   1   6   1   4   2   1   1 
## 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 
##   3   2   1   1   1   5   3   1   3   1   2   1   2   1   8   1   2   2   9   4   7 
## 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 
##   3   3   2   1   1   4   4   6   1   2   1   6   2   2   3   6   4   2   1   9   1 
## 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 
##   1   1   1   1   2   2   1   2   4   9   5   1   1   1   1   2   4   1   1   1   1 
## 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 
##   2   1   6   2   2   2   1   1   1   2   1   1   1   1   1   3   1   1   1   1   6 
## 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 
##   1  11   2   3   2   1   1   6   2   1   1   1   1   1   3   8   1   1   2   1   1 
## 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 
##   1   1   4   2   1   3   2   3   9   5   1   1   7   1  11   1   1   1   1   1   1 
## 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 
##   4   1   1   1   4   1   3   1  11   1   1   1   2   2   1   2   4   1   1  10   1 
## 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 
##   4   2   1   3   1  30   1   1   1   1   5   1   2   3   8   1   1   3   1   3   2 
## 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 
##   2   3   1  16   2   6   1   1   1   1   1   2   1   1   1  18   5   1   5   7   4 
## 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 
##   8   2   1   6   1   8   1   9   1   8   7   6   1   8   1   4   1   1   7   1   2 
## 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 
##   9   2 634   1   1   2   1   4   7   8   1   1   1   2   1   2   1   1   1   1   1 
## 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 
##   4   3   1   1   6   2   4   1   1   1   2   1   3   1   1   3   1   1   1   2   1 
## 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 
##   1   5   9   1   6   1   7   2   1   1   1   1   5   2   1   4   4   1   2   1   2 
## 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 
##   1   3   3   1   4   1   1   1   6   1   4   2   4   1   6   4   1   1  12   1   1 
## 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 
##   4   1   1   1   5   1   3   1   1   1   1   1   2   4   1   1   1   1   1   6   4 
## 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 
##   1   2   1   2   4   9   1   1   1   8   3  10   2   2   5   2   2   1   2   1   7 
## 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 
##   1   6   1   1   4   1   5   3   1   2   1   1   1   3   8   6   6   1   1   7   2 
## 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 
##   2   2   2   5   5  10   1   3   2   1   2   1   1   2   1   1   1   1   2   1   3 
## 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 
##   1   1   4   1   4   1   7   1   1   6   4   1   1   1   1   1   3   1   2   7   6 
## 826 827 828 829 830 831 832 833 834 835 836 837 838 839 
##   1   6   5 225  10   1   5   2   4   6   1   5   7   1
# Remove p62 from the working dataset. `dropvars` is reassigned here, so it
# now holds only this one column name.
dropvars <- "p62"
mydata <- mydata[is.na(match(names(mydata), dropvars))]

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Recode ages: single years are kept for 31-49, while the tails are collapsed
# into "30 or younger" and "50 or older".

mydata[["hh_ageinyears"]] <- as.numeric(mydata[["hh_ageinyears"]])

# Cut points: 18 opens the lowest band, followed by one cut per year 31..50.
break_age <- c(18, 31:50)

# Value labels coded 1..22 in band order; the last code (22) is labelled "NA".
labels_age <- setNames(
  as.numeric(1:22),
  c("30 or younger", as.character(31:49), "50 or older", "NA")
)

# NOTE(review): `missing = 999999` is handed to the helper unchanged; its exact
# treatment is defined in the sourced functions file -- confirm there.
mydata <- ordinal_recode(
  variable = "hh_ageinyears",
  break_points = break_age,
  missing = 999999,
  value_labels = labels_age
)

## [1] "Frequency table before encoding"
## hh_ageinyears. 
##   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32   33   34 
##    7    9   13    4   15    6    4    9   10   11   15   30   35   58   86   76   98 
##   35   36   37   38   39   40   41   42   43   44   45   46   47   48   49   50   51 
##  122  104  125  139  127  142  126  117  126  101  108  104   82   77   84   66   72 
##   52   53   54   55   56   57   58   59   60   61   62   63   64   65   66   67   68 
##   57   41   50   48   35   27   23   20    9   11   13   11   12    7    9    5    3 
##   69   70   71   72   73   74   75   76   77   78   79   80   83   84   87 <NA> 
##    2    5    8    2    4    1    3    1    1    2    1    2    5    3    1    5 
##     recoded
##      [18,31) [31,32) [32,33) [33,34) [34,35) [35,36) [36,37) [37,38) [38,39) [39,40)
##   18       7       0       0       0       0       0       0       0       0       0
##   19       9       0       0       0       0       0       0       0       0       0
##   20      13       0       0       0       0       0       0       0       0       0
##   21       4       0       0       0       0       0       0       0       0       0
##   22      15       0       0       0       0       0       0       0       0       0
##   23       6       0       0       0       0       0       0       0       0       0
##   24       4       0       0       0       0       0       0       0       0       0
##   25       9       0       0       0       0       0       0       0       0       0
##   26      10       0       0       0       0       0       0       0       0       0
##   27      11       0       0       0       0       0       0       0       0       0
##   28      15       0       0       0       0       0       0       0       0       0
##   29      30       0       0       0       0       0       0       0       0       0
##   30      35       0       0       0       0       0       0       0       0       0
##   31       0      58       0       0       0       0       0       0       0       0
##   32       0       0      86       0       0       0       0       0       0       0
##   33       0       0       0      76       0       0       0       0       0       0
##   34       0       0       0       0      98       0       0       0       0       0
##   35       0       0       0       0       0     122       0       0       0       0
##   36       0       0       0       0       0       0     104       0       0       0
##   37       0       0       0       0       0       0       0     125       0       0
##   38       0       0       0       0       0       0       0       0     139       0
##   39       0       0       0       0       0       0       0       0       0     127
##   40       0       0       0       0       0       0       0       0       0       0
##   41       0       0       0       0       0       0       0       0       0       0
##   42       0       0       0       0       0       0       0       0       0       0
##   43       0       0       0       0       0       0       0       0       0       0
##   44       0       0       0       0       0       0       0       0       0       0
##   45       0       0       0       0       0       0       0       0       0       0
##   46       0       0       0       0       0       0       0       0       0       0
##   47       0       0       0       0       0       0       0       0       0       0
##   48       0       0       0       0       0       0       0       0       0       0
##   49       0       0       0       0       0       0       0       0       0       0
##   50       0       0       0       0       0       0       0       0       0       0
##   51       0       0       0       0       0       0       0       0       0       0
##   52       0       0       0       0       0       0       0       0       0       0
##   53       0       0       0       0       0       0       0       0       0       0
##   54       0       0       0       0       0       0       0       0       0       0
##   55       0       0       0       0       0       0       0       0       0       0
##   56       0       0       0       0       0       0       0       0       0       0
##   57       0       0       0       0       0       0       0       0       0       0
##   58       0       0       0       0       0       0       0       0       0       0
##   59       0       0       0       0       0       0       0       0       0       0
##   60       0       0       0       0       0       0       0       0       0       0
##   61       0       0       0       0       0       0       0       0       0       0
##   62       0       0       0       0       0       0       0       0       0       0
##   63       0       0       0       0       0       0       0       0       0       0
##   64       0       0       0       0       0       0       0       0       0       0
##     recoded
##      [40,41) [41,42) [42,43) [43,44) [44,45) [45,46) [46,47) [47,48) [48,49) [49,50)
##   18       0       0       0       0       0       0       0       0       0       0
##   19       0       0       0       0       0       0       0       0       0       0
##   20       0       0       0       0       0       0       0       0       0       0
##   21       0       0       0       0       0       0       0       0       0       0
##   22       0       0       0       0       0       0       0       0       0       0
##   23       0       0       0       0       0       0       0       0       0       0
##   24       0       0       0       0       0       0       0       0       0       0
##   25       0       0       0       0       0       0       0       0       0       0
##   26       0       0       0       0       0       0       0       0       0       0
##   27       0       0       0       0       0       0       0       0       0       0
##   28       0       0       0       0       0       0       0       0       0       0
##   29       0       0       0       0       0       0       0       0       0       0
##   30       0       0       0       0       0       0       0       0       0       0
##   31       0       0       0       0       0       0       0       0       0       0
##   32       0       0       0       0       0       0       0       0       0       0
##   33       0       0       0       0       0       0       0       0       0       0
##   34       0       0       0       0       0       0       0       0       0       0
##   35       0       0       0       0       0       0       0       0       0       0
##   36       0       0       0       0       0       0       0       0       0       0
##   37       0       0       0       0       0       0       0       0       0       0
##   38       0       0       0       0       0       0       0       0       0       0
##   39       0       0       0       0       0       0       0       0       0       0
##   40     142       0       0       0       0       0       0       0       0       0
##   41       0     126       0       0       0       0       0       0       0       0
##   42       0       0     117       0       0       0       0       0       0       0
##   43       0       0       0     126       0       0       0       0       0       0
##   44       0       0       0       0     101       0       0       0       0       0
##   45       0       0       0       0       0     108       0       0       0       0
##   46       0       0       0       0       0       0     104       0       0       0
##   47       0       0       0       0       0       0       0      82       0       0
##   48       0       0       0       0       0       0       0       0      77       0
##   49       0       0       0       0       0       0       0       0       0      84
##   50       0       0       0       0       0       0       0       0       0       0
##   51       0       0       0       0       0       0       0       0       0       0
##   52       0       0       0       0       0       0       0       0       0       0
##   53       0       0       0       0       0       0       0       0       0       0
##   54       0       0       0       0       0       0       0       0       0       0
##   55       0       0       0       0       0       0       0       0       0       0
##   56       0       0       0       0       0       0       0       0       0       0
##   57       0       0       0       0       0       0       0       0       0       0
##   58       0       0       0       0       0       0       0       0       0       0
##   59       0       0       0       0       0       0       0       0       0       0
##   60       0       0       0       0       0       0       0       0       0       0
##   61       0       0       0       0       0       0       0       0       0       0
##   62       0       0       0       0       0       0       0       0       0       0
##   63       0       0       0       0       0       0       0       0       0       0
##   64       0       0       0       0       0       0       0       0       0       0
##     recoded
##      [50,1e+06)
##   18          0
##   19          0
##   20          0
##   21          0
##   22          0
##   23          0
##   24          0
##   25          0
##   26          0
##   27          0
##   28          0
##   29          0
##   30          0
##   31          0
##   32          0
##   33          0
##   34          0
##   35          0
##   36          0
##   37          0
##   38          0
##   39          0
##   40          0
##   41          0
##   42          0
##   43          0
##   44          0
##   45          0
##   46          0
##   47          0
##   48          0
##   49          0
##   50         66
##   51         72
##   52         57
##   53         41
##   54         50
##   55         48
##   56         35
##   57         27
##   58         23
##   59         20
##   60          9
##   61         11
##   62         13
##   63         11
##   64         12
##  [ reached getOption("max.print") -- omitted 19 rows ]
## [1] "Frequency table after encoding"
## hh_ageinyears
## 30 or younger            31            32            33            34            35 
##           168            58            86            76            98           122 
##            36            37            38            39            40            41 
##           104           125           139           127           142           126 
##            42            43            44            45            46            47 
##           117           126           101           108           104            82 
##            48            49   50 or older          <NA> 
##            77            84           560             5 
## [1] "Inspect value labels and relabel as necessary"
## 30 or younger            31            32            33            34            35 
##             1             2             3             4             5             6 
##            36            37            38            39            40            41 
##             7             8             9            10            11            12 
##            42            43            44            45            46            47 
##            13            14            15            16            17            18 
##            48            49   50 or older            NA 
##            19            20            21            22
# Recode education attainment of adults to reduce risk of re-identification

# p6_1: source codes 1 and 2 share one band ("Pri Incomp or less"); -98 and
# each code from 3 to 9 keep a band of their own.
break_edu <- c(-98, 1, 3:9)
labels_edu <- setNames(
  as.numeric(1:9),
  c(
    "No se",
    "Pri Incomp or less",
    "Pri Comp",
    "Sec Incomp",
    "Sec Comp",
    "Tec Incomp",
    "Tec Comp",
    "Uni Incomp",
    "Uni Comp"
  )
)
mydata <- ordinal_recode(
  variable = "p6_1",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p6_1. Padre
##      No sé  Sin nivel Pri Incomp   Pri Comp Sec Incomp   Sec Comp Tec Incomp 
##         10          4         74        146        344        751         84 
##   Tec Comp Uni Incomp   Uni Comp       <NA> 
##        203         57         81        981 
##      recoded
##       [-98,1) [1,3) [3,4) [4,5) [5,6) [6,7) [7,8) [8,9) [9,1e+06)
##   -98      10     0     0     0     0     0     0     0         0
##   1         0     4     0     0     0     0     0     0         0
##   2         0    74     0     0     0     0     0     0         0
##   3         0     0   146     0     0     0     0     0         0
##   4         0     0     0   344     0     0     0     0         0
##   5         0     0     0     0   751     0     0     0         0
##   6         0     0     0     0     0    84     0     0         0
##   7         0     0     0     0     0     0   203     0         0
##   8         0     0     0     0     0     0     0    57         0
##   9         0     0     0     0     0     0     0     0        81
## [1] "Frequency table after encoding"
## p6_1. Padre
##              No se Pri Incomp or less           Pri Comp         Sec Incomp 
##                 10                 78                146                344 
##           Sec Comp         Tec Incomp           Tec Comp         Uni Incomp 
##                751                 84                203                 57 
##           Uni Comp               <NA> 
##                 81                981 
## [1] "Inspect value labels and relabel as necessary"
##              No se Pri Incomp or less           Pri Comp         Sec Incomp 
##                  1                  2                  3                  4 
##           Sec Comp         Tec Incomp           Tec Comp         Uni Incomp 
##                  5                  6                  7                  8 
##           Uni Comp 
##                  9
# p6b1: -98 and source codes 1..5 each keep their own band; codes 6 and above
# are pooled into a single top band.
break_edu <- c(-98, 1:6)
labels_edu <- setNames(
  as.numeric(1:7),
  c(
    "No se",
    "Sin nivel",
    "Pri Incomp",
    "Pri Comp",
    "Sec Incomp",
    "Sec Comp",
    "Tec Incomp/Comp or Uni Incomp/Comp"
  )
)
mydata <- ordinal_recode(
  variable = "p6b1",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p6b1. Abuelo / Abuela 1
##      No sé  Sin nivel Pri Incomp   Pri Comp Sec Incomp   Sec Comp Tec Incomp 
##         17         42         89         87         48         77          9 
##   Tec Comp Uni Incomp   Uni Comp       <NA> 
##         15          4          6       2341 
##      recoded
##       [-98,1) [1,2) [2,3) [3,4) [4,5) [5,6) [6,1e+06)
##   -98      17     0     0     0     0     0         0
##   1         0    42     0     0     0     0         0
##   2         0     0    89     0     0     0         0
##   3         0     0     0    87     0     0         0
##   4         0     0     0     0    48     0         0
##   5         0     0     0     0     0    77         0
##   6         0     0     0     0     0     0         9
##   7         0     0     0     0     0     0        15
##   8         0     0     0     0     0     0         4
##   9         0     0     0     0     0     0         6
## [1] "Frequency table after encoding"
## p6b1. Abuelo / Abuela 1
##                              No se                          Sin nivel 
##                                 17                                 42 
##                         Pri Incomp                           Pri Comp 
##                                 89                                 87 
##                         Sec Incomp                           Sec Comp 
##                                 48                                 77 
## Tec Incomp/Comp or Uni Incomp/Comp                               <NA> 
##                                 34                               2341 
## [1] "Inspect value labels and relabel as necessary"
##                              No se                          Sin nivel 
##                                  1                                  2 
##                         Pri Incomp                           Pri Comp 
##                                  3                                  4 
##                         Sec Incomp                           Sec Comp 
##                                  5                                  6 
## Tec Incomp/Comp or Uni Incomp/Comp 
##                                  7
# p6b2: coarser recode of education. -98 keeps its own band, source codes 1-2
# are merged, code 3 stays separate, and codes 4 and above are pooled into one
# top band.
break_edu <- c(-98, 1, 3, 4)

# Value labels for the four bands, in band order. The top-band label is spelled
# "Tec Incomp/Comp" (previously misspelled "Imcomp") so it matches the labels
# used for the other p6 variables.
labels_edu <- c(
  "No se" = 1,
  "Pri Incomp or less" = 2,
  "Pri Comp" = 3,
  "Sec Incomp/Comp, Tec Incomp/Comp or Uni Incomp/Comp" = 4
)

# NOTE(review): `missing = 999999` is passed to the helper unchanged; its exact
# handling lives in the sourced functions file -- confirm there.
mydata <- ordinal_recode(
  variable = "p6b2",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p6b2. Abuelo / Abuela 2
##      No sé  Sin nivel Pri Incomp   Pri Comp Sec Incomp   Sec Comp Tec Incomp 
##          2         11         24         32         16         35          2 
##   Tec Comp   Uni Comp       <NA> 
##          5          2       2606 
##      recoded
##       [-98,1) [1,3) [3,4) [4,1e+06)
##   -98       2     0     0         0
##   1         0    11     0         0
##   2         0    24     0         0
##   3         0     0    32         0
##   4         0     0     0        16
##   5         0     0     0        35
##   6         0     0     0         2
##   7         0     0     0         5
##   9         0     0     0         2
## [1] "Frequency table after encoding"
## p6b2. Abuelo / Abuela 2
##                                               No se 
##                                                   2 
##                                  Pri Incomp or less 
##                                                  35 
##                                            Pri Comp 
##                                                  32 
## Sec Incomp/Comp, Tec Imcomp/Comp or Uni Incomp/Comp 
##                                                  60 
##                                                <NA> 
##                                                2606 
## [1] "Inspect value labels and relabel as necessary"
##                                               No se 
##                                                   1 
##                                  Pri Incomp or less 
##                                                   2 
##                                            Pri Comp 
##                                                   3 
## Sec Incomp/Comp, Tec Imcomp/Comp or Uni Incomp/Comp 
##                                                   4
# p6c1: -98 keeps its own band, source codes 1..3 are merged into
# "Pri Comp or less", codes 4 and 5 stay separate, and codes 6 and above are
# pooled into the top band.
break_edu <- c(-98, 1, 4:6)
labels_edu <- setNames(
  as.numeric(1:5),
  c(
    "No se",
    "Pri Comp or less",
    "Sec Incomp",
    "Sec Comp",
    "Tec Incomp/Comp or Uni Incomp/Comp"
  )
)
mydata <- ordinal_recode(
  variable = "p6c1",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p6c1. Tío / Tía 1
##      No sé  Sin nivel Pri Incomp   Pri Comp Sec Incomp   Sec Comp Tec Incomp 
##          6          5          9         17         36        120          7 
##   Tec Comp Uni Incomp   Uni Comp       <NA> 
##         32          9         26       2468 
##      recoded
##       [-98,1) [1,4) [4,5) [5,6) [6,1e+06)
##   -98       6     0     0     0         0
##   1         0     5     0     0         0
##   2         0     9     0     0         0
##   3         0    17     0     0         0
##   4         0     0    36     0         0
##   5         0     0     0   120         0
##   6         0     0     0     0         7
##   7         0     0     0     0        32
##   8         0     0     0     0         9
##   9         0     0     0     0        26
## [1] "Frequency table after encoding"
## p6c1. Tío / Tía 1
##                              No se                   Pri Comp or less 
##                                  6                                 31 
##                         Sec Incomp                           Sec Comp 
##                                 36                                120 
## Tec Incomp/Comp or Uni Incomp/Comp                               <NA> 
##                                 74                               2468 
## [1] "Inspect value labels and relabel as necessary"
##                              No se                   Pri Comp or less 
##                                  1                                  2 
##                         Sec Incomp                           Sec Comp 
##                                  3                                  4 
## Tec Incomp/Comp or Uni Incomp/Comp 
##                                  5
# p6c2: -98 keeps its own band, source codes 1..4 are merged into
# "Sec Incomp or less", code 5 stays separate, and codes 6 and above are pooled
# into the top band.
break_edu <- c(-98, 1, 5, 6)

# Value labels for the four bands, in band order. The top-band label is spelled
# "Tec Incomp/Comp" (previously misspelled "Imcomp") so it matches the labels
# used for the other p6 variables.
labels_edu <- c(
  "No se" = 1,
  "Sec Incomp or less" = 2,
  "Sec Comp" = 3,
  "Tec Incomp/Comp or Uni Incomp/Comp" = 4
)

# NOTE(review): `missing = 999999` is passed to the helper unchanged; its exact
# handling lives in the sourced functions file -- confirm there.
mydata <- ordinal_recode(
  variable = "p6c2",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## p6c2. Tío / Tía 2
##      No sé Pri Incomp   Pri Comp Sec Incomp   Sec Comp Tec Incomp   Tec Comp 
##          3          6          5         16         64          2         19 
## Uni Incomp   Uni Comp       <NA> 
##          5          8       2607 
##      recoded
##       [-98,1) [1,5) [5,6) [6,1e+06)
##   -98       3     0     0         0
##   2         0     6     0         0
##   3         0     5     0         0
##   4         0    16     0         0
##   5         0     0    64         0
##   6         0     0     0         2
##   7         0     0     0        19
##   8         0     0     0         5
##   9         0     0     0         8
## [1] "Frequency table after encoding"
## p6c2. Tío / Tía 2
##                              No se                 Sec Incomp or less 
##                                  3                                 27 
##                           Sec Comp Tec Imcomp/Comp or Uni Incomp/Comp 
##                                 64                                 34 
##                               <NA> 
##                               2607 
## [1] "Inspect value labels and relabel as necessary"
##                              No se                 Sec Incomp or less 
##                                  1                                  2 
##                           Sec Comp Tec Imcomp/Comp or Uni Incomp/Comp 
##                                  3                                  4
# Collapse the education levels of p6c3 (Tío / Tía 3) into "No se" plus two
# bands. break_edu lists the lower bounds of the recoded intervals and
# labels_edu maps each published label to its new code. The top label is
# spelled "Tec Incomp/Comp ..." so it matches the label already used for p6c1.
break_edu <- c(-98, 1, 6)
labels_edu <- c("No se" = 1,
                "Sec Comp or less" = 2,
                "Tec Incomp/Comp or Uni Incomp/Comp" = 3)
mydata <- ordinal_recode(variable = "p6c3", break_points = break_edu, missing = 999999, value_labels = labels_edu)

## [1] "Frequency table before encoding"
## p6c3. Tío / Tía 3
## Pri Incomp   Pri Comp Sec Incomp   Sec Comp Tec Incomp   Tec Comp Uni Incomp 
##          1          1          6         21          4          5          1 
##   Uni Comp       <NA> 
##          4       2692 
##    recoded
##     [-98,1) [1,6) [6,1e+06)
##   2       0     1         0
##   3       0     1         0
##   4       0     6         0
##   5       0    21         0
##   6       0     0         4
##   7       0     0         5
##   8       0     0         1
##   9       0     0         4
## [1] "Frequency table after encoding"
## p6c3. Tío / Tía 3
##                   Sec Comp or less Tec Incomp/Comp or Uni Incomp/Comp 
##                                 29                                 14 
##                               <NA> 
##                               2692 
## [1] "Inspect value labels and relabel as necessary"
##                              No se                   Sec Comp or less 
##                                  1                                  2 
## Tec Incomp/Comp or Uni Incomp/Comp 
##                                  3
# Top code household composition variables with large and unusual numbers 

# p1 (total people living in the household): fold counts of 10 and above into
# a single top category; 888 and 999999 are passed as the missing codes.
mydata <- top_recode(
  variable = "p1",
  break_point = 10,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p1. ¿Cuántas personas viven en total en el hogar?
##    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 <NA> 
##   86  323  707  746  427  201  102   59   37   15   13    6    6    1    1    5

## [1] "Frequency table after encoding"
## p1. ¿Cuántas personas viven en total en el hogar?
##          2          3          4          5          6          7          8 
##         86        323        707        746        427        201        102 
##          9 10 or more       <NA> 
##         59         79          5

# p2c (siblings of the student): fold counts of 5 and above into a single top
# category; 888 and 999999 are passed as the missing codes.
mydata <- top_recode(
  variable = "p2c",
  break_point = 5,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p2c. Hermanos o hermanas de ${student_name}
##    0    1    2    3    4    5    6    7    8 <NA> 
##  354  861  859  422  153   51   19    9    2    5

## [1] "Frequency table after encoding"
## p2c. Hermanos o hermanas de ${student_name}
##         0         1         2         3         4 5 or more      <NA> 
##       354       861       859       422       153        81         5

# p2d (grandparents of the student): fold counts of 2 and above into a single
# top category; 888 and 999999 are passed as the missing codes.
mydata <- top_recode(
  variable = "p2d",
  break_point = 2,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p2d. Abuelos o abuelas vive de ${student_name}
##    0    1    2    3 <NA> 
## 2333  268  126    3    5

## [1] "Frequency table after encoding"
## p2d. Abuelos o abuelas vive de ${student_name}
##         0         1 2 or more      <NA> 
##      2333       268       129         5

# p2e (uncles/aunts of the student): fold counts of 3 and above into a single
# top category; 888 and 999999 are passed as the missing codes.
mydata <- top_recode(
  variable = "p2e",
  break_point = 3,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p2e. Tíos o tías de ${student_name}
##    0    1    2    3    4    5    6 <NA> 
## 2412  174   95   34    9    3    3    5

## [1] "Frequency table after encoding"
## p2e. Tíos o tías de ${student_name}
##         0         1         2 3 or more      <NA> 
##      2412       174        95        49         5

# p2f (nephews/nieces of the student): fold counts of 2 and above into a
# single top category; 888 and 999999 are passed as the missing codes.
mydata <- top_recode(
  variable = "p2f",
  break_point = 2,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p2f. Sobrinos de ${student_name}
##    0    1    2    3    4    6 <NA> 
## 2511  135   51   19   13    1    5

## [1] "Frequency table after encoding"
## p2f. Sobrinos de ${student_name}
##         0         1 2 or more      <NA> 
##      2511       135        84         5

# p2g (other relatives or members living in the household): fold counts of 3
# and above into a single top category; 888 and 999999 are passed as the
# missing codes.
mydata <- top_recode(
  variable = "p2g",
  break_point = 3,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p2g. Otros familiares o miembros que vivan en el hogar
##    0    1    2    3    4    5    6    7 <NA> 
## 2303  325   55   29    9    4    2    3    5

## [1] "Frequency table after encoding"
## p2g. Otros familiares o miembros que vivan en el hogar
##         0         1         2 3 or more      <NA> 
##      2303       325        55        47         5

# Top code high income (p7*) and education expenditure (p49) at the 99.5th percentile

# p7_1 (Padre): top-code at the floored 99.5th percentile of the reported
# values. NA and the -97 code are left out of the percentile, and -97 is
# passed to top_recode as the missing code.
# NOTE(review): the recorded output lists 97 as a reported value (4 cases) -
# confirm these are genuine amounts and not mis-signed -97 codes.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7_1)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7_1",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7_1. Padre
##   -97     0    97   100   180   200   250   300   350   400   450   460   480   500 
##   317     1     4     4     1     4     2     9     2    13     3     1     3    39 
##   508   528   560   570   580   600   700   720   750   800   820   850   858   900 
##     1     1     1     1     1    30    29     1    11   121     1   202     2    64 
##   910   920   950  1000  1025  1050  1070  1100  1200  1240  1280  1300  1350  1400 
##     1     1     2   205     1     2     1    15   182     1     1    38     1    30 
##  1450  1460  1500  1508  1600  1700  1800  1900  2000  2050  2100  2200  2300  2400 
##     2     1   144     1    28     5    40     2    70     1     1     6     1     8 
##  2500  2580  2700  2800  2900  3000  3100  3200  3400  3500  3600  4000  4500  4900 
##    15     1     2     5     1    18     1     1     1     4     1     5     1     1 
##  5000  6000 15000  <NA> 
##     1     1     1  1015

## [1] "Frequency table after encoding"
## p7_1. Padre
##          -97            0           97          100          180          200 
##          317            1            4            4            1            4 
##          250          300          350          400          450          460 
##            2            9            2           13            3            1 
##          480          500          508          528          560          570 
##            3           39            1            1            1            1 
##          580          600          700          720          750          800 
##            1           30           29            1           11          121 
##          820          850          858          900          910          920 
##            1          202            2           64            1            1 
##          950         1000         1025         1050         1070         1100 
##            2          205            1            2            1           15 
##         1200         1240         1280         1300         1350         1400 
##          182            1            1           38            1           30 
##         1450         1460         1500         1508         1600         1700 
##            2            1          144            1           28            5 
##         1800         1900         2000         2050         2100         2200 
##           40            2           70            1            1            6 
##         2300         2400         2500         2580         2700         2800 
##            1            8           15            1            2            5 
##         2900         3000         3100         3200         3400         3500 
##            1           18            1            1            1            4 
##         3600 4000 or more         <NA> 
##            1           10         1015

# p7_2 (Madre): top-code at the floored 99.5th percentile of the reported
# values. NA and the -97 code are left out of the percentile, and -97 is
# passed to top_recode as the missing code.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7_2)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7_2",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7_2. Madre
##  -97    0   50   60   80   97  100  120  150  160  180  200  240  250  280  300  332 
##  229    2    1    2    3    2    9    1    4    1    1   23    2    3    4   24    1 
##  350  360  380  400  425  440  450  460  480  488  500  520  540  550  560  570  600 
##    4    5    1   46    2    2    4    2    7    1   88    1    1    2    1    1   77 
##  608  650  700  708  720  740  750  800  808  820  850  858  860  880  900  920  950 
##    2    5   43    1    5    1   19  108    1    1  228    1    1    2   56    1    4 
## 1000 1030 1050 1070 1090 1100 1200 1290 1300 1350 1400 1500 1600 1700 1800 2000 2200 
##   99    1    2    2    1   11   54    1   11    1    9   63   14    2    7   10    2 
## 2300 2350 2400 2500 2600 2800 3000 3500 4000 4500 <NA> 
##    1    1    1    3    1    3    5    1    1    1 1391

## [1] "Frequency table after encoding"
## p7_2. Madre
##          -97            0           50           60           80           97 
##          229            2            1            2            3            2 
##          100          120          150          160          180          200 
##            9            1            4            1            1           23 
##          240          250          280          300          332          350 
##            2            3            4           24            1            4 
##          360          380          400          425          440          450 
##            5            1           46            2            2            4 
##          460          480          488          500          520          540 
##            2            7            1           88            1            1 
##          550          560          570          600          608          650 
##            2            1            1           77            2            5 
##          700          708          720          740          750          800 
##           43            1            5            1           19          108 
##          808          820          850          858          860          880 
##            1            1          228            1            1            2 
##          900          920          950         1000         1030         1050 
##           56            1            4           99            1            2 
##         1070         1090         1100         1200         1290         1300 
##            2            1           11           54            1           11 
##         1350         1400         1500         1600         1700         1800 
##            1            9           63           14            2            7 
##         2000         2200         2300         2350         2400         2500 
##           10            2            1            1            1            3 
##         2600         2800 3000 or more         <NA> 
##            1            3            8         1391

# p7a1 (Hermano / Hermana 1): top-code at the floored 99.5th percentile of the
# reported values. NA and the -97 code are left out of the percentile, and
# -97 is passed to top_recode as the missing code.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7a1)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7a1",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7a1. Hermano / Hermana 1
##  -97    0   60   70   97  100  150  160  200  240  250  300  329  350  400  450  460 
##  113    8    1    1    1    3    1    2   11    1    3   10    1    3   13    4    1 
##  480  500  560  600  620  650  700  720  750  800  850  858  900  908  950  960 1000 
##    2   18    1   23    1    3    8    3    5   59  125    1   16    1    3    1   45 
## 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2300 2800 3000 <NA> 
##    6   44    5    5   14    3    1    3    1    8    1    1    3 2148

## [1] "Frequency table after encoding"
## p7a1. Hermano / Hermana 1
##          -97            0           60           70           97          100 
##          113            8            1            1            1            3 
##          150          160          200          240          250          300 
##            1            2           11            1            3           10 
##          329          350          400          450          460          480 
##            1            3           13            4            1            2 
##          500          560          600          620          650          700 
##           18            1           23            1            3            8 
##          720          750          800          850          858          900 
##            3            5           59          125            1           16 
##          908          950          960         1000         1100         1200 
##            1            3            1           45            6           44 
##         1300         1400         1500         1600         1700         1800 
##            5            5           14            3            1            3 
##         1900         2000         2300         2800 2926 or more         <NA> 
##            1            8            1            1            3         2148

# p7a2 (Hermano / Hermana 2): top-code at the floored 99.5th percentile of the
# reported values. NA and the -97 code are left out of the percentile, and
# -97 is passed to top_recode as the missing code.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7a2)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7a2",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7a2. Hermano / Hermana 2
##  -97    0   60   70  100  150  200  220  240  250  280  300  350  400  450  480  500 
##   53    5    1    1    2    1    2    1    2    1    1    2    1    4    3    1   10 
##  510  600  700  750  800  840  850  900  950 1000 1200 1400 1500 1600 1800 2000 2400 
##    1    6    3    6   27    1   60   12    2   12    9    3    5    1    3    2    1 
## <NA> 
## 2490

## [1] "Frequency table after encoding"
## p7a2. Hermano / Hermana 2
##          -97            0           60           70          100          150 
##           53            5            1            1            2            1 
##          200          220          240          250          280          300 
##            2            1            2            1            1            2 
##          350          400          450          480          500          510 
##            1            4            3            1           10            1 
##          600          700          750          800          840          850 
##            6            3            6           27            1           60 
##          900          950         1000         1200         1400         1500 
##           12            2           12            9            3            5 
##         1600         1800         2000 2017 or more         <NA> 
##            1            3            2            1         2490

# p7a3 (Hermano / Hermana 3): top-code at the floored 99.5th percentile of the
# reported values. NA and the -97 code are left out of the percentile, and
# -97 is passed to top_recode as the missing code.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7a3)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7a3",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7a3. Hermano / Hermana 3
##  -97    0    8   80  120  200  380  400  450  520  600  700  750  800  840  850  900 
##   20    1    1    1    1    2    1    1    1    1    1    2    3    3    1   15    4 
## 1000 1200 1500 4800 <NA> 
##    6    3    2    1 2664

## [1] "Frequency table after encoding"
## p7a3. Hermano / Hermana 3
##          -97            0            8           80          120          200 
##           20            1            1            1            1            2 
##          380          400          450          520          600          700 
##            1            1            1            1            1            2 
##          750          800          840          850          900         1000 
##            3            3            1           15            4            6 
##         1200         1500 3975 or more         <NA> 
##            3            2            1         2664

# p7a4 (Hermano / Hermana 4): top-code at the floored 99.5th percentile of the
# reported values. NA and the -97 code are left out of the percentile, and
# -97 is passed to top_recode as the missing code.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7a4)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7a4",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7a4. Hermano / Hermana 4
##  -97    0  100  200  500  750  800  850 1000 <NA> 
##    9    1    1    1    1    1    2    8    2 2709

## [1] "Frequency table after encoding"
## p7a4. Hermano / Hermana 4
##          -97            0          100          200          500          750 
##            9            1            1            1            1            1 
##          800          850 1000 or more         <NA> 
##            2            8            2         2709

# p7a5 (Hermano / Hermana 5): top-code at the floored 99.5th percentile of the
# reported values. NA and the -97 code are left out of the percentile, and
# -97 is passed to top_recode as the missing code.
# NOTE(review): the recorded output shows only 8 reported values, so the
# break point lands just under the maximum and a single record is top-coded -
# confirm this gives enough protection.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7a5)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7a5",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7a5. Hermano / Hermana 5
##  -97  520  850  858 1300 <NA> 
##    3    1    5    1    1 2724

## [1] "Frequency table after encoding"
## p7a5. Hermano / Hermana 5
##          -97          520          850          858 1284 or more         <NA> 
##            3            1            5            1            1         2724

# p7a6 (Hermano / Hermana 6): top-code at the floored 99.5th percentile of the
# reported values. NA and the -97 code are left out of the percentile, and
# -97 is passed to top_recode as the missing code.
# NOTE(review): the recorded output shows only 3 reported values, so the
# break point lands just under the maximum and a single record is top-coded -
# confirm this gives enough protection.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7a6)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7a6",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7a6. Hermano / Hermana 6
##  850 1200 <NA> 
##    2    1 2732

## [1] "Frequency table after encoding"
## p7a6. Hermano / Hermana 6
##          850 1196 or more         <NA> 
##            2            1         2732

# p7a7 (Hermano / Hermana 7): top-code at the floored 99.5th percentile of the
# reported values. NA and the -97 code are left out of the percentile, and
# -97 is passed to top_recode as the missing code.
# NOTE(review): the recorded output shows a single reported value (850), so
# the break point equals that value - confirm top-coding is meaningful here.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7a7)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7a7",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7a7. Hermano / Hermana 7
##  850 <NA> 
##    1 2734

## [1] "Frequency table after encoding"
## p7a7. Hermano / Hermana 7
## 850 or more        <NA> 
##           1        2734

# p7b1 (Abuelo / Abuela 1): top-code at the floored 99.5th percentile of the
# reported values. NA and the -97 code are left out of the percentile, and
# -97 is passed to top_recode as the missing code.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7b1)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7b1",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7b1. Abuelo / Abuela 1
##  -97    0   97  100  200  300  400  500  600  700  750  800  850  900  950 1000 1100 
##   19    1    1    1    1    1    2   10    3    3    2    9   17    5    1    6    1 
## 1200 1300 1500 1600 1800 2000 3000 6300 <NA> 
##    6    1    7    1    3    1    1    1 2631

## [1] "Frequency table after encoding"
## p7b1. Abuelo / Abuela 1
##          -97            0           97          100          200          300 
##           19            1            1            1            1            1 
##          400          500          600          700          750          800 
##            2           10            3            3            2            9 
##          850          900          950         1000         1100         1200 
##           17            5            1            6            1            6 
##         1300         1500         1600         1800         2000         3000 
##            1            7            1            3            1            1 
## 4913 or more         <NA> 
##            1         2631

# p7b2 (Abuelo / Abuela 2): top-code at the floored 99.5th percentile of the
# reported values. NA and the -97 code are left out of the percentile, and
# -97 is passed to top_recode as the missing code.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7b2)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7b2",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7b2. Abuelo / Abuela 2
##  -97    0  100  300  400  500  600  700  750  800  850  900  950 1000 1200 1400 1500 
##    6    3    1    1    1    1    2    2    1    2    6    2    1    3    2    1    1 
## 2000 4000 <NA> 
##    1    1 2697

## [1] "Frequency table after encoding"
## p7b2. Abuelo / Abuela 2
##          -97            0          100          300          400          500 
##            6            3            1            1            1            1 
##          600          700          750          800          850          900 
##            2            2            1            2            6            2 
##          950         1000         1200         1400         1500         2000 
##            1            3            2            1            1            1 
## 3689 or more         <NA> 
##            1         2697

# p7c1 (Tío / Tía 1): top-code at the floored 99.5th percentile of the
# reported values. NA and the -97 code are left out of the percentile, and
# -97 is passed to top_recode as the missing code.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7c1)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7c1",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7c1. Tío / Tía 1
##  -97   97  200  300  320  400  450  480  500  600  700  750  770  800  850  858  900 
##   47    3    4    2    1    3    1    1    4    3   10    1    1   19   48    1   13 
##  950 1000 1100 1200 1300 1400 1500 1600 1800 2000 2400 2800 3000 <NA> 
##    1   13    2   19    3    3   16    1    4    1    1    1    1 2507

## [1] "Frequency table after encoding"
## p7c1. Tío / Tía 1
##          -97           97          200          300          320          400 
##           47            3            4            2            1            3 
##          450          480          500          600          700          750 
##            1            1            4            3           10            1 
##          770          800          850          858          900          950 
##            1           19           48            1           13            1 
##         1000         1100         1200         1300         1400         1500 
##           13            2           19            3            3           16 
##         1600         1800         2000         2400         2800 2819 or more 
##            1            4            1            1            1            1 
##         <NA> 
##         2507

# p7c2 (Tío / Tía 2): top-code at the floored 99.5th percentile of the
# reported values. NA and the -97 code are left out of the percentile, and
# -97 is passed to top_recode as the missing code.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7c2)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7c2",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7c2. Tío / Tía 2
##  -97    0   60   97  500  550  700  750  800  850  858  900  950 1000 1100 1200 1400 
##   27    1    1    2    3    1    3    2    9   22    1    5    1    7    2    6    1 
## 1500 1800 2000 2500 3500 <NA> 
##    4    1    3    2    1 2630

## [1] "Frequency table after encoding"
## p7c2. Tío / Tía 2
##          -97            0           60           97          500          550 
##           27            1            1            2            3            1 
##          700          750          800          850          858          900 
##            3            2            9           22            1            5 
##          950         1000         1100         1200         1400         1500 
##            1            7            2            6            1            4 
##         1800         2000         2500 3114 or more         <NA> 
##            1            3            2            1         2630

# p7c3 (Tío / Tía 3): top-code at the floored 99.5th percentile of the
# reported values. NA and the -97 code are left out of the percentile, and
# -97 is passed to top_recode as the missing code.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7c3)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7c3",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7c3. Tío / Tía 3
##  -97   97  500  600  700  800  850  900 1000 1100 1500 2200 <NA> 
##   11    2    2    1    1    3    6    2    2    1    3    1 2700

## [1] "Frequency table after encoding"
## p7c3. Tío / Tía 3
##          -97           97          500          600          700          800 
##           11            2            2            1            1            3 
##          850          900         1000         1100         1500 2119 or more 
##            6            2            2            1            3            1 
##         <NA> 
##         2700

# p7c4 (Tío / Tía 4): top-code at the floored 99.5th percentile of the
# reported values. NA and the -97 code are left out of the percentile, and
# -97 is passed to top_recode as the missing code.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7c4)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7c4",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7c4. Tío / Tía 4
##  -97   97  850  900 1100 1200 <NA> 
##    3    1    1    2    1    2 2725

## [1] "Frequency table after encoding"
## p7c4. Tío / Tía 4
##          -97           97          850          900         1100 1200 or more 
##            3            1            1            2            1            2 
##         <NA> 
##         2725

# p7c5 (Tío / Tía 5): top-code at the floored 99.5th percentile of the
# reported values. NA and the -97 code are left out of the percentile, and
# -97 is passed to top_recode as the missing code.
# NOTE(review): the recorded output shows only 3 reported values, so the
# break point lands just under the maximum and a single record is top-coded -
# confirm this gives enough protection.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7c5)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7c5",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7c5. Tío / Tía 5
##  -97  600  900 3200 <NA> 
##    1    1    1    1 2731

## [1] "Frequency table after encoding"
## p7c5. Tío / Tía 5
##          -97          600          900 3177 or more         <NA> 
##            1            1            1            1         2731

# p7c6 (Tío / Tía 6): top-code at the floored 99.5th percentile of the
# reported values. NA and the -97 code are left out of the percentile, and
# -97 is passed to top_recode as the missing code.
# NOTE(review): the recorded output shows a single reported value (1500), so
# the break point equals that value - confirm top-coding is meaningful here.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7c6)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7c6",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7c6. Tío / Tía 6
##  -97 1500 <NA> 
##    1    1 2733

## [1] "Frequency table after encoding"
## p7c6. Tío / Tía 6
##          -97 1500 or more         <NA> 
##            1            1         2733

# p7d1 (Sobrino / Sobrina 1): top-code at the floored 99.5th percentile of the
# reported values. NA and the -97 code are left out of the percentile, and
# -97 is passed to top_recode as the missing code.
# NOTE(review): the recorded output shows only 9 reported values, so the
# break point lands just under the maximum and a single record is top-coded -
# confirm this gives enough protection.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7d1)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7d1",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7d1. Sobrino / Sobrina 1
##  -97  850  920 1000 1500 <NA> 
##    6    5    1    2    1 2720

## [1] "Frequency table after encoding"
## p7d1. Sobrino / Sobrina 1
##          -97          850          920         1000 1480 or more         <NA> 
##            6            5            1            2            1         2720

# p7d2 (Sobrino / Sobrina 2): top-code at the floored 99.5th percentile of the
# reported values. NA and the -97 code are left out of the percentile, and
# -97 is passed to top_recode as the missing code.
# NOTE(review): the recorded output shows a single reported value (850), so
# the break point equals that value - confirm top-coding is meaningful here.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p7d2)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p7d2",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p7d2. Sobrino / Sobrina 2
##  -97  850 <NA> 
##    1    1 2733

## [1] "Frequency table after encoding"
## p7d2. Sobrino / Sobrina 2
##         -97 850 or more        <NA> 
##           1           1        2733

# p49 (monthly spending on the children's education): top-code at the floored
# 99.5th percentile of the reported values. NA and the -97 code are left out
# of the percentile, and -97 is passed to top_recode as the missing code.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$p49)
  floor(quantile(reported[reported != -97], probs = 0.995))
})
mydata <- top_recode(
  variable = "p49",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## p49. ¿Cuánto gasta cada mes en total en la educación de todos sus hijos que viven en 
##    0    1    8   10   20   25   30   33   34   40   45   50   58   60   70   72   75 
##   63    1    2    1   16    2   31    1    1   11    2  131    2   30   18    1    1 
##   80   84   90  100  108  120  130  140  150  160  170  180  190  200  208  210  215 
##   52    1    4  280    3   19    5    1  171    5    1   12    2  333    3    3    1 
##  220  230  240  250  260  270  280  282  300  305  308  310  320  330  350  360  372 
##    2    2    4   83    1    1    8    1  321    1    2    1    3    3   57    1    1 
##  375  380  385  390  400  408  410  440  450  460  473  480  500  508  550  580  600 
##    1    2    1    1  192    3    1    1   21    1    1    2  248    2    5    1  105 
##  608  650  700  730  750  757  770  800  840  850  900  960 1000 1008 1100 1200 1230 
##    3    6   52    1    4    1    1   77    1    4   18    1  109    1    2   26    1 
## 1300 1400 1470 1500 1508 1600 1700 1800 2000 2100 2200 2500 2800 3000 3400 3500 4000 
##    4    2    1   51    1    2    1    6   35    1    1    7    1   10    1    1    4 
## 5000 6000 7000 <NA> 
##    1    1    1    5

## [1] "Frequency table after encoding"
## p49. ¿Cuánto gasta cada mes en total en la educación de todos sus hijos que viven en 
##            0            1            8           10           20           25 
##           63            1            2            1           16            2 
##           30           33           34           40           45           50 
##           31            1            1           11            2          131 
##           58           60           70           72           75           80 
##            2           30           18            1            1           52 
##           84           90          100          108          120          130 
##            1            4          280            3           19            5 
##          140          150          160          170          180          190 
##            1          171            5            1           12            2 
##          200          208          210          215          220          230 
##          333            3            3            1            2            2 
##          240          250          260          270          280          282 
##            4           83            1            1            8            1 
##          300          305          308          310          320          330 
##          321            1            2            1            3            3 
##          350          360          372          375          380          385 
##           57            1            1            1            2            1 
##          390          400          408          410          440          450 
##            1          192            3            1            1           21 
##          460          473          480          500          508          550 
##            1            1            2          248            2            5 
##          580          600          608          650          700          730 
##            1          105            3            6           52            1 
##          750          757          770          800          840          850 
##            4            1            1           77            1            4 
##          900          960         1000         1008         1100         1200 
##           18            1          109            1            2           26 
##         1230         1300         1400         1470         1500         1508 
##            1            4            2            1           51            1 
##         1600         1700         1800         2000         2100         2200 
##            2            1            6           35            1            1 
##         2500         2800 3000 or more         <NA> 
##            7            1           19            5

# Top code number of rooms variables with large and unusual numbers

# p9 (number of rooms in the dwelling): fold counts of 7 and above into a
# single top category; 888 and 999999 are passed as the missing codes.
mydata <- top_recode(
  variable = "p9",
  break_point = 7,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p9. ¿Cuántas habitaciones tiene esta vivienda sin incluir cocina, baños, pasillos ni
##    1    2    3    4    5    6    7    8    9   10   11   12 <NA> 
##  460  999  717  342  125   57   15    7    3    2    1    2    5

## [1] "Frequency table after encoding"
## p9. ¿Cuántas habitaciones tiene esta vivienda sin incluir cocina, baños, pasillos ni
##         1         2         3         4         5         6 7 or more      <NA> 
##       460       999       717       342       125        57        30         5

Indirect PII - Categorical: Recode, encode, or top/bottom code extreme values

# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Numbered question families are generated with paste0() so the list stays
# compact; the resulting order is the same as listing every name by hand.
indirect_PII <- c(
  # Respondent / student descriptors
  "student_female", "hh_gender", "attending_confirm", "grado2016_confirm",
  "p2a", "p2b",
  # p3 family
  paste0("p3a", 1:8), paste0("p3b", 1:3),
  paste0("p3c", 1:6), paste0("p3d", 1:6),
  # p26 family
  paste0("p26a", 1:8), paste0("p26c", 1:6), paste0("p26d", 1:6),
  # p4 family
  paste0("p4_", 1:2),
  paste0("p4a", 1:8), paste0("p4b", 1:3),
  paste0("p4c", 1:6), paste0("p4d", 1:3),
  # p5 family
  paste0("p5_aa", 1:8),
  # p23 family
  "p23a", paste0("p23_", 1:2),
  paste0("p23a", 1:8), paste0("p23b", 1:3),
  paste0("p23c", 1:6), paste0("p23d", 1:3),
  # Remaining single items
  "p8", "p42", "p43",
  "p44b", paste0("p44b_", c(1:8, 99)),
  "p50", "p53", "p55", "p59"
)

# Hand the flagged indirect-PII variables to the capture_tables() helper.
# NOTE(review): presumably this tabulates each variable for manual review;
# confirm against the helper functions file.
capture_tables(indirect_PII)

# Recode variables whose categories are very specific.

# p4b1 (grandparent 1): code 1 ("Estudia") is relabelled "Otros"; codes 2-5
# keep their original labels.
# NOTE(review): missing = 999999 is passed straight to the helper; confirm
# its meaning in ordinal_recode().
break_activity <- as.numeric(1:5)
labels_activity <- c(
  "Otros" = 1,
  "Estudia y tiene un trabajo remunerado" = 2,
  "Trabajo remunerado" = 3,
  "Quehaceres del hogar o trabajo no remunerado" = 4,
  "No hace nada" = 5
)
mydata <- ordinal_recode(
  variable = "p4b1",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## p4b1. Abuelo / Abuela 1
##                                      Estudia 
##                                            3 
##                           Trabajo remunerado 
##                                          104 
## Quehaceres del hogar o trabajo no remunerado 
##                                          147 
##                                 No hace nada 
##                                          143 
##                                         <NA> 
##                                         2338 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,1e+06)
##   1     3     0     0     0         0
##   3     0     0   104     0         0
##   4     0     0     0   147         0
##   5     0     0     0     0       143
## [1] "Frequency table after encoding"
## p4b1. Abuelo / Abuela 1
##                                        Otros 
##                                            3 
##                           Trabajo remunerado 
##                                          104 
## Quehaceres del hogar o trabajo no remunerado 
##                                          147 
##                                 No hace nada 
##                                          143 
##                                         <NA> 
##                                         2338 
## [1] "Inspect value labels and relabel as necessary"
##                                        Otros 
##                                            1 
##        Estudia y tiene un trabajo remunerado 
##                                            2 
##                           Trabajo remunerado 
##                                            3 
## Quehaceres del hogar o trabajo no remunerado 
##                                            4 
##                                 No hace nada 
##                                            5
# p4c1 (uncle/aunt 1): codes 1, 2 and 5 are grouped under "Otros"; paid work
# (3) and household / unpaid work (4) keep their own labels.
break_activity <- as.numeric(1:5)
labels_activity <- c(
  "Otros" = 1,
  "Otros" = 2,
  "Trabajo remunerado" = 3,
  "Quehaceres del hogar o trabajo no remunerado" = 4,
  "Otros" = 5
)
mydata <- ordinal_recode(
  variable = "p4c1",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## p4c1. Tío / Tía 1
##                                      Estudia 
##                                           19 
##        Estudia y tiene un trabajo remunerado 
##                                           13 
##                           Trabajo remunerado 
##                                          215 
## Quehaceres del hogar o trabajo no remunerado 
##                                           35 
##                                 No hace nada 
##                                           17 
##                                         <NA> 
##                                         2436 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,1e+06)
##   1    19     0     0     0         0
##   2     0    13     0     0         0
##   3     0     0   215     0         0
##   4     0     0     0    35         0
##   5     0     0     0     0        17
## [1] "Frequency table after encoding"
## p4c1. Tío / Tía 1
##                                        Otros 
##                                           49 
##                           Trabajo remunerado 
##                                          215 
## Quehaceres del hogar o trabajo no remunerado 
##                                           35 
##                                         <NA> 
##                                         2436 
## [1] "Inspect value labels and relabel as necessary"
##                                        Otros 
##                                            1 
##                                        Otros 
##                                            2 
##                           Trabajo remunerado 
##                                            3 
## Quehaceres del hogar o trabajo no remunerado 
##                                            4 
##                                        Otros 
##                                            5
# p4c2 (uncle/aunt 2): only paid work (code 3) keeps its label; codes 1, 2,
# 4 and 5 are grouped under "Otros".
break_activity <- as.numeric(1:5)
labels_activity <- c(
  "Otros" = 1,
  "Otros" = 2,
  "Trabajo remunerado" = 3,
  "Otros" = 4,
  "Otros" = 5
)
mydata <- ordinal_recode(
  variable = "p4c2",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## p4c2. Tío / Tía 2
##                                      Estudia 
##                                            7 
##        Estudia y tiene un trabajo remunerado 
##                                            2 
##                           Trabajo remunerado 
##                                          103 
## Quehaceres del hogar o trabajo no remunerado 
##                                           21 
##                                 No hace nada 
##                                            4 
##                                         <NA> 
##                                         2598 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,1e+06)
##   1     7     0     0     0         0
##   2     0     2     0     0         0
##   3     0     0   103     0         0
##   4     0     0     0    21         0
##   5     0     0     0     0         4
## [1] "Frequency table after encoding"
## p4c2. Tío / Tía 2
##              Otros Trabajo remunerado               <NA> 
##                 34                103               2598 
## [1] "Inspect value labels and relabel as necessary"
##              Otros              Otros Trabajo remunerado              Otros 
##                  1                  2                  3                  4 
##              Otros 
##                  5
# p4c3 (uncle/aunt 3): same grouping as p4c2 - paid work (code 3) keeps its
# label, every other code becomes "Otros".
break_activity <- as.numeric(1:5)
labels_activity <- c(
  "Otros" = 1,
  "Otros" = 2,
  "Trabajo remunerado" = 3,
  "Otros" = 4,
  "Otros" = 5
)
mydata <- ordinal_recode(
  variable = "p4c3",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## p4c3. Tío / Tía 3
##                                      Estudia 
##                                            3 
##        Estudia y tiene un trabajo remunerado 
##                                            1 
##                           Trabajo remunerado 
##                                           34 
## Quehaceres del hogar o trabajo no remunerado 
##                                            7 
##                                 No hace nada 
##                                            2 
##                                         <NA> 
##                                         2688 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,1e+06)
##   1     3     0     0     0         0
##   2     0     1     0     0         0
##   3     0     0    34     0         0
##   4     0     0     0     7         0
##   5     0     0     0     0         2
## [1] "Frequency table after encoding"
## p4c3. Tío / Tía 3
##              Otros Trabajo remunerado               <NA> 
##                 13                 34               2688 
## [1] "Inspect value labels and relabel as necessary"
##              Otros              Otros Trabajo remunerado              Otros 
##                  1                  2                  3                  4 
##              Otros 
##                  5
# p4d1 (nephew/niece 1): "Estudia" (code 1) keeps its label; codes 2-5 are
# grouped under "Otros".
break_activity <- as.numeric(1:5)
labels_activity <- c(
  "Estudia" = 1,
  "Otros" = 2,
  "Otros" = 3,
  "Otros" = 4,
  "Otros" = 5
)
mydata <- ordinal_recode(
  variable = "p4d1",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## p4d1. Sobrino / Sobrina 1
##                               Estudia Estudia y tiene un trabajo remunerado 
##                                    35                                     2 
##                    Trabajo remunerado                          No hace nada 
##                                    13                                     7 
##                                  <NA> 
##                                  2678 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,1e+06)
##   1    35     0     0     0         0
##   2     0     2     0     0         0
##   3     0     0    13     0         0
##   5     0     0     0     0         7
## [1] "Frequency table after encoding"
## p4d1. Sobrino / Sobrina 1
## Estudia   Otros    <NA> 
##      35      22    2678 
## [1] "Inspect value labels and relabel as necessary"
## Estudia   Otros   Otros   Otros   Otros 
##       1       2       3       4       5
# p8 (main roof material): the four most common materials (codes 1, 3, 4, 5)
# keep their labels; codes 2, 6, 7 and 8 are grouped under "Otro". The
# existing "Otro" answer is coded 99, so it falls in the last interval
# (9 and above) and joins the same group.
# NOTE(review): the label for code 4 is written without accents, unlike the
# label stored in the data ("plástico") - confirm this is intentional.
break_material <- as.numeric(1:9)
labels_material <- c(
  "Concreto, ladrillos o cemento" = 1,
  "Otro" = 2,
  "Calamina de metal o metal" = 3,
  "Calamina de plastico o plastico" = 4,
  "Madera" = 5,
  "Otro" = 6,
  "Otro" = 7,
  "Otro" = 8,
  "Otro" = 9
)
mydata <- ordinal_recode(
  variable = "p8",
  break_points = break_material,
  missing = 999999,
  value_labels = labels_material
)

## [1] "Frequency table before encoding"
## p8. Material principal de construcción del techo del hogar
##   Concreto, ladrillos o cemento                           Tejas 
##                            1734                              17 
##       Calamina de metal o metal Calamina de plástico o plástico 
##                             455                             174 
##                          Madera                          Cartón 
##                             284                              10 
##                           Adobe                            Paja 
##                              18                               3 
##                            Otro                            <NA> 
##                              35                               5 
##     recoded
##      [1,2) [2,3) [3,4) [4,5) [5,6) [6,7) [7,8) [8,9) [9,1e+06)
##   1   1734     0     0     0     0     0     0     0         0
##   2      0    17     0     0     0     0     0     0         0
##   3      0     0   455     0     0     0     0     0         0
##   4      0     0     0   174     0     0     0     0         0
##   5      0     0     0     0   284     0     0     0         0
##   6      0     0     0     0     0    10     0     0         0
##   7      0     0     0     0     0     0    18     0         0
##   8      0     0     0     0     0     0     0     3         0
##   99     0     0     0     0     0     0     0     0        35
## [1] "Frequency table after encoding"
## p8. Material principal de construcción del techo del hogar
##   Concreto, ladrillos o cemento                            Otro 
##                            1734                              83 
##       Calamina de metal o metal Calamina de plastico o plastico 
##                             455                             174 
##                          Madera                            <NA> 
##                             284                               5 
## [1] "Inspect value labels and relabel as necessary"
##   Concreto, ladrillos o cemento                            Otro 
##                               1                               2 
##       Calamina de metal o metal Calamina de plastico o plastico 
##                               3                               4 
##                          Madera                            Otro 
##                               5                               6 
##                            Otro                            Otro 
##                               7                               8 
##                            Otro 
##                               9

Matching and crosstabulations: Run automated PII check

# Categorical key variables used for the automated re-identification check:
# gender, age and occupation/education.
selectedKeyVars <- c("hh_gender", "hh_ageinyears", "p4_1") ## !!! Replace with candidate categorical demo vars

# Weight variable (add if available)
# selectedWeightVar = c('projwt') ##!!! Replace with weight var

# Household id variable (cluster)
# selectedHouseholdID = c('wpid') ##!!! Replace with household id

# Build the sdcMicro object from the working data and the key variables,
# then print it to show the anonymity summary.
sdcInitial <- createSdcObj(dat = mydata, keyVars = selectedKeyVars)
sdcInitial
## The input dataset consists of 2735 rows and 804 variables.
##   --> Categorical key variables: hh_gender, hh_ageinyears, p4_1
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##   Key Variable Number of categories      Mean size            Size of smallest (>0)
##      hh_gender                    3  (3)  1365.000 (1365.000)                   482
##  hh_ageinyears                   22 (22)   130.000  (130.000)                    58
##           p4_1                    6  (6)   354.800  (354.800)                     1
##       
##  (482)
##   (58)
##    (1)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 5 (0.183%)
##   - 3-anonymity: 7 (0.256%)
##   - 5-anonymity: 11 (0.402%)
## 
## ----------------------------------------------------------------------

Show values of the key variables for records that violate k-anonymity

# mydata <- labelDataset(mydata)

# Flag records that violate 2-anonymity: column 2 of the individual risk
# table is below 2.
# NOTE(review): column 2 is presumably the sample frequency of the record's
# key combination (fk) - confirm against the sdcMicro documentation.
notAnon <- sdcInitial@risk$individual[, 2] < 2
# Display the key variables of the flagged records.
mydata[notAnon, selectedKeyVars]
## # A tibble: 5 x 3
##    hh_gender hh_ageinyears                                             p4_1
##    <dbl+lbl>     <dbl+lbl>                                        <dbl+lbl>
## 1 1 [Hombre]       10 [39] 5 [No hace nada]                                
## 2 1 [Hombre]        9 [38] 2 [Estudia y tiene un trabajo remunerado]       
## 3 1 [Hombre]       11 [40] 2 [Estudia y tiene un trabajo remunerado]       
## 4 1 [Hombre]       16 [45] 4 [Quehaceres del hogar o trabajo no remunerado]
## 5 1 [Hombre]        8 [37] 2 [Estudia y tiene un trabajo remunerado]
# Run sdcMicro local suppression on the initial object.
sdcFinal <- localSuppression(sdcInitial)

# Recombining anonymized variables: preview the manipulated key variables
# (HH) for the records flagged in notAnon.
extractManipData(sdcFinal)[notAnon, selectedKeyVars]
## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first
## element will be used
##      hh_gender hh_ageinyears p4_1
## 757          1            NA    5
## 924          1            NA    2
## 2084         1            NA    2
## 2096         1            NA    4
## 2220         1            NA    2
# Blank out gender and p4_1 for the records flagged in notAnon.
# NOTE(review): the local-suppression preview printed just before this step
# sets hh_ageinyears to NA for these records, whereas this step suppresses
# hh_gender and p4_1 and leaves age intact - confirm this is the intended
# choice of variables.
mydata[notAnon, c("hh_gender", "p4_1")] <- NA

Open-ends: review responses for any sensitive information, redact as necessary

# !!! Identify open-end variables here:
open_ends <- c(
  "hh_parentesco_other",
  "pref65f", "pref66f",
  "p44c", "p51a", "i19a1", "p8a"
)

# Pass the open-end variables to the report_open() helper.
# NOTE(review): presumably this produces the "verbatims.csv" file that is
# reviewed in the next step - confirm in the helper functions file.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number

# Drop all seven open-end variables in a single step: each one actually
# holds verbatim data in Spanish.
mydata <- mydata[!names(mydata) %in% c(
  "hh_parentesco_other",
  "pref65f", "pref66f",
  "p44c", "p51a", "i19a1", "p8a"
)]

GPS data: Displace

# Setup map
# !!!No GPS data

Save processed data in Stata and SPSS format

# Write the processed data next to the raw file name with a "_PU" suffix:
# once as a Stata .dta file and once as an SPSS .sav file.
haven::write_dta(mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(mydata, path = paste0(filename, "_PU.sav"))

# Build the report title from the dataset file name.
title_var <- paste("DOL-ILAB SDC", filename, sep = " - ")