# Start from a clean workspace so objects left over from an earlier session
# cannot leak into this run. `all.names = TRUE` also clears dot-prefixed
# (hidden) objects. The earlier spelling `all=t` passed the base transpose
# function `t` rather than the logical TRUE (and relied on partial argument
# matching), so the "all names" request never took effect.
rm(list = ls(all.names = TRUE))
filename <- "InDepthStudents2016_Rural_Raw_NOPII" # !!!Update filename
functions_vers <- "functions_1.7.R" # !!!Update helper functions file
# Load the helper functions file named above into the current session.
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location (>100,000)
# Weight: weightVar
# Household ID: hhId,
# Open-ends: Review responses for any sensitive information, redact as necessary
#!!!Save flagged dictionary in .csv format, add "DatasetReview" to name and continue processing data with subset of flagged variables
# !!!Include any Direct PII variables
# Names of the columns to remove from `mydata`.
dropvars <- c(
  "nomest", "apest", "amest",
  "nompad", "app_pad",
  "nommad", "app_mad",
  "p1a1_fixed",
  "p1a2", "p1a2_fixed",
  "p1a3", "p1a3_fixed",
  "p1a4", "p1a4_fixed",
  "address", "referencia",
  "audio1_student", "audio2_student", "audio3_student",
  "text_audit",
  "cto_padre", "cto_padre_nom", "cto_padre_app1", "cto_padre_app2",
  "audio_random",
  "key"
)
# Keep only the columns whose name is not listed in `dropvars`.
mydata <- mydata[!(names(mydata) %in% dropvars)]
# Interviewer names, for example, may be useful for analysis of interviewer effects
# !!!Replace vector in "variables" field below with relevant variable names
# Remove the column named "i5" from `mydata`; every other column is kept.
mydata <- mydata[!(names(mydata) %in% "i5")]
# !!!Include relevant variables, but check their population size first to confirm they are <100,000
# Location-type columns to be encoded.
locvars <- c(
  "i8a", "i7", "i9a1", "cod_mod",
  "school_fixed_primary", "school_fixed_sec",
  "nom_dist", "district_fixed",
  "p12",
  "school2014_name1", "school2013_name1"
)
# encode_location() is defined outside this section. Per the logged output
# below, it prints a frequency table for each variable before and after
# encoding and the original values appear replaced by numeric codes.
# 999999 is passed as the `missing` argument.
mydata <- encode_location(
  variables = locvars,
  missing = 999999
)
## [1] "Frequency table before encoding"
## i8a. Provincia
## AREQUIPA CAMANA CASTILLA CAYLLOMA CONDESUYOS LA UNION CUSCO ACOMAYO
## 12 2 88 39 48 148 52 47
## ANTA CALCA CANAS CANCHIS CHUMBIVILCAS ESPINAR PARURO PAUCARTAMBO
## 291 161 227 2 744 55 314 395
## QUISPICANCHI URUBAMBA <NA>
## 17 111 16
## [1] "Frequency table after encoding"
## i8a. Provincia
## 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 <NA>
## 227 744 161 52 2 291 48 395 314 55 148 88 2 39 17 111 12 47 16
## [1] "Frequency table before encoding"
## i7. Distrito
## AREQUIPA ALTO SELVA ALEGRE CERRO COLORADO JACOBO HUNTER PAUCARPATA
## 1 1 3 2 1
## SACHACA SOCABAYA YURA JOSE MARIA QUIMPER SAMUEL PASTOR
## 1 1 2 1 1
## ANDAGUA CHACHAS CHILCAYMARCA CHOCO ORCOPAMPA
## 3 20 8 1 47
## PAMPACOLCA TIPAN VIRACO CHIVAY ACHOMA
## 5 2 2 6 1
## CAYLLOMA SIBAYO TAPAY CHUQUIBAMBA CAYARANI
## 29 1 2 5 38
## IRAY SALAMANCA COTAHUASI ALCA HUAYNACOTAS
## 3 2 14 21 21
## PAMPAMARCA PUYCA TOMEPAMPA CUSCO CCORCA
## 27 54 11 5 10
## SAN JERONIMO SAN SEBASTIAN SANTIAGO WANCHAQ ACOS
## 15 1 19 2 3
## RONDOCAN ANTA ANCAHUASI CHINCHAYPUJIO HUAROCONDO
## 44 87 97 57 34
## PUCYURA ZURITE CALCA LAMAY PISAC
## 1 15 1 40 61
## SAN SALVADOR YANAOCA CHECCA KUNTURKANKI LANGUI
## 59 1 135 80 9
## QUEHUE SICUANI SANTO TOMAS CAPACMARCA CHAMACA
## 2 2 99 33 116
## COLQUEMARCA LIVITACA LLUSCO QUIÑOTA VELILLE
## 101 139 103 57 96
## ESPINAR COPORAQUE PARURO ACCHA CCAPI
## 6 49 16 38 21
## COLCHA HUANOQUITE OMACHA PACCARITAMBO PILLPINTO
## 18 37 111 21 1
## YAURISQUE PAUCARTAMBO CAICAY CHALLABAMBA COLQUEPATA
## 51 6 42 63 149
## HUANCARANI ANDAHUAYLILLAS LUCRE URUBAMBA CHINCHERO
## 135 1 16 29 44
## HUAYLLABAMBA MARAS OLLANTAYTAMBO <NA>
## 1 32 5 16
## [1] "Frequency table after encoding"
## i7. Distrito
## 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665
## 49 1 15 42 57 99 1 34 2 8 139 15 3 103 16 1 37 44 1 111 21 27
## 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687
## 6 1 1 57 101 3 21 135 96 33 2 14 1 11 1 16 135 1 1 38 40 47
## 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709
## 97 18 10 5 51 1 9 19 149 5 1 20 21 1 61 3 6 44 87 29 3 2
## 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731
## 32 2 2 2 5 80 1 54 38 116 1 5 29 2 1 1 2 63 59 2 21 6
## <NA>
## 16
## [1] "Frequency table before encoding"
## i9a1. Código modular
## 204800 204875 204909 205005 205047 205112 205120 205153 205682 205690 205773 205781 205815
## 10 8 2 5 6 8 11 7 6 7 9 2 5
## 206334 207373 207407 216341 219741 220285 226704 232207 232223 232231 232249 232264 232504
## 3 1 1 8 2 3 6 9 9 6 2 7 3
## 232512 232538 232546 232553 232561 232579 232587 232595 232603 232611 232645 232728 232777
## 6 3 5 5 4 3 7 5 7 2 1 3 6
## 233130 233296 233361 233676 233718 233734 233825 233882 233890 233908 233916 233924 233932
## 4 5 4 3 7 5 3 1 3 6 8 9 3
## 233940 233957 233965 233973 233981 233999 234021 234062 234096 234104 234112 234120 234138
## 4 6 3 5 6 8 7 5 6 6 4 4 8
## 234153 234161 234187 234195 234203 234229 234237 234351 234369 234377 234385 234401 234419
## 6 8 5 1 4 5 6 1 9 5 8 7 8
## 234427 234443 234450 234500 234583 234674 234682 234781 234831 234856 236158 236349 236422
## 7 6 3 7 3 9 7 3 7 7 6 5 16
## 236448 236463 236471 236489 236653 236661 236927 287409 287425 287466 309286 309294 309377
## 4 8 1 7 1 31 9 10 6 3 1 12 1
## 309419 309435 309567 310433 310441 312090 312215 312306 312421 312744 312868 313080 313239
## 3 1 4 3 1 2 6 10 5 2 2 1 2
## 313395 313460 313890 313908 313965 313981 314070 314187 314211 314237 314245 314252 314260
## 9 1 3 8 6 6 2 5 4 4 4 6 6
## 314278 314294 405258 405498 405704 405738 405746 405852 405894 405902 405928 405936 406009
## 10 5 6 8 5 5 4 9 8 8 8 9 6
## 406066 406082 406116 406124 406140 406215 406223 406264 406413 406595 406629 406645 406975
## 10 7 6 8 3 5 6 4 8 8 9 5 6
## 406983 407007 407049 408211 408245 408278 408286 408294 408328 408336 408393 408468 408476
## 10 4 7 1 7 4 4 3 5 1 4 8 7
## 408484 408492 408559 408567 408609 408666 408732 408773 408823 408856 408922 408955 408971
## 5 5 4 8 6 8 3 4 3 8 7 4 5
## 409003 409011 409029 409193 409227 409235 409243 409284 409292 409300 409318 409326 409359
## 7 10 9 5 8 8 6 9 7 9 7 4 7
## 409441 409565 409896 410464 410480 410514 410613 410670 410746 410779 410787 410803 473249
## 9 4 2 1 7 5 1 4 7 9 5 7 7
## 481283 486688 486928 489120 495069 495325 498782 499863 502922 504142 517581 517888 518084
## 9 8 2 8 14 16 1 3 2 13 29 8 22
## 518472 519496 519595 519678 525923 550392 551309 557587 579268 579276 579284 579292 579300
## 10 6 6 6 21 7 3 9 9 1 1 17 20
## 585885 587055 587147 587204 589200 589747 589804 591255 591602 592147 612051 612119 612291
## 6 15 4 13 1 1 5 23 14 4 2 2 5
## 612416 612507 612689 612747 612770 612804 615013 616110 617787 617829 621391 623017 623041
## 2 9 1 1 1 4 5 6 17 6 15 4 2
## 637215 637272 639542 647388 647412 647446 647628 655746 671628 672105 678961 679829 680058
## 1 9 2 10 7 16 4 1 4 4 2 2 9
## 680082 680124 699603 712562 712711 712778 723031 730655 731273 735498 736116 775700 776039
## 1 18 12 2 2 2 3 6 2 5 2 5 3
## 783423 783597 783621 783696 783704 783720 783787 783795 791319 791574 794438 796888 818674
## 12 1 1 11 1 14 10 10 20 4 2 2 6
## 818708 844159 844183 891408 891812 895482 899351 927871 930958 931055 931063 932236 932434
## 3 2 4 14 1 1 3 10 1 15 18 12 2
## 932491 932608 932848 933226 933283 933291 933317 933531 933598 933846 1031574 1117704 1120005
## 4 8 7 6 10 1 6 1 2 6 4 10 1
## 1201649 1201870 1260942 1266428 1271840 1273655 1314376 1320647 1321322 1321330 1321355 1321421 1327279
## 15 6 2 6 1 2 2 10 6 11 9 11 2
## 1327287 1336072 1343573 1343581 1344639 1345024 1347269 1347293 1347301 1347434 1347459 1347921 1347939
## 9 3 9 12 13 7 1 13 8 1 8 19 11
## 1347970 1352269 1364868 1369248 1372507 1374438 1377209 1377233 1377415 1379361 1379544 1380021 1380120
## 14 1 7 14 7 1 11 1 14 1 4 20 4
## 1386226 1388610 1388644 1388651 1389261 1389279 1390095 1390467 1390517 1390582 1390665 1390673 1392083
## 10 2 5 11 9 5 2 11 6 1 17 16 7
## 1392091 1392109 1392117 1392125 1392141 1392174 1392216 1392224 1392240 1392257 1396191 1396209 1396225
## 6 10 7 13 16 14 2 7 3 8 19 11 27
## 1396852 1396878 1396886 1398783 1398932 1401934 1401942 1401959 1402536 1408426 1412634 1412873 1415983
## 18 4 9 1 1 11 8 17 7 2 5 3 4
## 1418615 1423003 1442185 1452705 1458348 1459791 1459809 1523802 1523810 1523828 1540988 1540996 1541192
## 4 12 5 7 9 9 12 9 8 6 7 10 10
## 1625532 1625557 1625573 1630631 1637263 1659101 1666130 1719210 1723469 <NA>
## 7 8 8 3 4 9 1 3 2 16
## [1] "Frequency table after encoding"
## i9a1. Código modular
## 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580
## 2 17 4 6 6 10 1 16 9 1 2 8 6 6 10 7 15 1 1 7 4 22
## 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602
## 1 3 9 9 6 9 5 2 6 1 4 7 9 3 3 7 7 1 12 9 3 8
## 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624
## 2 4 5 9 7 9 4 4 14 5 6 6 15 9 18 5 3 6 13 1 1 10
## 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646
## 7 9 10 1 1 8 5 3 2 4 5 14 6 16 8 3 2 6 1 4 7 7
## 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668
## 3 7 1 4 8 6 8 2 3 3 3 10 11 29 10 10 8 6 2 7 5 7
## 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690
## 6 2 1 1 4 8 8 2 27 6 8 5 6 5 14 4 6 2 9 9 7 7
## 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712
## 6 9 3 9 3 7 3 20 31 5 10 8 9 6 11 8 5 5 9 12 11 14
## 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734
## 4 5 8 6 8 5 11 15 1 4 10 2 8 2 8 1 4 5 1 8 9 1
## 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756
## 5 18 2 7 11 12 3 19 6 7 7 1 11 5 10 12 8 2 2 4 1 7
## 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778
## 14 9 5 17 6 9 6 2 5 12 7 2 8 2 5 1 7 4 7 8 4 5
## 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800
## 9 2 6 7 7 7 8 12 8 1 7 20 3 6 8 6 4 6 5 1 3 10
## 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822
## 4 6 1 2 16 1 4 9 1 14 6 8 14 9 3 1 2 2 1 2 4 5
## 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844
## 9 13 10 5 7 6 4 2 4 16 3 1 8 9 4 2 1 6 1 1 1 1
## 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866
## 2 21 2 8 1 19 11 1 1 13 9 6 6 3 10 10 4 1 3 6 2 1
## 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888
## 2 7 5 3 4 1 5 6 10 9 1 8 7 5 4 1 2 3 10 7 7 4
## 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910
## 3 6 3 5 3 8 3 7 9 15 16 6 17 23 13 2 5 5 6 1 6 9
## 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932
## 3 11 7 6 6 8 11 8 8 9 9 1 6 1 2 3 7 4 3 8 1 2
## 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954
## 6 10 2 5 1 5 8 7 9 4 7 4 18 6 4 4 5 4 20 10 4 1
## 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976
## 11 7 2 17 4 7 3 5 2 6 8 7 4 2 12 4 5 7 10 2 4 4
## 977 978 979 980 981 982 983 <NA>
## 14 8 13 4 3 3 5 16
## [1] "Frequency table before encoding"
## cod_mod.
## 1031574 1117704 1120005 1201649 1201870 1260942 1266428 1271840 1273655 1314376 1320647 1321322
## 16 4 10 1 15 6 2 6 1 2 2 10 6
## 1321330 1321355 1321421 1327279 1327287 1336072 1343573 1343581 1344639 1345024 1347269 1347293 1347301
## 11 9 11 2 9 3 9 12 13 7 1 13 8
## 1347434 1347459 1347921 1347939 1347970 1352269 1364868 1369248 1372507 1374438 1377209 1377233 1377415
## 1 8 19 11 14 1 7 14 7 1 11 1 14
## 1379361 1379544 1380021 1380120 1386226 1388610 1388644 1388651 1389261 1389279 1390095 1390467 1390517
## 1 4 20 4 10 2 5 11 9 5 2 11 6
## 1390582 1390665 1390673 1392083 1392091 1392109 1392117 1392125 1392141 1392174 1392216 1392224 1392240
## 1 17 16 7 6 10 7 13 16 14 2 7 3
## 1392257 1396191 1396209 1396225 1396852 1396878 1396886 1398783 1398932 1401934 1401942 1401959 1402536
## 8 19 11 27 18 4 9 1 1 11 8 17 7
## 1408426 1412634 1412873 1415983 1418615 1423003 1442185 1452705 1458348 1459791 1459809 1523802 1523810
## 2 5 3 4 4 12 5 7 9 9 12 9 8
## 1523828 1540988 1540996 1541192 1625532 1625557 1625573 1630631 1637263 1659101 1666130 1719210 1723469
## 6 7 10 10 7 8 8 3 4 9 1 3 2
## 204800 204875 204909 205005 205047 205112 205120 205153 205682 205690 205773 205781 205815
## 10 8 2 5 6 8 11 7 6 7 9 2 5
## 206334 207373 207407 216341 219741 220285 226704 232207 232223 232231 232249 232264 232504
## 3 1 1 8 2 3 6 9 9 6 2 7 3
## 232512 232538 232546 232553 232561 232579 232587 232595 232603 232611 232645 232728 232777
## 6 3 5 5 4 3 7 5 7 2 1 3 6
## 233130 233296 233361 233676 233718 233734 233825 233882 233890 233908 233916 233924 233932
## 4 5 4 3 7 5 3 1 3 6 8 9 3
## 233940 233957 233965 233973 233981 233999 234021 234062 234096 234104 234112 234120 234138
## 4 6 3 5 6 8 7 5 6 6 4 4 8
## 234153 234161 234187 234195 234203 234229 234237 234351 234369 234377 234385 234401 234419
## 6 8 5 1 4 5 6 1 9 5 8 7 8
## 234427 234443 234450 234500 234583 234674 234682 234781 234831 234856 236158 236349 236422
## 7 6 3 7 3 9 7 3 7 7 6 5 16
## 236448 236463 236471 236489 236653 236661 236927 287409 287425 287466 309286 309294 309377
## 4 8 1 7 1 31 9 10 6 3 1 12 1
## 309419 309435 309567 310433 310441 312090 312215 312306 312421 312744 312868 313080 313239
## 3 1 4 3 1 2 6 10 5 2 2 1 2
## 313395 313460 313890 313908 313965 313981 314070 314187 314211 314237 314245 314252 314260
## 9 1 3 8 6 6 2 5 4 4 4 6 6
## 314278 314294 405258 405498 405704 405738 405746 405852 405894 405902 405928 405936 406009
## 10 5 6 8 5 5 4 9 8 8 8 9 6
## 406066 406082 406116 406124 406140 406215 406223 406264 406413 406595 406629 406645 406975
## 10 7 6 8 3 5 6 4 8 8 9 5 6
## 406983 407007 407049 408211 408245 408278 408286 408294 408328 408336 408393 408468 408476
## 10 4 7 1 7 4 4 3 5 1 4 8 7
## 408484 408492 408559 408567 408609 408666 408732 408773 408823 408856 408922 408955 408971
## 5 5 4 8 6 8 3 4 3 8 7 4 5
## 409003 409011 409029 409193 409227 409235 409243 409284 409292 409300 409318 409326 409359
## 7 10 9 5 8 8 6 9 7 9 7 4 7
## 409441 409565 409896 410464 410480 410514 410613 410670 410746 410779 410787 410803 473249
## 9 4 2 1 7 5 1 4 7 9 5 7 7
## 481283 486688 486928 489120 495069 495325 498782 499863 502922 504142 517581 517888 518084
## 9 8 2 8 14 16 1 3 2 13 29 8 22
## 518472 519496 519595 519678 525923 550392 551309 557587 579268 579276 579284 579292 579300
## 10 6 6 6 21 7 3 9 9 1 1 17 20
## 585885 587055 587147 587204 589200 589747 589804 591255 591602 592147 612051 612119 612291
## 6 15 4 13 1 1 5 23 14 4 2 2 5
## 612416 612507 612689 612747 612770 612804 615013 616110 617787 617829 621391 623017 623041
## 2 9 1 1 1 4 5 6 17 6 15 4 2
## 637215 637272 639542 647388 647412 647446 647628 655746 671628 672105 678961 679829 680058
## 1 9 2 10 7 16 4 1 4 4 2 2 9
## 680082 680124 699603 712562 712711 712778 723031 730655 731273 735498 736116 775700 776039
## 1 18 12 2 2 2 3 6 2 5 2 5 3
## 783423 783597 783621 783696 783704 783720 783787 783795 791319 791574 794438 796888 818674
## 12 1 1 11 1 14 10 10 20 4 2 2 6
## 818708 844159 844183 891408 891812 895482 899351 927871 930958 931055 931063 932236 932434
## 3 2 4 14 1 1 3 10 1 15 18 12 2
## 932491 932608 932848 933226 933283 933291 933317 933531 933598 933846
## 4 8 7 6 10 1 6 1 2 6
## [1] "Frequency table after encoding"
## cod_mod.
## 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338
## 7 8 2 9 4 11 5 9 1 9 6 4 4 5 3 31 4 10 2 1 16 3 4 8 6 13 1
## 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365
## 15 7 1 2 4 1 15 2 5 7 10 3 9 2 13 2 8 10 9 1 10 6 10 14 6 6 9
## 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392
## 3 9 7 2 7 4 4 6 18 2 5 3 4 7 2 2 8 6 8 12 1 6 1 17 3 8 3
## 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419
## 8 3 1 1 1 12 4 16 17 7 12 2 1 1 9 9 4 3 6 9 4 9 6 5 7 7 9
## 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446
## 7 7 9 1 5 5 6 3 4 3 3 9 9 7 10 7 7 2 21 18 8 6 7 14 2 6 3
## 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473
## 9 4 9 6 2 9 8 9 5 4 4 5 17 3 8 8 6 8 6 8 4 3 7 6 4 3 10
## 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500
## 6 4 5 1 10 7 22 4 4 5 7 3 3 9 14 7 3 10 14 7 5 9 11 4 19 10 1
## 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527
## 4 3 1 5 7 5 7 1 16 1 3 5 7 4 2 2 4 6 4 20 2 6 14 23 11 10 7
## 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554
## 11 1 8 10 16 2 9 13 11 3 9 7 2 5 4 8 1 20 11 1 4 2 1 8 7 8 8
## 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581
## 5 16 8 2 2 6 9 12 2 7 9 5 2 3 12 8 4 2 6 8 8 1 5 9 7 1 4
## 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608
## 6 8 3 7 3 10 5 9 8 19 10 6 15 6 1 11 1 3 20 1 4 7 6 14 4 1 1
## 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635
## 5 2 7 7 1 6 3 3 8 8 8 7 14 1 6 10 4 11 6 7 5 1 6 6 4 9 6
## 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662
## 2 16 8 9 1 2 27 1 5 4 3 2 5 7 8 9 2 11 6 8 2 8 5 10 1 6 6
## 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689
## 6 4 5 5 1 5 5 12 1 3 13 5 7 8 6 1 5 29 6 6 5 8 11 4 7 7 5
## 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716
## 2 1 6 1 3 13 10 2 2 4 6 2 8 1 4 6 10 3 4 1 5 1 3 10 1 7 6
## 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737
## 6 9 1 6 18 15 2 12 1 7 17 1 1 8 5 5 2 2 2 14 4
## [1] "Frequency table before encoding"
## school_fixed_primary. Seleccione la escuela primaria a donde realmente va el niño(a)
## 219741 312561 405217 408922 408971 409243 612291 647388 679829
## 2757 1 1 2 1 1 1 1 1 3
## [1] "Frequency table after encoding"
## school_fixed_primary. Seleccione la escuela primaria a donde realmente va el niño(a)
## 451 452 453 454 455 456 457 458 459 460
## 1 1 1 1 2757 3 1 2 1 1
## [1] "Frequency table before encoding"
## school_fixed_sec. Seleccione la escuela secundaria a donde realmente va el niño(a)
## 1253905 1345024 1347301 1347434 1347921 1379544 1380021 1392240 1395367 1401934 1402536 1452705
## 2725 2 1 3 1 1 2 1 1 1 1 1 1
## 1540996 236109 309567 579300 589804 612507 616110 621391 637272 680124 783720 791319 931055
## 1 1 1 1 2 1 1 1 7 1 2 2 3
## 931436 932608 933226 933317
## 1 1 1 1
## [1] "Frequency table after encoding"
## school_fixed_sec. Seleccione la escuela secundaria a donde realmente va el niño(a)
## 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980
## 1 1 1 1 1 1 1 3 1 1 1 2 1 2 1 1 2725 1 1 1 3 7
## 981 982 983 984 985 986 987 988
## 2 1 2 1 1 1 2 1
## [1] "Frequency table before encoding"
## nom_dist.
## ACCHA ACHOMA ACOS ALCA
## 16 38 1 3 21
## ALTO SELVA ALEGRE ANCAHUASI ANDAGUA ANDAHUAYLILLAS ANTA
## 1 97 3 1 87
## AREQUIPA CAICAY CALCA CAPACMARCA CAYARANI
## 1 42 1 33 38
## CAYLLOMA CCAPI CCORCA CERRO COLORADO CHACHAS
## 29 21 10 3 20
## CHALLABAMBA CHAMACA CHECCA CHILCAYMARCA CHINCHAYPUJIO
## 63 116 135 8 57
## CHINCHERO CHIVAY CHOCO CHUQUIBAMBA COLCHA
## 44 6 1 5 18
## COLQUEMARCA COLQUEPATA COPORAQUE COTAHUASI CUSCO
## 101 149 49 14 5
## ESPINAR HUANCARANI HUANOQUITE HUAROCONDO HUAYLLABAMBA
## 6 135 37 34 1
## HUAYNACOTAS IRAY JACOBO HUNTER JOSE MARIA QUIMPER KUNTURKANKI
## 21 3 2 1 80
## LAMAY LANGUI LIVITACA LLUSCO LUCRE
## 40 9 139 103 16
## MARAS OLLANTAYTAMBO OMACHA ORCOPAMPA PACCARITAMBO
## 32 5 111 47 21
## PAMPACOLCA PAMPAMARCA PARURO PAUCARPATA PAUCARTAMBO
## 5 27 16 1 6
## PILLPINTO PISAC PUCYURA PUYCA QUEHUE
## 1 61 1 54 2
## QUIÑOTA RONDOCAN SACHACA SALAMANCA SAMUEL PASTOR
## 57 44 1 2 1
## SAN JERONIMO SAN SALVADOR SAN SEBASTIAN SANTIAGO SANTO TOMAS
## 15 59 1 19 99
## SIBAYO SICUANI SOCABAYA TAPAY TIPAN
## 1 2 1 2 2
## TOMEPAMPA URUBAMBA VELILLE VIRACO WANCHAQ
## 11 29 96 2 2
## YANAOCA YAURISQUE YURA ZURITE
## 1 51 2 15
## [1] "Frequency table after encoding"
## nom_dist.
## 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665
## 2 14 2 38 80 1 47 1 101 135 21 61 1 16 99 10 5 33 49 27 59 1 15 2 1 103 15
## 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692
## 6 97 21 38 57 2 1 1 32 42 149 16 1 3 1 87 40 20 1 1 2 9 3 1 37 44 8
## 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719
## 2 51 11 63 6 1 1 29 139 29 18 21 19 5 3 2 44 2 96 3 21 16 2 5 135 6 1
## 720 721 722 723 724 725 726 727
## 34 54 57 116 111 1 5 1
## [1] "Frequency table before encoding"
## district_fixed. ¿Entonces en qué distrito vives?
## Otro CERRO COLORADO CHACHAS ORCOPAMPA VIRACO ALCA HUAYNACOTAS
## 28 1 1 8 1 1 1
## PUYCA TOMEPAMPA SAN JERONIMO SANTIAGO RONDOCAN ANTA ANCAHUASI
## 3 1 2 1 2 1 1
## HUAROCONDO ZURITE PISAC CHECCA SANTO TOMAS LIVITACA LLUSCO
## 1 1 1 2 3 1 1
## ESPINAR PARURO HUANOQUITE PACCARITAMBO CAICAY COLQUEPATA CHINCHERO
## 1 1 1 1 9 5 1
## MARAS <NA>
## 1 2687
## [1] "Frequency table after encoding"
## district_fixed. ¿Entonces en qué distrito vives?
## 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
## 1 3 1 1 1 9 2 1 1 2 1 8 5 1 1 1 3 1 28 1 1 2
## 397 398 399 400 401 402 403 <NA>
## 1 1 1 1 1 1 1 2687
## [1] "Frequency table before encoding"
## p12. Seleccione la escuela a la que le gustaría asistir. Si la escuela no está en la
## 1031574 1117704 1270214 1274398 1320647 1321421 1327287 1339472 1341585 1343573 1343581 1344639
## 2281 1 3 1 1 4 4 1 1 1 4 3 4
## 1345024 1347301 1347921 1347939 1347970 1364868 1364900 1369248 1370378 1371095 1372507 1374438 1379320
## 2 2 1 6 8 4 2 1 1 1 3 2 1
## 1380021 1380120 1386432 1388651 1389279 1389303 1390095 1390467 1390517 1392083 1392091 1392117 1392125
## 1 1 1 1 2 1 1 1 3 2 3 1 4
## 1392174 1392224 1392232 1392240 1392257 1393313 1396191 1396209 1396225 1396852 1396886 1398932 1401934
## 3 9 1 3 3 1 1 4 9 3 6 1 6
## 1401942 1401959 1402536 1412873 1415983 1423003 1452705 1458348 1470582 1523802 1523810 1523828 1540988
## 2 10 3 1 1 3 5 2 1 4 5 3 7
## 1540996 1625532 1625557 1625573 1637263 1659101 207407 233056 233130 236158 236174 236422 236430
## 2 1 4 6 1 4 2 2 3 3 1 1 1
## 236463 236646 236653 236661 236927 309286 309294 309419 309567 309641 309716 310433 310441
## 2 3 1 9 1 1 9 3 3 5 1 1 1
## 477828 489096 495069 495325 517581 518084 518241 518472 519678 525923 579243 579292 579300
## 1 2 3 7 10 6 1 3 2 14 1 5 6
## 579409 587055 587204 589804 591164 591255 591602 612051 612507 617787 617829 621391 637272
## 1 2 13 1 1 3 1 2 10 6 7 7 15
## 647446 680082 680124 699603 712778 730515 783696 783704 783720 783787 783795 785097 791319
## 6 1 11 3 1 1 7 1 1 5 4 1 19
## 891408 891788 894915 927814 927871 929638 930859 931063 931436 932236 932608 933283 933317
## 7 1 1 1 2 1 1 6 1 3 3 3 4
## 933556 933598 933846 934141
## 3 1 5 2
## [1] "Frequency table after encoding"
## p12. Seleccione la escuela a la que le gustaría asistir. Si la escuela no está en la
## 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509
## 2281 1 1 5 1 6 3 1 2 6 1 5 1 4 1 3 1 3 1 3 7 7
## 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531
## 1 1 2 3 1 1 1 1 5 1 3 6 1 1 1 2 3 2 4 3 1 2
## 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553
## 1 4 4 14 4 1 6 1 3 1 1 2 6 3 9 3 1 1 15 1 2 1
## 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575
## 4 1 3 3 4 2 1 11 6 8 1 4 1 4 3 5 1 3 1 2 2 1
## 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597
## 2 1 4 1 3 1 6 10 7 1 1 3 3 2 13 1 1 3 1 3 2 3
## 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619
## 2 4 6 1 1 3 1 1 1 10 1 1 5 9 10 7 1 7 4 2 1 2
## 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634
## 3 9 1 1 1 2 5 7 6 3 9 19 1 3 2
## [1] "Frequency table before encoding"
## school2014_name1. Seleccione la escuela a la que asistió en el año escolar del 2014. . Asegúrese q
## 1117944 1342294 1377209 1398858 1399443 1412634 1694637 204800 204875 204909 204925 205005
## 2001 1 1 7 2 1 3 2 3 3 5 1 1
## 205047 205153 205682 205690 205773 205781 205815 206334 216341 220285 226704 232207 232231
## 6 6 6 6 1 2 3 3 4 1 1 3 2
## 232249 232264 232504 232512 232538 232546 232553 232561 232579 232587 232595 232603 232611
## 1 3 1 5 1 4 4 3 2 5 2 3 1
## 232645 232728 232777 233015 233676 233718 233734 233825 233833 233890 233965 233973 233981
## 2 3 6 1 2 2 10 9 1 1 2 2 2
## 233999 234021 234039 234047 234062 234096 234112 234120 234138 234161 234187 234195 234203
## 7 8 1 1 8 3 9 10 2 3 3 8 7
## 234211 234229 234237 234369 234377 234385 234401 234427 234435 234443 234450 234476 234500
## 3 4 1 9 6 2 4 1 2 11 4 1 8
## 234575 234583 234674 234682 234781 234831 234856 287409 287417 287425 287466 287813 307579
## 1 1 8 3 1 6 9 9 1 1 2 1 1
## 312090 312215 312306 312421 312868 313239 313395 313460 313890 313908 313965 313973 313981
## 3 4 4 6 4 1 1 1 7 4 2 1 1
## 313999 314096 314187 314211 314237 314245 314252 314260 314278 314294 405001 405050 405258
## 1 4 1 4 1 4 5 4 3 6 1 1 3
## 405498 405704 405738 405746 405753 405852 405894 405902 405910 405936 406009 406041 406058
## 1 1 5 3 2 1 1 3 1 4 3 1 1
## 406066 406116 406124 406140 406223 406249 406264 406397 406413 406629 406637 406983 407007
## 3 4 2 1 1 1 2 1 1 5 7 3 9
## 407049 408229 408237 408245 408278 408286 408294 408336 408344 408393 408468 408476 408484
## 4 1 2 2 2 1 2 1 7 3 5 6 4
## 408492 408526 408542 408559 408583 408609 408666 408716 408732 408757 408773 408823 408856
## 3 1 2 3 5 4 4 1 7 1 5 4 5
## 408922 408955 408971 409003 409011 409029 409193 409235 409243 409284 409292 409300 409318
## 7 4 5 1 5 1 2 5 3 3 1 6 1
## 409359 409441 409565 409896 410480 410670 410738 410779 481283 499863 502922 517888 519496
## 5 4 3 2 3 3 1 1 2 7 1 7 5
## 519595 550392 551309 557587 585885 587089 587147 592634 612291 612416 612655 615013 623017
## 4 5 2 5 4 1 3 1 1 1 1 2 6
## 623041 637215 647388 647412 647628 671628 678839 678904 678961 680058 712562 712711 723031
## 1 2 5 1 3 2 1 2 2 4 4 2 1
## 730655 731273 731596 735480 735498 736033 736116 775700 783423 783597 796888 818674 818708
## 2 8 4 1 1 1 1 5 5 1 3 3 2
## 844159 844183 899351 930958 932434 932491 932848
## 2 3 2 11 1 2 5
## [1] "Frequency table after encoding"
## school2014_name1. Seleccione la escuela a la que asistió en el año escolar del 2014. . Asegúrese q
## 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
## 3 1 5 1 1 2 1 4 2 2 6 1 1 4 1 2001 4 3 2 2 1 8
## 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688
## 1 4 2 5 7 2 1 1 2 7 1 1 3 8 1 7 1 11 6 4 1 2
## 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710
## 7 6 3 1 1 5 4 6 3 4 1 6 9 6 1 6 2 3 2 4 2 1
## 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732
## 2 1 5 1 2 1 1 1 4 4 2 1 2 3 2 2 3 3 3 8 1 1
## 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754
## 3 2 3 1 3 5 1 7 2 2 1 1 3 10 2 5 6 3 4 1 1 1
## 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776
## 1 7 4 5 3 1 1 1 3 2 3 2 5 2 5 2 2 7 4 3 1 1
## 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798
## 5 1 8 1 3 3 1 1 1 9 3 3 9 7 2 3 1 1 7 1 4 3
## 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820
## 3 5 1 4 3 4 2 1 2 2 3 4 4 1 1 5 2 8 4 2 3 2
## 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842
## 1 6 1 3 1 10 5 1 4 1 4 2 2 5 11 5 1 9 1 6 2 4
## 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864
## 1 8 1 1 4 1 1 4 1 6 5 1 5 1 3 2 4 4 2 4 9 3
## 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885
## 1 7 3 1 1 1 1 6 4 5 1 3 5 5 2 5 1 3 1 9 3
## [1] "Frequency table before encoding"
## school2013_name1. Seleccione la escuela a la que asistió en el año escolar del 2013. Si la escuela
## 1117944 1342294 1377209 1398858 1399443 1412634 1423201 1694637 204800 204875 204909 204925
## 1979 1 2 7 1 1 4 1 2 4 3 5 2
## 205005 205047 205153 205682 205690 205773 205781 205815 205880 206326 206334 216341 219188
## 1 6 6 6 5 1 3 3 1 1 3 4 1
## 220285 226704 226894 232207 232231 232249 232264 232504 232512 232538 232546 232553 232561
## 1 1 1 3 2 2 3 1 5 1 4 4 3
## 232579 232587 232595 232603 232611 232645 232728 232777 233015 233668 233676 233718 233734
## 2 5 2 4 1 2 3 6 1 1 2 2 10
## 233759 233825 233833 233890 233965 233973 233981 233999 234021 234039 234047 234062 234096
## 1 9 1 2 2 2 2 6 8 1 1 8 3
## 234112 234120 234138 234161 234187 234195 234203 234211 234229 234237 234369 234377 234385
## 9 10 2 3 3 8 7 3 4 1 10 6 2
## 234401 234427 234435 234443 234450 234500 234575 234583 234674 234682 234781 234831 234856
## 5 1 2 11 4 8 1 1 8 3 1 6 8
## 287409 287417 287425 287466 287813 311878 312090 312215 312306 312421 312629 312868 313239
## 9 1 1 2 1 1 3 4 4 6 1 4 1
## 313395 313460 313890 313908 313965 313973 313981 313999 314096 314187 314211 314237 314245
## 1 1 7 5 2 1 1 1 4 1 4 1 5
## 314252 314260 314278 314294 405001 405050 405076 405258 405498 405704 405738 405746 405753
## 5 4 3 6 1 1 1 4 1 1 4 3 2
## 405894 405902 405936 406009 406033 406041 406066 406116 406124 406140 406223 406249 406264
## 1 3 4 3 1 1 2 3 2 1 1 1 1
## 406371 406413 406629 406637 406983 407007 407049 408237 408245 408278 408286 408294 408344
## 2 1 6 8 4 7 2 1 1 2 1 2 7
## 408351 408385 408393 408468 408476 408484 408492 408526 408542 408559 408583 408591 408609
## 1 1 4 5 6 4 4 2 2 3 5 1 4
## 408666 408716 408732 408757 408773 408823 408856 408922 408955 408971 409003 409011 409029
## 5 1 7 1 4 4 5 7 4 5 1 6 1
## 409193 409235 409243 409284 409292 409300 409318 409359 409441 409557 409565 409896 410480
## 3 5 3 3 1 5 1 5 4 1 1 2 3
## 410670 410704 410738 410779 481283 486688 499863 502922 517888 519496 519595 550392 551309
## 2 1 1 1 2 1 7 1 7 5 5 5 2
## 557587 585885 587089 587147 592634 612291 612416 612655 612804 615013 623017 623041 637215
## 5 4 1 2 1 1 2 1 1 2 6 1 2
## 647305 647388 647412 647628 671628 678839 678904 678961 680058 712562 712711 723031 730655
## 1 5 1 3 2 1 1 2 4 4 1 1 2
## 731273 731596 735480 735498 736033 736116 745612 775700 783423 783597 796888 818674 818708
## 8 4 1 1 2 1 1 5 5 1 3 4 2
## 818880 844159 844183 844266 899351 930958 932434 932491 932848 932871
## 1 2 3 1 2 11 1 2 6 1
## [1] "Frequency table after encoding"
## school2013_name1. Seleccione la escuela a la que asistió en el año escolar del 2013. Si la escuela
## 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534
## 1 4 4 5 3 1 1 2 2 5 9 1 5 5 1 2 3 1979 3 1 4 2
## 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556
## 8 2 5 5 1 3 9 1 5 1 1 5 1 5 2 1 2 1 4 2 1 6
## 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578
## 1 1 1 3 1 1 10 2 2 1 1 3 4 4 1 4 5 2 4 2 6 2
## 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600
## 1 1 1 3 1 6 5 6 2 3 2 1 1 4 1 6 11 6 4 1 5 1
## 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622
## 7 1 1 1 1 1 5 1 10 3 10 1 1 4 3 6 1 4 1 3 1 6
## 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644
## 1 8 2 1 2 1 2 4 4 1 2 3 4 3 3 2 4 1 3 5 5 1
## 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
## 2 1 1 4 4 11 7 4 1 6 1 2 4 1 1 1 1 6 3 2 6 2
## 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688
## 2 5 1 2 2 2 1 2 4 1 1 1 1 1 4 7 1 6 1 5 1 1
## 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710
## 4 8 3 3 1 4 1 1 2 3 7 1 4 8 4 5 3 7 1 8 9 3
## 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732
## 1 1 5 2 7 8 2 2 1 3 2 1 1 5 2 2 2 1 5 6 1 3
## 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754
## 6 2 4 3 2 1 4 8 1 1 1 5 2 7 2 3 1 4 1 3 5 2
## 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769
## 1 7 2 1 1 1 3 4 7 2 8 4 1 1 1
# !!!Drop the variables listed below: they contain identifying information.
dropvars <- c(
  "i9a", "school2016", "school_fixed", "p11b",
  "school2014_name", "school2013_name", "district_fixed1", "centro_poblado"
)
# Keep only the columns whose name has no match in dropvars.
mydata <- mydata[is.na(match(names(mydata), dropvars))]
# Focus on variables with a "Lowest Freq" in the dictionary of 30 or less.
# !!!Drop these as well: they contain identifying information.
dropvars <- c("i15", "i16", "i16a", "i18_fixed")
mydata <- mydata[is.na(match(names(mydata), dropvars))]
# Top-code p12b (days absent from school last month) at 5 or more.
# 888 and 999999 are passed to the helper as missing-value codes.
# The result is assigned back to mydata, like the other recoding helpers in
# this script (encode_location, ordinal_recode), so that the top-coded p12b is
# the version carried forward to the exported files. Assigning it to a
# separate object left the exported p12b un-top-coded.
# NOTE(review): assumes top_recode returns the full recoded dataset, as the
# other helpers do -- confirm in the helper functions file.
mydata <- top_recode("p12b", break_point = 5, missing = c(888, 999999))
## [1] "Frequency table before encoding"
## p12b. ¿En el mes pasado cuántos días faltaste a la escuela?
## 0 1 2 3 4 5 6 7 9 10 <NA>
## 1709 501 289 127 33 49 5 5 1 5 45
## [1] "Frequency table after encoding"
## p12b. ¿En el mes pasado cuántos días faltaste a la escuela?
## 0 1 2 3 4 5 or more <NA>
## 1709 501 289 127 33 65 45
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)
# Regular name families are generated with paste0(); the resulting order is
# the same as when every name is written out one by one.
indirect_PII <- c(
  "i10", "i12", "i23", "dropout", "school_fixed_level",
  "do_grade2015_fixed", "do_approved_grade2015", "do_2015_fixed",
  "dropout_approved_fixed",
  "p1a", "p1a_1", "p1a_2", "p1a_3",
  # dout_reasons_1 is intentionally absent here (it is listed with the
  # open-ended variables further down).
  "dout_reasons", paste0("dout_reasons_", c(2:14, 98, 99)),
  "dout_decision", "p20", "same_school2015", "switcher_2016",
  "asissted_2014", "same_school2014", "switcher_2015",
  "dout2014", paste0("dout2014_", c(1:14, 98, 99)),
  "asissted_2013", "same_school2013", "switcher_2014",
  "dout2013", paste0("dout2013_", c(1:14, 98, 99)),
  # a2, a2b, a2c, a2d, a3, ..., a11d and the same pattern for m2 ... m11d.
  paste0(rep(paste0("a", 2:11), each = 4), c("", "b", "c", "d")),
  paste0(rep(paste0("m", 2:11), each = 4), c("", "b", "c", "d")),
  "p22a", "p22b",
  "p25_note", "p25a1", "p25a2", "p25a3", "p25b", "p25c", "p25d", "p25e",
  "p25_1_note1", paste0("p25_1", letters[1:6]),
  "p25_2g", "p25_3h", "p25_4i", "p25_5j", "p25_6k", "p25_7l", "p25_8m",
  "p25_9n", "p25_10o", "p25_11p", "p25_12q", "p25_13r", "p25_14s", "p25_14t",
  "p25_2_note", paste0("p25_2", letters[1:6]),
  "p25_2g1", "p25_2h", "p25_2i",
  "p27_note", paste0("p27", letters[1:5])
)
# Hand the flagged indirect-PII variables to the capture_tables helper.
capture_tables(indirect_PII)
# Recode the very specific p1a values "1 3" and "2 3" to "Otros".
mydata$p1a[mydata$p1a %in% c("1 3", "2 3")] <- "Otros"
# Break points and value labels for recoding switcher_2016 (reason for
# changing school). Codes 3, 4 and 6 all receive the label "Otro".
# break_activity is reused by the later switcher_* recodes.
break_activity <- c(-98, 1:4, 99)
labels_activity <- setNames(
  c(1, 2, 3, 4, 5, 6),
  c(
    "No se",
    "Porque mi escuela anterior no tenia nivel secundario",
    "Otro",
    "Otro",
    "Porque mi nueva escuela es mejor que mi antigua escuela",
    "Otro"
  )
)
mydata <- ordinal_recode(
  variable = "switcher_2016",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)
## [1] "Frequency table before encoding"
## switcher_2016. ¿Por qué te cambiaron/ te cambiaste de escuela?
## No sé
## 1
## Porque mi escuela anterior no tenía nivel secundario
## 961
## Porque mi nueva escuela está más cerca de mi casa
## 21
## Porque me mudé a otro centro poblado/distrito
## 18
## Porque mi nueva escuela es mejor que mi antigua escuela
## 43
## Otro
## 31
## <NA>
## 1694
## recoded
## [-98,1) [1,2) [2,3) [3,4) [4,99) [99,1e+06)
## -98 1 0 0 0 0 0
## 1 0 961 0 0 0 0
## 2 0 0 21 0 0 0
## 3 0 0 0 18 0 0
## 4 0 0 0 0 43 0
## 99 0 0 0 0 0 31
## [1] "Frequency table after encoding"
## switcher_2016. ¿Por qué te cambiaron/ te cambiaste de escuela?
## No se
## 1
## Porque mi escuela anterior no tenia nivel secundario
## 961
## Otro
## 70
## Porque mi nueva escuela es mejor que mi antigua escuela
## 43
## <NA>
## 1694
## [1] "Inspect value labels and relabel as necessary"
## No se
## 1
## Porque mi escuela anterior no tenia nivel secundario
## 2
## Otro
## 3
## Otro
## 4
## Porque mi nueva escuela es mejor que mi antigua escuela
## 5
## Otro
## 6
# Value labels for recoding switcher_2015; codes 3, 4 and 6 all receive the
# label "Otro". The break points are the ones already stored in
# break_activity.
labels_activity <- setNames(
  c(1, 2, 3, 4, 5, 6),
  c(
    "No se",
    "Porque mi escuela anterior no tenia nivel secundario",
    "Otro",
    "Otro",
    "Porque mi nueva escuela es mejor que mi antigua escuela",
    "Otro"
  )
)
mydata <- ordinal_recode(
  variable = "switcher_2015",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)
## [1] "Frequency table before encoding"
## switcher_2015. ¿Por qué te cambiaron/ te cambiaste de escuela?
## No sé
## 2
## Porque mi escuela anterior no tenía nivel secundario
## 971
## Porque mi nueva escuela está más cerca de mi casa
## 28
## Porque me mudé a otro centro poblado/distrito
## 23
## Porque mi nueva escuela es mejor que mi antigua escuela
## 48
## Otro
## 47
## <NA>
## 1650
## recoded
## [-98,1) [1,2) [2,3) [3,4) [4,99) [99,1e+06)
## -98 2 0 0 0 0 0
## 1 0 971 0 0 0 0
## 2 0 0 28 0 0 0
## 3 0 0 0 23 0 0
## 4 0 0 0 0 48 0
## 99 0 0 0 0 0 47
## [1] "Frequency table after encoding"
## switcher_2015. ¿Por qué te cambiaron/ te cambiaste de escuela?
## No se
## 2
## Porque mi escuela anterior no tenia nivel secundario
## 971
## Otro
## 98
## Porque mi nueva escuela es mejor que mi antigua escuela
## 48
## <NA>
## 1650
## [1] "Inspect value labels and relabel as necessary"
## No se
## 1
## Porque mi escuela anterior no tenia nivel secundario
## 2
## Otro
## 3
## Otro
## 4
## Porque mi nueva escuela es mejor que mi antigua escuela
## 5
## Otro
## 6
# Value labels for recoding switcher_2014. Here code 3 keeps its own label
# ("Porque mi nueva escuela esta mas cerca de mi casa"); only codes 4 and 6
# receive the label "Otro". The break points are the ones already stored in
# break_activity.
labels_activity <- setNames(
  c(1, 2, 3, 4, 5, 6),
  c(
    "No se",
    "Porque mi escuela anterior no tenia nivel secundario",
    "Porque mi nueva escuela esta mas cerca de mi casa",
    "Otro",
    "Porque mi nueva escuela es mejor que mi antigua escuela",
    "Otro"
  )
)
mydata <- ordinal_recode(
  variable = "switcher_2014",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)
## [1] "Frequency table before encoding"
## switcher_2014. ¿Por qué te cambiaron/ te cambiaste de escuela?
## No sé
## 6
## Porque mi escuela anterior no tenía nivel secundario
## 960
## Porque mi nueva escuela está más cerca de mi casa
## 35
## Porque me mudé a otro centro poblado/distrito
## 29
## Porque mi nueva escuela es mejor que mi antigua escuela
## 56
## Otro
## 60
## <NA>
## 1623
## recoded
## [-98,1) [1,2) [2,3) [3,4) [4,99) [99,1e+06)
## -98 6 0 0 0 0 0
## 1 0 960 0 0 0 0
## 2 0 0 35 0 0 0
## 3 0 0 0 29 0 0
## 4 0 0 0 0 56 0
## 99 0 0 0 0 0 60
## [1] "Frequency table after encoding"
## switcher_2014. ¿Por qué te cambiaron/ te cambiaste de escuela?
## No se
## 6
## Porque mi escuela anterior no tenia nivel secundario
## 960
## Porque mi nueva escuela esta mas cerca de mi casa
## 35
## Otro
## 89
## Porque mi nueva escuela es mejor que mi antigua escuela
## 56
## <NA>
## 1623
## [1] "Inspect value labels and relabel as necessary"
## No se
## 1
## Porque mi escuela anterior no tenia nivel secundario
## 2
## Porque mi nueva escuela esta mas cerca de mi casa
## 3
## Otro
## 4
## Porque mi nueva escuela es mejor que mi antigua escuela
## 5
## Otro
## 6
# Categorical key variables used for the k-anonymity check: i17, grado, i12.
# NOTE(review): judging by the values printed further down these look like
# birth year, grade and gender -- confirm against the dictionary.
selectedKeyVars <- c("i17", "grado", "i12") ##!!! Replace with candidate categorical demo vars
# Build the sdcMicro object on those key variables, then print its summary.
sdcInitial <- createSdcObj(dat = mydata, keyVars = selectedKeyVars)
sdcInitial
## The input dataset consists of 2769 rows and 948 variables.
## --> Categorical key variables: i17, grado, i12
## ----------------------------------------------------------------------
## Information on categorical key variables:
##
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
## Key Variable Number of categories Mean size Size of smallest (>0)
## i17 14 (14) 197.786 (197.786) 1 (1)
## grado 4 (4) 692.250 (692.250) 16 (16)
## i12 3 (3) 1376.500 (1376.500) 1338 (1338)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
##
## Number of observations violating
## - 2-anonymity: 14 (0.506%)
## - 3-anonymity: 18 (0.650%)
## - 5-anonymity: 31 (1.120%)
##
## ----------------------------------------------------------------------
Show the key-variable values of the records that violate k-anonymity:
# Apply labelDataset to the data before the violating records are printed.
mydata <- labelDataset(mydata)
# Flag records whose value in column 2 of the individual-risk table is
# below 2, i.e. the records that violate 2-anonymity.
# NOTE(review): column 2 is presumably the sample frequency count -- confirm.
individual_risk <- sdcInitial@risk$individual
notAnon <- individual_risk[, 2] < 2 # for 2-anonymity
# Print the key variables of the flagged records.
mydata[notAnon, selectedKeyVars]
## # A tibble: 14 x 3
## i17 grado i12
## <chr> <chr> <dbl+lbl>
## 1 1997 9 2 [Mujer]
## 2 1997 10 1 [Hombre]
## 3 2001 8 2 [Mujer]
## 4 1974 8 1 [Hombre]
## 5 2000 8 1 [Hombre]
## 6 2006 9 1 [Hombre]
## 7 2006 10 2 [Mujer]
## 8 2000 8 2 [Mujer]
## 9 2007 9 1 [Hombre]
## 10 1998 10 2 [Mujer]
## 11 2001 8 1 [Hombre]
## 12 1999 8 2 [Mujer]
## 13 1995 9 1 [Hombre]
## 14 1998 9 2 [Mujer]
# Run local suppression on the key variables of the sdcMicro object.
sdcFinal <- localSuppression(sdcInitial)
# Pull the manipulated data back out and print the key variables of the
# records flagged in notAnon, to see which values were suppressed.
manipulated_data <- extractManipData(sdcFinal)
manipulated_data[notAnon, selectedKeyVars] #manipulated variables HH
## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first element will be used
## i17 grado i12
## 153 <NA> 9 2
## 475 <NA> 10 1
## 483 <NA> 8 2
## 967 <NA> 8 1
## 999 <NA> 8 1
## 1220 <NA> 9 1
## 1360 <NA> 10 2
## 1574 <NA> 8 2
## 1672 <NA> 9 1
## 1698 <NA> 10 2
## 1754 <NA> 8 1
## 1975 <NA> 8 2
## 2228 <NA> 9 1
## 2650 <NA> 9 2
# Carry the local-suppression result over to mydata for the flagged records.
# For every key variable, blank exactly the cells that localSuppression set
# to NA in sdcFinal. In the run printed above that is i17 (grado and i12 were
# left untouched). Hard-coding i12 and grado here left the unique i17 values
# in the exported data and suppressed two variables that did not need it.
suppressed_data <- extractManipData(sdcFinal)
for (key_var in selectedKeyVars) {
  # Restrict to the records that violated 2-anonymity.
  suppressed_rows <- notAnon & is.na(suppressed_data[[key_var]])
  mydata[suppressed_rows, key_var] <- NA
}
# !!! Identify open-end variables here:
# The a*/m* families follow the pattern a2_o, a2g, a3_o, a3g, ..., a11g and
# are generated with paste0() in that order.
open_ends <- c(
  "dout_reasons_1", "v108", "p13c1",
  "p35a", "p35a1", "p35b", "p35b1",
  "switcher_2016_otro", "switcher_2015_otro", "dout2014_otro",
  "switcher_2014_otro", "dout2013_otro",
  paste0(rep(paste0("a", 2:11), each = 2), c("_o", "g")),
  paste0(rep(paste0("m", 2:11), each = 2), c("_o", "g")),
  "q48", "p1a"
)
# Pass the open-end variables to the report_open helper.
report_open(list_open_ends = open_ends)
# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number
# !!! Remove, as they contain a lot of sensitive information and they are in Spanish.
# All of these columns are dropped in a single pass. p1a is not in this list,
# so it stays in the data.
open_end_drops <- c(
  "dout_reasons_1", "v108", "p13c1",
  "p35a", "p35a1", "p35b", "p35b1",
  "switcher_2016_otro", "switcher_2015_otro", "dout2014_otro",
  "switcher_2014_otro", "dout2013_otro",
  paste0(rep(paste0("a", 2:11), each = 2), c("_o", "g")),
  paste0(rep(paste0("m", 2:11), each = 2), c("_o", "g")),
  "q48"
)
mydata <- mydata[!names(mydata) %in% open_end_drops]
Export the public-use dataset, adding "_PU" (Public Use) to the end of the file name.
# Write the processed data in Stata (.dta) and SPSS (.sav) format; both files
# share the same "<filename>_PU" stem.
public_use_stem <- paste0(filename, "_PU")
haven::write_dta(mydata, paste0(public_use_stem, ".dta"))
haven::write_sav(mydata, paste0(public_use_stem, ".sav"))
# Build the report title from the dataset file name.
title_var <- sprintf("DOL-ILAB SDC - %s", filename)