# Start from an empty global workspace so no objects from a previous session
# leak into this run. `all.names = TRUE` also removes dot-prefixed objects.
# (The lowercase `t` used previously is base R's transpose function, not a
# logical, so `ls()` rejected it.)
rm(list = ls(all.names = TRUE))

filename <- "IDT_raw_parents_NOPII" # !!!Update filename
functions_vers <- "functions_1.7.R" # !!!Update helper functions file

# Run the helper-functions file named above.
# NOTE(review): `mydata` and the helpers called later (encode_direct_PII_team,
# encode_location, ordinal_recode) are not defined in this script; presumably
# the sourced file provides them and loads `mydata` using `filename` -- confirm.
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary
# !!!Include any Direct PII variables
# Columns listed here are removed from `mydata` outright.
dropvars <- c(
  "prompt_student_id",
  "student_id",
  "student_name",
  "no_guardian_name",
  "grado2016_admin_name",
  "sap_househead_name",
  "consent_signature",
  "consent_signature_paper",
  "hh_name1",
  "hh_name2",
  "hh_lastname1",
  "hh_lastname2",
  "hh_dni",
  "i31",
  "p60",
  "reference",
  "i32",
  "random_audio_hh",
  paste0("p27a", 1:8),
  paste0("p27c", 1:2),
  paste0("p27d", 1:4),
  paste0("p27ma", 1:6),
  paste0("p27mc", 1:6),
  "p27md1",
  "address"
)

# A misspelled or renamed variable would otherwise be skipped silently and its
# PII would stay in the data, so report any listed name that is not a column.
if (!all(dropvars %in% names(mydata))) {
  warning(
    "Direct-PII variables not found in mydata (check spelling): ",
    paste(setdiff(dropvars, names(mydata)), collapse = ", "),
    call. = FALSE
  )
}

# Keep only the columns whose names are not in the drop list.
mydata <- mydata[!(names(mydata) %in% dropvars)]
# !!!Replace the vector passed as `variables` below with the relevant variable names
# Remove i4 from the dataset entirely.
dropvars <- "i4"
mydata <- mydata[is.na(match(names(mydata), dropvars))]

# Encode i5 (labelled "Seleccione el nombre del encuestador", i.e. the
# interviewer's name) with the team-PII helper. The helper is defined outside
# this script; it returns the updated dataset, which replaces `mydata`.
mydata <- encode_direct_PII_team(variables = "i5")
## [1] "Frequency table before encoding"
## i5. Seleccione el nombre del encuestador
## Missing-MINEDU
## 2735
## [1] "Frequency table after encoding"
## i5. Seleccione el nombre del encuestador
## 1
## 2735
# !!!Include relevant variables, but check their population size first to
# confirm they are <100,000
locvars <- c(
  "cod_mod2016_admin",
  "district",
  "p63"
)

# Replace the original location values with codes via the helper (defined
# outside this script); 999999 is passed as the missing-value code.
# NOTE(review): in the recorded p63 frequency table an unlabeled value
# (n = 634) and the value 99 (n = 225) each receive an ordinary location code;
# confirm whether these are missing/"don't know" codes that should be treated
# as missing instead.
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## cod_mod2016_admin.
## 1007160 1008440 1008929 1008960 1009844 1010040 1010149 1010180 1034016
## 1 1 16 2 4 6 2 10 1 3
## 1039676 1041516 1041557 1041631 1045111 1045434 1045715 1045798 1046226 1048990
## 1 1 2 15 5 3 1 10 1 3
## 1049493 1053628 1053669 1054154 1054196 1054238 1054352 1054394 1054436 1056902
## 1 8 5 8 3 4 4 1 1 4
## 1063023 1063106 1063148 1063221 1063304 1064989 1066026 1068238 1069954 1070077
## 1 8 12 5 8 3 7 3 3 10
## 1070390 1071919 1072040 1072727 1073212 1074301 1075779 1080068 1080258 1082874
## 4 9 1 2 1 5 1 6 3 1
## 1083633 1083674 1083716 1083815 1084508 1084987 1085851 1085976 1088400 1099654
## 2 2 4 2 11 1 3 2 5 1
## 1194265 1194380 1194810 1195189 1195577 1196526 1223023 1238229 1240720 1241454
## 11 11 4 7 1 3 6 5 9 1
## 1242908 1247832 1248392 1248509 1254192 1258334 1258649 1261742 1264340 1264670
## 1 2 11 1 1 1 3 3 1 2
## 1266840 1272822 1278662 1279124 1309392 1309574 1313444 1330315 1332220 1346675
## 2 2 1 10 2 8 3 1 6 1
## 1349448 1351410 1354091 1362318 1375211 1376870 1381342 1381599 1381896 1382829
## 1 1 1 2 2 1 4 8 2 4
## 1385251 1392893 1393453 1398148 1401801 1411438 1420694 1423615 1431667 1438027
## 1 1 2 7 1 2 3 10 1 2
## 1438035 1453232 1464668 1469675 1473511 1474600 1474964 1475011 1475201 1475284
## 2 1 1 1 1 2 1 9 11 7
## 1476258 1476464 1481514 1481720 1482975 1483627 1487339 1489822 1492255 1493964
## 1 3 1 1 1 1 1 1 1 1
## 1495365 1495407 1496314 1496355 1497007 1497551 1499748 1499961 1500354 1501188
## 10 5 1 2 4 1 1 1 4 2
## 1501451 1505494 1507094 1507250 1507276 1507532 1509108 1509496 1512789 1515360
## 11 14 12 12 1 12 1 2 1 1
## 1520279 1520287 1528520 1529981 1536994 1541879 1573328 1575323 1640556 1641521
## 1 4 1 1 1 1 1 1 2 10
## 1661271 1666882 1697234 1699933 1701002 207795 207894 207985 208058 208348
## 10 1 3 1 1 4 1 2 4 2
## 208538 208546 208561 208587 208694 208736 209304 209387 209510 209528
## 3 3 7 1 1 5 3 4 1 1
## 209536 209908 209916 209924 209940 209965 209973 210260 215632 245647
## 6 20 6 2 6 9 4 1 8 2
## 245654 245662 245670 245688 245696 245704 305656 314500 317131 317214
## 1 7 6 10 1 1 2 1 3 2
## 317289 317305 317313 317370 317453 317479 317560 317610 317941 318063
## 1 1 3 1 2 2 5 1 1 4
## 318089 318287 318352 318782 318949 319004 319020 319061 319145 319160
## 4 1 3 1 1 1 1 1 2 2
## 319285 320655 322479 322685 322974 323345 323378 325449 325464 325472
## 4 1 6 1 2 3 4 1 5 11
## 325480 325548 325555 325563 325589 325613 325647 325670 325704 328039
## 1 1 2 11 5 1 9 6 8 2
## 328047 328260 328468 328518 328526 329029 329128 329151 329573 329755
## 3 2 1 3 3 2 1 1 7 4
## 329805 330464 333666 334649 334656 334664 334672 334680 334706 334722
## 2 2 5 4 2 8 10 1 8 3
## 334730 334748 334847 334920 334987 335042 335091 335224 336495 336537
## 1 10 2 1 9 4 10 1 2 4
## 336545 336560 336586 336594 336610 336628 336636 337436 337568 337592
## 3 5 5 2 3 7 5 7 5 4
## 337733 337741 337766 338129 338228 338301 338343 338517 338640 338665
## 2 1 1 1 2 3 2 6 5 3
## 338848 339051 339317 339432 339499 339606 339804 340224 340281 340299
## 1 7 1 1 1 5 1 8 1 2
## 340315 340349 340372 340380 340398 340414 340422 340463 343566 432773
## 10 7 1 6 1 3 2 1 2 3
## 432906 433227 433235 433276 433490 433540 433680 433821 433961 434019
## 1 4 1 6 6 4 4 8 5 4
## 434076 434159 434191 434282 434464 434480 434498 434506 434548 434597
## 2 3 3 3 3 4 3 2 3 2
## 434829 436170 436212 436287 436303 436360 436444 436451 436493 436543
## 5 1 8 1 2 5 5 6 4 2
## 436584 436634 436642 436725 436766 436782 437210 437228 437236 437244
## 1 4 1 1 5 1 7 27 10 8
## 437251 437269 437277 437285 437319 437335 437343 437350 437400 437509
## 2 2 12 8 13 4 1 6 4 2
## 437525 437707 437715 437723 437731 437749 437772 449868 466730 468488
## 1 3 6 2 3 1 1 7 16 2
## 468611 469205 469700 481853 481903 482042 488619 488635 493239 493544
## 2 2 8 2 9 1 9 9 1 12
## 495259 495812 496166 496844 497024 499699 500124 500348 500611 501411
## 4 4 13 1 2 10 1 10 2 1
## 501502 501601 501676 501809 502435 502633 504993 505149 508903 510305
## 4 10 6 4 1 10 1 1 5 1
## 510800 513614 516674 519645 520486 521179 522318 522862 523423 523464
## 1 2 3 4 2 4 1 1 2 4
## 523621 523662 523761 526301 534321 535823 536029 536128 536151 536326
## 2 1 5 5 1 4 3 3 1 16
## 546002 555847 555862 555946 556266 556472 556548 556571 565119 565143
## 19 1 7 3 2 2 2 12 3 2
## 565200 565234 565267 566141 566158 566414 566430 566455 566463 566471
## 2 9 2 21 2 4 5 4 3 14
## 567743 567750 567768 578260 578278 578286 578336 578351 578401 578443
## 1 9 1 3 2 10 1 3 9 2
## 578518 578526 578534 578542 581710 581728 581736 581744 581777 581876
## 12 9 15 8 2 2 8 3 2 5
## 581892 581900 581991 582114 582122 582148 582163 582254 582304 582312
## 2 6 3 1 1 1 3 3 4 5
## 582387 582403 582411 582833 582866 582890 582932 582981 583013 583088
## 14 11 9 12 4 14 2 3 9 2
## 583328 583476 583567 583591 583922 598581 599159 599365 601492 603878
## 4 6 2 20 4 7 2 11 2 2
## 605469 605501 607424 607556 607697 628404 628602 628842 629261 629295
## 7 1 1 3 1 2 2 3 2 2
## 632299 632471 639922 642801 642892 643692 643783 643817 644880 647172
## 3 1 4 3 3 2 2 7 3 3
## 649129 649947 650002 650036 652081 656447 659623 659698 659722 659896
## 1 5 1 9 1 1 1 3 5 12
## 659953 662940 662957 663005 663013 663096 663112 663120 663138 663526
## 4 2 1 3 1 5 10 1 1 1
## 663534 663542 663559 663682 663971 664292 664508 664698 664748 664920
## 4 3 7 4 10 1 1 10 9 3
## 665489 691931 692434 692442 693499 693655 694547 694562 694570 694588
## 1 5 5 1 12 2 2 1 9 11
## 694596 694604 697557 703215 703223 703256 703736 703744 703751 704072
## 1 6 1 8 1 1 2 9 7 1
## 704312 704445 704460 705053 705129 705475 705772 725770 725861 728055
## 2 1 1 8 5 1 1 7 4 2
## 728196 728717 732321 732347 732495 739367 743773 743807 743815 743831
## 4 12 1 1 6 1 13 1 5 13
## 744540 744557 744573 751230 759399 759613 762120 762468 762773 762856
## 3 6 2 1 3 11 1 1 8 5
## 762864 762906 762914 763151 764134 764779 764936 765297 765305 765313
## 1 4 6 6 1 6 6 5 10 2
## 765321 765396 765412 765859 772970 773788 774026 774455 774679 774703
## 1 5 4 3 1 6 12 2 1 2
## 775312 775833 775874 777110 777144 777656 777680 777995 778027 778233
## 3 2 2 1 1 18 10 8 1 12
## 778738 778795 779041 779868 780759 780767 780791 781278 781302 781351
## 2 1 1 1 1 1 2 5 9 4
## 781369 781385 781831 781930 782102 820407 821082 824003 824813 825752
## 17 1 3 1 6 4 9 1 3 1
## 828962 832253 832279 832287 832303 832311 832337 834853 835058 846048
## 1 1 6 2 5 7 1 2 7 4
## 847087 855791 869198 870931 871160 872515 874198 874214 875476 879791
## 1 2 10 4 1 12 1 1 1 13
## 879817 883884 884510 884528 884544 884551 884593 884627 885517 900704
## 1 1 2 2 4 3 2 1 1 1
## 900761 900852 900910 900977 901033 901066 901082 901413 901587 915256
## 5 3 6 2 1 1 2 1 1 2
## [1] "Frequency table after encoding"
## cod_mod2016_admin.
## 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
## 10 11 2 1 5 4 3 4 1 5 2 1 2 3 12 8 1 1 8 3 10
## 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
## 1 8 2 1 1 1 3 10 1 4 3 6 10 1 7 7 5 14 1 1 20
## 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307
## 1 10 2 4 3 4 12 15 8 4 1 1 13 6 10 1 1 2 19 4 2
## 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328
## 1 2 3 9 4 1 3 1 1 11 2 9 1 2 2 1 3 1 1 2 1
## 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349
## 1 6 2 12 2 2 3 1 1 3 4 10 2 3 1 1 4 21 6 2 2
## 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370
## 1 9 3 1 2 4 5 5 2 7 2 2 5 2 3 1 3 10 1 3 1
## 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391
## 1 1 1 7 1 8 13 12 9 1 4 1 1 3 1 4 9 11 3 11 6
## 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412
## 5 1 1 2 1 2 7 12 10 1 2 2 1 15 12 11 2 12 1 6 5
## 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433
## 7 4 3 2 2 1 1 4 1 3 2 10 5 9 4 3 1 1 11 2 1
## 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454
## 2 1 2 2 1 2 4 2 4 9 4 1 2 1 1 1 8 1 6 4 7
## 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475
## 1 2 1 2 8 3 1 1 9 2 2 2 1 5 1 2 12 5 1 8 4
## 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496
## 6 20 6 1 7 1 1 5 3 1 2 9 1 2 2 1 1 12 1 10 2
## 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517
## 2 1 1 1 27 1 2 7 1 2 1 1 5 1 1 1 10 4 1 3 1
## 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538
## 1 1 1 3 4 3 6 3 9 3 2 4 6 1 1 4 12 5 13 1 1
## 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559
## 8 8 7 5 3 4 1 1 1 7 4 6 5 10 2 2 1 1 1 1 11
## 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580
## 1 2 8 18 1 3 2 8 4 5 4 10 2 2 2 9 2 3 1 6 1
## 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601
## 3 1 1 3 4 4 16 1 3 9 2 3 3 1 11 2 7 4 10 1 12
## 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622
## 2 2 17 3 2 4 1 1 2 2 1 3 1 1 1 2 2 9 8 9 1
## 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643
## 4 2 2 1 3 4 9 9 2 2 5 1 1 2 12 4 2 4 1 1 6
## 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664
## 1 16 1 5 5 1 1 1 2 4 1 10 2 5 9 4 13 3 4 1 1
## 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685
## 6 5 3 3 2 1 2 2 8 10 2 3 3 5 2 12 3 3 3 14 3
## 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706
## 3 4 5 1 2 5 10 7 4 1 3 2 4 6 2 2 10 8 4 3 1
## 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727
## 5 9 1 5 3 12 1 1 1 16 2 5 14 14 2 3 1 2 3 4 7
## 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748
## 1 3 10 11 1 2 10 10 3 1 6 1 2 8 5 2 9 1 1 11 2
## 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769
## 6 1 1 10 4 1 2 1 8 3 6 1 4 1 6 1 1 7 4 3 11
## 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790
## 1 5 7 4 1 1 11 2 6 1 2 2 1 1 4 1 4 3 1 1 2
## 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811
## 5 4 2 1 4 5 2 1 1 5 6 2 4 6 5 6 4 5 1 1 1
## 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832
## 1 2 6 1 3 2 6 2 6 1 1 1 2 5 3 1 1 8 7 1 4
## 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853
## 1 2 3 4 2 7 2 5 5 1 3 1 1 5 1 10 1 1 3 1 2
## 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874
## 1 7 3 5 3 5 6 1 8 1 3 9 2 1 1 1 3 2 1 1 1
## 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895
## 1 1 1 1 6 1 1 1 12 2 1 1 5 2 4 3 7 7 7 10 1
## 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916
## 8 9 1 6 3 1 8 8 13 1 2 4 2 2 4 3 1 3 2 1 3
## 917 918 919 920 921 922 923 924
## 6 1 1 1 2 1 2 4
## [1] "Frequency table before encoding"
## district. Distrito del hogar
## Callao Bellavista Carmen de la Legua Reynoso
## 196 14 5
## La Perla Ventanilla Lima Cercado
## 17 105 93
## Ancón Ate Breña
## 40 221 11
## Carabayllo Chaclacayo Chorrillos
## 68 19 39
## Cieneguilla Comas El Agustino
## 34 232 122
## Independencia Jesús María La Molina
## 64 5 8
## La Victoria Lince Los Olivos
## 24 2 51
## Lurigancho - Chosica Lurin Magdalena del Mar
## 132 24 11
## Pueblo Libre Miraflores Pachacamac
## 11 2 29
## Puente Piedra Punta Hermosa Punta Negra
## 88 9 1
## Rímac San Bartolo San Borja
## 86 1 10
## San Isidro San Juan de Lurigancho San Juan de Miraflores
## 1 253 99
## San Luis San Martín de Porres San Miguel
## 3 215 24
## Santa Anita Santa Rosa Santiago de Surco
## 50 1 19
## Surquillo Villa El Salvador Villa María del Triunfo
## 13 127 155
## <NA>
## 1
## [1] "Frequency table after encoding"
## district. Distrito del hogar
## 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361
## 122 2 232 14 34 19 1 24 24 5 196 127 10 40 253 68 13
## 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
## 105 39 51 215 88 11 3 19 11 86 9 221 24 1 64 132 93
## 379 380 381 382 383 384 385 386 387 388 389 <NA>
## 1 1 99 2 50 29 5 17 155 11 8 1
## [1] "Frequency table before encoding"
## p63. ¿En qué escuela está matriculado/a o está pensando matricular a ${student_name}
## 1007160 1007491 1008440 1008929 1008960 1009844 1010180 1034016 1039676
## 634 1 1 3 3 1 8 1 3 1
## 1041557 1041631 1045111 1045434 1045715 1045756 1045798 1048990 1049493 1053628
## 2 8 1 3 1 1 3 1 2 5
## 1053669 1054071 1054154 1054196 1054238 1054311 1054352 1054436 1056902 1063106
## 2 1 4 1 1 1 2 1 1 5
## 1063148 1063262 1063304 1063346 1064989 1066026 1068238 1069954 1070077 1070119
## 4 1 4 2 2 5 1 3 4 1
## 1070150 1071919 1072040 1072230 1072727 1074301 1074509 1075779 1083633 1083716
## 1 2 1 1 1 3 1 1 1 5
## 1083815 1084508 1085851 1085976 1097567 1194265 1194380 1194810 1194901 1195189
## 2 9 2 2 1 4 6 2 1 1
## 1195577 1196203 1199009 1223023 1238229 1240183 1240720 1241983 1242270 1242361
## 1 1 1 3 1 1 7 1 1 1
## 1242437 1242908 1243781 1247832 1248392 1257450 1257567 1258649 1263813 1264183
## 2 1 1 3 9 1 1 2 1 1
## 1264381 1264670 1266758 1266840 1267038 1267079 1272822 1278662 1279124 1309392
## 1 2 1 1 1 1 2 1 3 1
## 1309574 1311984 1312362 1312693 1320100 1324508 1324839 1332220 1335348 1335546
## 3 1 1 1 1 1 1 1 1 1
## 1341148 1343367 1345206 1346675 1349430 1352954 1354091 1357607 1362318 1369487
## 1 1 1 1 1 1 1 1 2 1
## 1369677 1372655 1375211 1376870 1381144 1381375 1381599 1381896 1382829 1383413
## 1 1 2 1 1 2 4 2 1 1
## 1384221 1385251 1392810 1392893 1398148 1399898 1402064 1406057 1412790 1420694
## 1 1 1 1 3 1 2 1 1 4
## 1423615 1426592 1431667 1437375 1438035 1459262 1464668 1469675 1472836 1474600
## 5 1 1 1 3 1 1 1 1 1
## 1475011 1475201 1475284 1475607 1475920 1482975 1483239 1483361 1484039 1485945
## 7 8 6 1 1 1 1 1 1 1
## 1486257 1487339 1488659 1489822 1493204 1495365 1495407 1495720 1496256 1496355
## 1 1 1 4 1 2 1 1 1 2
## 1497007 1497551 1497825 1498203 1501188 1501451 1505494 1507094 1507250 1507532
## 3 1 1 1 1 7 11 9 7 7
## 1507557 1507870 1509181 1509835 1510353 1512409 1515352 1519149 1520287 1524883
## 1 1 2 1 1 1 1 1 2 1
## 1524966 1528520 1529023 1531359 1534809 1535103 1535392 1549021 1575323 1595347
## 1 1 1 1 1 1 1 1 1 1
## 1632660 1638972 1640556 1641521 1654078 1661271 1662386 1664390 1664507 1664895
## 1 1 1 2 6 6 2 1 2 1
## 1669647 1677632 1691989 1697051 1697234 1699800 1699933 1700996 205880 206086
## 1 1 1 1 2 1 1 1 1 1
## 207845 208371 208538 209205 209304 209387 209510 209528 209536 209908
## 2 4 1 1 4 6 2 2 8 18
## 209916 209924 209940 209965 209973 210260 211458 215590 215707 215897
## 5 4 3 11 3 1 1 1 3 1
## 219410 245647 245654 245662 245670 245688 245696 262279 268029 275610
## 1 2 1 6 6 11 1 1 1 1
## 282632 286427 305656 314500 317206 317453 317495 317941 318048 318584
## 1 1 1 1 1 2 1 2 1 1
## 318931 318949 319145 319244 320655 322479 322529 322875 323345 323451
## 1 2 1 2 1 2 1 2 1 4
## 323733 324236 325449 325456 325464 325472 325480 325498 325555 325563
## 1 1 1 1 1 6 1 1 2 2
## 325589 325613 325647 325670 325696 325704 327650 328237 328252 328336
## 5 1 5 5 1 8 1 2 1 1
## 328351 328385 328872 329573 329755 329805 334649 334656 334664 334672
## 1 1 1 4 2 4 6 4 8 6
## 334680 334706 334722 334730 334748 334821 334847 336297 336495 336545
## 1 9 3 1 7 3 1 2 5 5
## 336560 336578 336586 336594 336602 336610 336628 336636 336990 337295
## 4 1 6 5 2 3 7 2 1 1
## 337436 337568 337592 337733 337741 337766 338640 339499 340224 340281
## 7 3 4 2 3 2 1 1 4 1
## 340299 340315 340349 340372 340380 340398 340414 340448 340463 343566
## 4 10 8 4 3 1 4 1 1 5
## 381814 411702 427690 432773 433037 433219 433235 433326 434035 434076
## 1 1 1 2 1 1 2 1 1 1
## 434159 434233 434282 434373 434381 434399 436261 436311 436352 436378
## 1 4 2 1 1 1 2 4 1 1
## 436386 436444 436543 436592 436642 436667 436725 436758 436782 437210
## 2 1 1 1 2 1 1 1 2 4
## 437228 437236 437244 437251 437269 437277 437285 437319 437327 437335
## 30 6 9 2 4 10 11 7 2 2
## 437350 437400 437442 437475 437509 437707 437715 437723 437731 437749
## 9 2 1 1 3 4 5 2 4 2
## 437772 449512 449819 449868 466730 469205 469700 478404 478420 481903
## 2 1 1 7 10 2 4 1 2 3
## 482042 488619 488635 493239 493544 494633 494732 495259 495812 496166
## 1 8 1 2 6 1 1 4 4 5
## 496265 496281 496521 496653 497081 498824 499699 500348 500611 501411
## 2 1 1 1 3 2 6 3 4 2
## 501502 501601 501676 501809 501957 502435 502484 504993 505149 510800
## 2 3 4 2 5 1 1 2 1 1
## 516773 519645 520486 521179 522318 523423 523621 523761 523860 525451
## 1 1 10 4 2 2 2 1 1 1
## 525857 526400 535823 536029 536128 536151 536326 541011 541649 542829
## 1 1 7 2 4 2 10 1 1 1
## 543116 546002 555599 555862 555946 556241 556266 556290 556472 556571
## 1 9 1 1 4 1 1 1 4 9
## 561662 565200 565234 565267 566141 566158 566422 566430 566448 566455
## 2 1 6 1 16 3 1 1 1 4
## 566463 566471 567743 567750 578260 578278 578286 578310 578336 578351
## 1 11 1 3 2 1 4 1 1 1
## 578401 578443 578518 578526 578534 578542 581710 581728 581736 581744
## 3 3 8 9 7 6 5 1 6 6
## 581876 581892 581900 582114 582148 582163 582171 582254 582304 582312
## 2 1 2 2 1 6 1 1 2 8
## 582387 582403 582411 582833 582866 582890 582981 583013 583088 583328
## 4 10 4 7 3 12 2 6 2 3
## 583476 583534 583567 583591 587121 598581 599159 599365 601492 601708
## 3 1 1 9 2 6 2 7 5 1
## 603878 605501 607424 607556 628826 628842 629261 639732 639922 642801
## 2 1 1 5 1 2 1 3 8 3
## 642892 642926 643692 643783 643874 644690 646646 646711 647172 647792
## 1 2 2 3 1 1 2 2 1 4
## 649129 649673 650002 650036 652081 659698 659722 659896 659953 662734
## 1 1 1 4 1 2 2 9 3 1
## 662742 662940 662957 663005 663096 663112 663138 663534 663542 663559
## 1 2 4 1 2 7 1 2 1 7
## 663609 663971 664292 664490 664698 664748 664912 664920 674564 689836
## 1 5 1 1 15 5 1 1 1 1
## 690008 692434 693465 693499 694315 694562 694570 694588 694604 697557
## 3 4 1 5 1 1 5 4 5 2
## 703215 703223 703231 703256 703413 703744 703751 704072 704460 705053
## 3 1 1 1 1 11 6 1 2 6
## 705160 705475 722918 725770 725861 728196 728717 732321 732339 732347
## 1 1 1 10 3 2 5 4 1 1
## 732461 732495 743773 743799 743831 744573 745448 753319 759613 762468
## 2 6 6 1 8 1 1 1 7 1
## 762773 762856 762864 762906 762914 763151 763169 764134 764936 765164
## 8 1 1 4 6 1 2 1 12 1
## 765297 765305 765313 765396 765412 773788 773846 773903 774026 774455
## 3 9 2 7 6 2 2 1 2 1
## 774679 774703 775346 775833 775874 777110 777334 777656 777680 777714
## 2 1 1 1 2 1 1 6 16 1
## 777995 778233 778738 779041 779868 780783 781278 781302 781369 781930
## 5 6 3 1 1 1 4 3 11 2
## 782078 782102 782615 824813 826479 828962 829176 829325 831305 832253
## 1 5 2 1 1 1 1 2 2 1
## 832279 832287 832311 832337 834960 834994 835033 846014 869198 870345
## 8 4 1 5 4 1 1 1 1 2
## 870444 871012 872515 874214 875443 879791 879817 883454 883884 884510
## 1 1 9 1 1 10 1 1 1 2
## 884528 884544 884551 884593 884601 884650 884825 885277 885392 900647
## 3 3 3 3 1 1 1 1 4 1
## 900761 900910 900977 901413 922054 99
## 3 5 2 1 1 225
## [1] "Frequency table after encoding"
## p63. ¿En qué escuela está matriculado/a o está pensando matricular a ${student_name}
## 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
## 2 2 1 1 1 3 4 1 1 2 1 1 1 4 1 1 2 1 2 1 1
## 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
## 1 2 3 2 1 1 1 1 1 1 1 1 2 6 1 2 2 1 1 4 2
## 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## 5 1 2 8 4 1 1 1 3 2 4 3 1 1 2 11 1 11 7 1 8
## 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
## 1 1 1 1 2 5 7 1 1 3 9 1 4 1 1 1 5 11 5 1 2
## 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
## 2 4 1 2 3 3 1 4 1 3 3 1 1 1 1 1 1 1 3 1 1
## 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
## 1 1 1 3 4 8 3 1 1 4 1 2 9 5 7 1 6 4 2 1 2
## 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
## 1 2 1 1 1 1 1 2 1 1 2 1 2 1 5 6 2 1 2 2 1
## 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321
## 1 1 9 8 7 4 4 1 1 1 1 1 3 1 1 1 1 2 2 2 1
## 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
## 2 1 1 11 2 3 1 1 2 3 1 5 1 1 1 4 2 1 1 1 7
## 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363
## 1 10 2 1 2 1 1 1 1 1 1 1 2 16 6 1 1 1 1 1 3
## 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384
## 10 4 7 1 1 9 3 15 6 1 4 5 1 1 1 1 1 1 5 1 1
## 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405
## 1 3 1 3 4 1 2 4 1 1 9 6 2 2 10 4 1 12 1 5 5
## 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426
## 1 2 1 2 2 3 6 3 5 10 2 5 1 2 1 6 1 4 2 1 1
## 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447
## 3 2 1 1 1 5 3 1 3 1 2 1 2 1 8 1 2 2 9 4 7
## 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468
## 3 3 2 1 1 4 4 6 1 2 1 6 2 2 3 6 4 2 1 9 1
## 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489
## 1 1 1 1 2 2 1 2 4 9 5 1 1 1 1 2 4 1 1 1 1
## 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510
## 2 1 6 2 2 2 1 1 1 2 1 1 1 1 1 3 1 1 1 1 6
## 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531
## 1 11 2 3 2 1 1 6 2 1 1 1 1 1 3 8 1 1 2 1 1
## 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552
## 1 1 4 2 1 3 2 3 9 5 1 1 7 1 11 1 1 1 1 1 1
## 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573
## 4 1 1 1 4 1 3 1 11 1 1 1 2 2 1 2 4 1 1 10 1
## 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594
## 4 2 1 3 1 30 1 1 1 1 5 1 2 3 8 1 1 3 1 3 2
## 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615
## 2 3 1 16 2 6 1 1 1 1 1 2 1 1 1 18 5 1 5 7 4
## 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636
## 8 2 1 6 1 8 1 9 1 8 7 6 1 8 1 4 1 1 7 1 2
## 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657
## 9 2 634 1 1 2 1 4 7 8 1 1 1 2 1 2 1 1 1 1 1
## 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678
## 4 3 1 1 6 2 4 1 1 1 2 1 3 1 1 3 1 1 1 2 1
## 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699
## 1 5 9 1 6 1 7 2 1 1 1 1 5 2 1 4 4 1 2 1 2
## 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720
## 1 3 3 1 4 1 1 1 6 1 4 2 4 1 6 4 1 1 12 1 1
## 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741
## 4 1 1 1 5 1 3 1 1 1 1 1 2 4 1 1 1 1 1 6 4
## 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762
## 1 2 1 2 4 9 1 1 1 8 3 10 2 2 5 2 2 1 2 1 7
## 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783
## 1 6 1 1 4 1 5 3 1 2 1 1 1 3 8 6 6 1 1 7 2
## 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804
## 2 2 2 5 5 10 1 3 2 1 2 1 1 2 1 1 1 1 2 1 3
## 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825
## 1 1 4 1 4 1 7 1 1 6 4 1 1 1 1 1 3 1 2 7 6
## 826 827 828 829 830 831 832 833 834 835 836 837 838 839
## 1 6 5 225 10 1 5 2 4 6 1 5 7 1
# Remove p62 from the dataset.
dropvars <- "p62"
mydata <- mydata[!is.element(names(mydata), dropvars)]
# Recode ages
# Coerce the age column to numeric before binning.
mydata$hh_ageinyears <- as.numeric(mydata[["hh_ageinyears"]])

# Break points: one pooled bin starting at 18, then one bin per year from 31
# to 49, then an open-ended bin from 50 (the recorded cross-tab shows the
# intervals as [18,31), [31,32), ..., [49,50), [50,1e+06)).
break_age <- c(18, 31:50)

# Value labels 1..22, in the same order as the bins.
# NOTE(review): the final label "NA" (= 22) has no matching interval in the
# recorded output; presumably it is reserved for missing values -- confirm.
labels_age <- setNames(
  as.numeric(1:22),
  c("30 or younger", 31:49, "50 or older", "NA")
)

# ordinal_recode is defined outside this script; it returns the updated
# dataset, which replaces `mydata`.
mydata <- ordinal_recode(
  variable = "hh_ageinyears",
  break_points = break_age,
  missing = 999999,
  value_labels = labels_age
)
## [1] "Frequency table before encoding"
## hh_ageinyears.
## 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
## 7 9 13 4 15 6 4 9 10 11 15 30 35 58 86 76 98
## 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
## 122 104 125 139 127 142 126 117 126 101 108 104 82 77 84 66 72
## 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
## 57 41 50 48 35 27 23 20 9 11 13 11 12 7 9 5 3
## 69 70 71 72 73 74 75 76 77 78 79 80 83 84 87 <NA>
## 2 5 8 2 4 1 3 1 1 2 1 2 5 3 1 5
## recoded
## [18,31) [31,32) [32,33) [33,34) [34,35) [35,36) [36,37) [37,38) [38,39) [39,40)
## 18 7 0 0 0 0 0 0 0 0 0
## 19 9 0 0 0 0 0 0 0 0 0
## 20 13 0 0 0 0 0 0 0 0 0
## 21 4 0 0 0 0 0 0 0 0 0
## 22 15 0 0 0 0 0 0 0 0 0
## 23 6 0 0 0 0 0 0 0 0 0
## 24 4 0 0 0 0 0 0 0 0 0
## 25 9 0 0 0 0 0 0 0 0 0
## 26 10 0 0 0 0 0 0 0 0 0
## 27 11 0 0 0 0 0 0 0 0 0
## 28 15 0 0 0 0 0 0 0 0 0
## 29 30 0 0 0 0 0 0 0 0 0
## 30 35 0 0 0 0 0 0 0 0 0
## 31 0 58 0 0 0 0 0 0 0 0
## 32 0 0 86 0 0 0 0 0 0 0
## 33 0 0 0 76 0 0 0 0 0 0
## 34 0 0 0 0 98 0 0 0 0 0
## 35 0 0 0 0 0 122 0 0 0 0
## 36 0 0 0 0 0 0 104 0 0 0
## 37 0 0 0 0 0 0 0 125 0 0
## 38 0 0 0 0 0 0 0 0 139 0
## 39 0 0 0 0 0 0 0 0 0 127
## 40 0 0 0 0 0 0 0 0 0 0
## 41 0 0 0 0 0 0 0 0 0 0
## 42 0 0 0 0 0 0 0 0 0 0
## 43 0 0 0 0 0 0 0 0 0 0
## 44 0 0 0 0 0 0 0 0 0 0
## 45 0 0 0 0 0 0 0 0 0 0
## 46 0 0 0 0 0 0 0 0 0 0
## 47 0 0 0 0 0 0 0 0 0 0
## 48 0 0 0 0 0 0 0 0 0 0
## 49 0 0 0 0 0 0 0 0 0 0
## 50 0 0 0 0 0 0 0 0 0 0
## 51 0 0 0 0 0 0 0 0 0 0
## 52 0 0 0 0 0 0 0 0 0 0
## 53 0 0 0 0 0 0 0 0 0 0
## 54 0 0 0 0 0 0 0 0 0 0
## 55 0 0 0 0 0 0 0 0 0 0
## 56 0 0 0 0 0 0 0 0 0 0
## 57 0 0 0 0 0 0 0 0 0 0
## 58 0 0 0 0 0 0 0 0 0 0
## 59 0 0 0 0 0 0 0 0 0 0
## 60 0 0 0 0 0 0 0 0 0 0
## 61 0 0 0 0 0 0 0 0 0 0
## 62 0 0 0 0 0 0 0 0 0 0
## 63 0 0 0 0 0 0 0 0 0 0
## 64 0 0 0 0 0 0 0 0 0 0
## recoded
## [40,41) [41,42) [42,43) [43,44) [44,45) [45,46) [46,47) [47,48) [48,49) [49,50)
## 18 0 0 0 0 0 0 0 0 0 0
## 19 0 0 0 0 0 0 0 0 0 0
## 20 0 0 0 0 0 0 0 0 0 0
## 21 0 0 0 0 0 0 0 0 0 0
## 22 0 0 0 0 0 0 0 0 0 0
## 23 0 0 0 0 0 0 0 0 0 0
## 24 0 0 0 0 0 0 0 0 0 0
## 25 0 0 0 0 0 0 0 0 0 0
## 26 0 0 0 0 0 0 0 0 0 0
## 27 0 0 0 0 0 0 0 0 0 0
## 28 0 0 0 0 0 0 0 0 0 0
## 29 0 0 0 0 0 0 0 0 0 0
## 30 0 0 0 0 0 0 0 0 0 0
## 31 0 0 0 0 0 0 0 0 0 0
## 32 0 0 0 0 0 0 0 0 0 0
## 33 0 0 0 0 0 0 0 0 0 0
## 34 0 0 0 0 0 0 0 0 0 0
## 35 0 0 0 0 0 0 0 0 0 0
## 36 0 0 0 0 0 0 0 0 0 0
## 37 0 0 0 0 0 0 0 0 0 0
## 38 0 0 0 0 0 0 0 0 0 0
## 39 0 0 0 0 0 0 0 0 0 0
## 40 142 0 0 0 0 0 0 0 0 0
## 41 0 126 0 0 0 0 0 0 0 0
## 42 0 0 117 0 0 0 0 0 0 0
## 43 0 0 0 126 0 0 0 0 0 0
## 44 0 0 0 0 101 0 0 0 0 0
## 45 0 0 0 0 0 108 0 0 0 0
## 46 0 0 0 0 0 0 104 0 0 0
## 47 0 0 0 0 0 0 0 82 0 0
## 48 0 0 0 0 0 0 0 0 77 0
## 49 0 0 0 0 0 0 0 0 0 84
## 50 0 0 0 0 0 0 0 0 0 0
## 51 0 0 0 0 0 0 0 0 0 0
## 52 0 0 0 0 0 0 0 0 0 0
## 53 0 0 0 0 0 0 0 0 0 0
## 54 0 0 0 0 0 0 0 0 0 0
## 55 0 0 0 0 0 0 0 0 0 0
## 56 0 0 0 0 0 0 0 0 0 0
## 57 0 0 0 0 0 0 0 0 0 0
## 58 0 0 0 0 0 0 0 0 0 0
## 59 0 0 0 0 0 0 0 0 0 0
## 60 0 0 0 0 0 0 0 0 0 0
## 61 0 0 0 0 0 0 0 0 0 0
## 62 0 0 0 0 0 0 0 0 0 0
## 63 0 0 0 0 0 0 0 0 0 0
## 64 0 0 0 0 0 0 0 0 0 0
## recoded
## [50,1e+06)
## 18 0
## 19 0
## 20 0
## 21 0
## 22 0
## 23 0
## 24 0
## 25 0
## 26 0
## 27 0
## 28 0
## 29 0
## 30 0
## 31 0
## 32 0
## 33 0
## 34 0
## 35 0
## 36 0
## 37 0
## 38 0
## 39 0
## 40 0
## 41 0
## 42 0
## 43 0
## 44 0
## 45 0
## 46 0
## 47 0
## 48 0
## 49 0
## 50 66
## 51 72
## 52 57
## 53 41
## 54 50
## 55 48
## 56 35
## 57 27
## 58 23
## 59 20
## 60 9
## 61 11
## 62 13
## 63 11
## 64 12
## [ reached getOption("max.print") -- omitted 19 rows ]
## [1] "Frequency table after encoding"
## hh_ageinyears
## 30 or younger 31 32 33 34 35
## 168 58 86 76 98 122
## 36 37 38 39 40 41
## 104 125 139 127 142 126
## 42 43 44 45 46 47
## 117 126 101 108 104 82
## 48 49 50 or older <NA>
## 77 84 560 5
## [1] "Inspect value labels and relabel as necessary"
## 30 or younger 31 32 33 34 35
## 1 2 3 4 5 6
## 36 37 38 39 40 41
## 7 8 9 10 11 12
## 42 43 44 45 46 47
## 13 14 15 16 17 18
## 48 49 50 or older NA
## 19 20 21 22
# Recode education attainment of adults to reduce risk of re-identification
# p6_1 ("Padre"): the only categories merged are codes 1 and 2, which fall in
# the single interval [1,3) and are labelled "Pri Incomp or less"; every other
# code keeps its own bin (see the recorded cross-tab).
break_edu <- c(-98, 1, 3:9)
labels_edu <- setNames(
  as.numeric(1:9),
  c(
    "No se",
    "Pri Incomp or less",
    "Pri Comp",
    "Sec Incomp",
    "Sec Comp",
    "Tec Incomp",
    "Tec Comp",
    "Uni Incomp",
    "Uni Comp"
  )
)
mydata <- ordinal_recode(
  variable = "p6_1",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)
## [1] "Frequency table before encoding"
## p6_1. Padre
## No sé Sin nivel Pri Incomp Pri Comp Sec Incomp Sec Comp Tec Incomp
## 10 4 74 146 344 751 84
## Tec Comp Uni Incomp Uni Comp <NA>
## 203 57 81 981
## recoded
## [-98,1) [1,3) [3,4) [4,5) [5,6) [6,7) [7,8) [8,9) [9,1e+06)
## -98 10 0 0 0 0 0 0 0 0
## 1 0 4 0 0 0 0 0 0 0
## 2 0 74 0 0 0 0 0 0 0
## 3 0 0 146 0 0 0 0 0 0
## 4 0 0 0 344 0 0 0 0 0
## 5 0 0 0 0 751 0 0 0 0
## 6 0 0 0 0 0 84 0 0 0
## 7 0 0 0 0 0 0 203 0 0
## 8 0 0 0 0 0 0 0 57 0
## 9 0 0 0 0 0 0 0 0 81
## [1] "Frequency table after encoding"
## p6_1. Padre
## No se Pri Incomp or less Pri Comp Sec Incomp
## 10 78 146 344
## Sec Comp Tec Incomp Tec Comp Uni Incomp
## 751 84 203 57
## Uni Comp <NA>
## 81 981
## [1] "Inspect value labels and relabel as necessary"
## No se Pri Incomp or less Pri Comp Sec Incomp
## 1 2 3 4
## Sec Comp Tec Incomp Tec Comp Uni Incomp
## 5 6 7 8
## Uni Comp
## 9
# p6b1 ("Abuelo / Abuela 1"): codes up to 5 keep their own bin; codes 6 and
# above share the open-ended top interval [6,1e+06) and a single combined
# label (see the recorded cross-tab).
break_edu <- c(-98, 1:6)
labels_edu <- setNames(
  as.numeric(1:7),
  c(
    "No se",
    "Sin nivel",
    "Pri Incomp",
    "Pri Comp",
    "Sec Incomp",
    "Sec Comp",
    "Tec Incomp/Comp or Uni Incomp/Comp"
  )
)
mydata <- ordinal_recode(
  variable = "p6b1",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)
## [1] "Frequency table before encoding"
## p6b1. Abuelo / Abuela 1
## No sé Sin nivel Pri Incomp Pri Comp Sec Incomp Sec Comp Tec Incomp
## 17 42 89 87 48 77 9
## Tec Comp Uni Incomp Uni Comp <NA>
## 15 4 6 2341
## recoded
## [-98,1) [1,2) [2,3) [3,4) [4,5) [5,6) [6,1e+06)
## -98 17 0 0 0 0 0 0
## 1 0 42 0 0 0 0 0
## 2 0 0 89 0 0 0 0
## 3 0 0 0 87 0 0 0
## 4 0 0 0 0 48 0 0
## 5 0 0 0 0 0 77 0
## 6 0 0 0 0 0 0 9
## 7 0 0 0 0 0 0 15
## 8 0 0 0 0 0 0 4
## 9 0 0 0 0 0 0 6
## [1] "Frequency table after encoding"
## p6b1. Abuelo / Abuela 1
## No se Sin nivel
## 17 42
## Pri Incomp Pri Comp
## 89 87
## Sec Incomp Sec Comp
## 48 77
## Tec Incomp/Comp or Uni Incomp/Comp <NA>
## 34 2341
## [1] "Inspect value labels and relabel as necessary"
## No se Sin nivel
## 1 2
## Pri Incomp Pri Comp
## 3 4
## Sec Incomp Sec Comp
## 5 6
## Tec Incomp/Comp or Uni Incomp/Comp
## 7
# p6b2 (grandparent 2): codes 1-2 share the [1,3) bin, code 3 keeps its own
# bin, and codes 4 and above share the top bin.
# The top label previously read "Tec Imcomp/Comp"; the spelling is corrected to
# "Incomp" so the released value label matches the other education labels.
break_edu <- c(-98, 1, 3, 4)
labels_edu <- c("No se" = 1,
                "Pri Incomp or less" = 2,
                "Pri Comp" = 3,
                "Sec Incomp/Comp, Tec Incomp/Comp or Uni Incomp/Comp" = 4)
mydata <- ordinal_recode(variable = "p6b2", break_points = break_edu,
                         missing = 999999, value_labels = labels_edu)
## [1] "Frequency table before encoding"
## p6b2. Abuelo / Abuela 2
## No sé Sin nivel Pri Incomp Pri Comp Sec Incomp Sec Comp Tec Incomp
## 2 11 24 32 16 35 2
## Tec Comp Uni Comp <NA>
## 5 2 2606
## recoded
## [-98,1) [1,3) [3,4) [4,1e+06)
## -98 2 0 0 0
## 1 0 11 0 0
## 2 0 24 0 0
## 3 0 0 32 0
## 4 0 0 0 16
## 5 0 0 0 35
## 6 0 0 0 2
## 7 0 0 0 5
## 9 0 0 0 2
## [1] "Frequency table after encoding"
## p6b2. Abuelo / Abuela 2
## No se
## 2
## Pri Incomp or less
## 35
## Pri Comp
## 32
## Sec Incomp/Comp, Tec Incomp/Comp or Uni Incomp/Comp
## 60
## <NA>
## 2606
## [1] "Inspect value labels and relabel as necessary"
## No se
## 1
## Pri Incomp or less
## 2
## Pri Comp
## 3
## Sec Incomp/Comp, Tec Incomp/Comp or Uni Incomp/Comp
## 4
# p6c1 (uncle/aunt 1): codes 1-3 share the [1,4) bin, codes 4 and 5 keep
# their own bin, and codes 6 and above share the top bin.
break_edu <- c(-98, 1, 4:6)
labels_edu <- setNames(
  as.numeric(1:5),
  c("No se", "Pri Comp or less", "Sec Incomp", "Sec Comp",
    "Tec Incomp/Comp or Uni Incomp/Comp")
)
mydata <- ordinal_recode(variable = "p6c1", break_points = break_edu,
                         missing = 999999, value_labels = labels_edu)
## [1] "Frequency table before encoding"
## p6c1. Tío / Tía 1
## No sé Sin nivel Pri Incomp Pri Comp Sec Incomp Sec Comp Tec Incomp
## 6 5 9 17 36 120 7
## Tec Comp Uni Incomp Uni Comp <NA>
## 32 9 26 2468
## recoded
## [-98,1) [1,4) [4,5) [5,6) [6,1e+06)
## -98 6 0 0 0 0
## 1 0 5 0 0 0
## 2 0 9 0 0 0
## 3 0 17 0 0 0
## 4 0 0 36 0 0
## 5 0 0 0 120 0
## 6 0 0 0 0 7
## 7 0 0 0 0 32
## 8 0 0 0 0 9
## 9 0 0 0 0 26
## [1] "Frequency table after encoding"
## p6c1. Tío / Tía 1
## No se Pri Comp or less
## 6 31
## Sec Incomp Sec Comp
## 36 120
## Tec Incomp/Comp or Uni Incomp/Comp <NA>
## 74 2468
## [1] "Inspect value labels and relabel as necessary"
## No se Pri Comp or less
## 1 2
## Sec Incomp Sec Comp
## 3 4
## Tec Incomp/Comp or Uni Incomp/Comp
## 5
# p6c2 (uncle/aunt 2): codes 1-4 share the [1,5) bin, code 5 keeps its own
# bin, and codes 6 and above share the top bin.
# The top label previously read "Tec Imcomp/Comp"; the spelling is corrected to
# "Incomp" so the released value label matches the other education labels.
break_edu <- c(-98, 1, 5, 6)
labels_edu <- c("No se" = 1,
                "Sec Incomp or less" = 2,
                "Sec Comp" = 3,
                "Tec Incomp/Comp or Uni Incomp/Comp" = 4)
mydata <- ordinal_recode(variable = "p6c2", break_points = break_edu,
                         missing = 999999, value_labels = labels_edu)
## [1] "Frequency table before encoding"
## p6c2. Tío / Tía 2
## No sé Pri Incomp Pri Comp Sec Incomp Sec Comp Tec Incomp Tec Comp
## 3 6 5 16 64 2 19
## Uni Incomp Uni Comp <NA>
## 5 8 2607
## recoded
## [-98,1) [1,5) [5,6) [6,1e+06)
## -98 3 0 0 0
## 2 0 6 0 0
## 3 0 5 0 0
## 4 0 16 0 0
## 5 0 0 64 0
## 6 0 0 0 2
## 7 0 0 0 19
## 8 0 0 0 5
## 9 0 0 0 8
## [1] "Frequency table after encoding"
## p6c2. Tío / Tía 2
## No se Sec Incomp or less
## 3 27
## Sec Comp Tec Incomp/Comp or Uni Incomp/Comp
## 64 34
## <NA>
## 2607
## [1] "Inspect value labels and relabel as necessary"
## No se Sec Incomp or less
## 1 2
## Sec Comp Tec Incomp/Comp or Uni Incomp/Comp
## 3 4
# p6c3 (uncle/aunt 3): codes 1-5 share the [1,6) bin and codes 6 and above
# share the top bin.
# The top label previously read "Tec Imcomp/Comp"; the spelling is corrected to
# "Incomp" so the released value label matches the other education labels.
break_edu <- c(-98, 1, 6)
labels_edu <- c("No se" = 1,
                "Sec Comp or less" = 2,
                "Tec Incomp/Comp or Uni Incomp/Comp" = 3)
mydata <- ordinal_recode(variable = "p6c3", break_points = break_edu,
                         missing = 999999, value_labels = labels_edu)
## [1] "Frequency table before encoding"
## p6c3. Tío / Tía 3
## Pri Incomp Pri Comp Sec Incomp Sec Comp Tec Incomp Tec Comp Uni Incomp
## 1 1 6 21 4 5 1
## Uni Comp <NA>
## 4 2692
## recoded
## [-98,1) [1,6) [6,1e+06)
## 2 0 1 0
## 3 0 1 0
## 4 0 6 0
## 5 0 21 0
## 6 0 0 4
## 7 0 0 5
## 8 0 0 1
## 9 0 0 4
## [1] "Frequency table after encoding"
## p6c3. Tío / Tía 3
## Sec Comp or less Tec Incomp/Comp or Uni Incomp/Comp
## 29 14
## <NA>
## 2692
## [1] "Inspect value labels and relabel as necessary"
## No se Sec Comp or less
## 1 2
## Tec Incomp/Comp or Uni Incomp/Comp
## 3
# Top code household composition variables with large and unusual numbers
# p1 (household size): 10 is the break point for the top category.
mydata <- top_recode(
  variable = "p1",
  break_point = 10,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p1. ¿Cuántas personas viven en total en el hogar?
## 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 <NA>
## 86 323 707 746 427 201 102 59 37 15 13 6 6 1 1 5
## [1] "Frequency table after encoding"
## p1. ¿Cuántas personas viven en total en el hogar?
## 2 3 4 5 6 7 8
## 86 323 707 746 427 201 102
## 9 10 or more <NA>
## 59 79 5
# p2c (siblings in the household): 5 is the break point for the top category.
mydata <- top_recode(
  variable = "p2c",
  break_point = 5,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p2c. Hermanos o hermanas de ${student_name}
## 0 1 2 3 4 5 6 7 8 <NA>
## 354 861 859 422 153 51 19 9 2 5
## [1] "Frequency table after encoding"
## p2c. Hermanos o hermanas de ${student_name}
## 0 1 2 3 4 5 or more <NA>
## 354 861 859 422 153 81 5
# p2d (grandparents in the household): 2 is the break point for the top category.
mydata <- top_recode(
  variable = "p2d",
  break_point = 2,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p2d. Abuelos o abuelas vive de ${student_name}
## 0 1 2 3 <NA>
## 2333 268 126 3 5
## [1] "Frequency table after encoding"
## p2d. Abuelos o abuelas vive de ${student_name}
## 0 1 2 or more <NA>
## 2333 268 129 5
# p2e (uncles/aunts in the household): 3 is the break point for the top category.
mydata <- top_recode(
  variable = "p2e",
  break_point = 3,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p2e. Tíos o tías de ${student_name}
## 0 1 2 3 4 5 6 <NA>
## 2412 174 95 34 9 3 3 5
## [1] "Frequency table after encoding"
## p2e. Tíos o tías de ${student_name}
## 0 1 2 3 or more <NA>
## 2412 174 95 49 5
# p2f (nephews/nieces in the household): 2 is the break point for the top category.
mydata <- top_recode(
  variable = "p2f",
  break_point = 2,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p2f. Sobrinos de ${student_name}
## 0 1 2 3 4 6 <NA>
## 2511 135 51 19 13 1 5
## [1] "Frequency table after encoding"
## p2f. Sobrinos de ${student_name}
## 0 1 2 or more <NA>
## 2511 135 84 5
# p2g (other household members): 3 is the break point for the top category.
mydata <- top_recode(
  variable = "p2g",
  break_point = 3,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p2g. Otros familiares o miembros que vivan en el hogar
## 0 1 2 3 4 5 6 7 <NA>
## 2303 325 55 29 9 4 2 3 5
## [1] "Frequency table after encoding"
## p2g. Otros familiares o miembros que vivan en el hogar
## 0 1 2 3 or more <NA>
## 2303 325 55 47 5
# Top code high income to the 99.5 percentile
# p7_1 (father): the break point is the floored 99.5th percentile of the
# values that are neither NA nor the -97 code passed below as `missing`.
percentile_99.5 <- local({
  reported <- mydata$p7_1
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7_1", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7_1. Padre
## -97 0 97 100 180 200 250 300 350 400 450 460 480 500
## 317 1 4 4 1 4 2 9 2 13 3 1 3 39
## 508 528 560 570 580 600 700 720 750 800 820 850 858 900
## 1 1 1 1 1 30 29 1 11 121 1 202 2 64
## 910 920 950 1000 1025 1050 1070 1100 1200 1240 1280 1300 1350 1400
## 1 1 2 205 1 2 1 15 182 1 1 38 1 30
## 1450 1460 1500 1508 1600 1700 1800 1900 2000 2050 2100 2200 2300 2400
## 2 1 144 1 28 5 40 2 70 1 1 6 1 8
## 2500 2580 2700 2800 2900 3000 3100 3200 3400 3500 3600 4000 4500 4900
## 15 1 2 5 1 18 1 1 1 4 1 5 1 1
## 5000 6000 15000 <NA>
## 1 1 1 1015
## [1] "Frequency table after encoding"
## p7_1. Padre
## -97 0 97 100 180 200
## 317 1 4 4 1 4
## 250 300 350 400 450 460
## 2 9 2 13 3 1
## 480 500 508 528 560 570
## 3 39 1 1 1 1
## 580 600 700 720 750 800
## 1 30 29 1 11 121
## 820 850 858 900 910 920
## 1 202 2 64 1 1
## 950 1000 1025 1050 1070 1100
## 2 205 1 2 1 15
## 1200 1240 1280 1300 1350 1400
## 182 1 1 38 1 30
## 1450 1460 1500 1508 1600 1700
## 2 1 144 1 28 5
## 1800 1900 2000 2050 2100 2200
## 40 2 70 1 1 6
## 2300 2400 2500 2580 2700 2800
## 1 8 15 1 2 5
## 2900 3000 3100 3200 3400 3500
## 1 18 1 1 1 4
## 3600 4000 or more <NA>
## 1 10 1015
# p7_2 (mother): floored 99.5th percentile of non-NA values other than -97.
percentile_99.5 <- local({
  reported <- mydata$p7_2
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7_2", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7_2. Madre
## -97 0 50 60 80 97 100 120 150 160 180 200 240 250 280 300 332
## 229 2 1 2 3 2 9 1 4 1 1 23 2 3 4 24 1
## 350 360 380 400 425 440 450 460 480 488 500 520 540 550 560 570 600
## 4 5 1 46 2 2 4 2 7 1 88 1 1 2 1 1 77
## 608 650 700 708 720 740 750 800 808 820 850 858 860 880 900 920 950
## 2 5 43 1 5 1 19 108 1 1 228 1 1 2 56 1 4
## 1000 1030 1050 1070 1090 1100 1200 1290 1300 1350 1400 1500 1600 1700 1800 2000 2200
## 99 1 2 2 1 11 54 1 11 1 9 63 14 2 7 10 2
## 2300 2350 2400 2500 2600 2800 3000 3500 4000 4500 <NA>
## 1 1 1 3 1 3 5 1 1 1 1391
## [1] "Frequency table after encoding"
## p7_2. Madre
## -97 0 50 60 80 97
## 229 2 1 2 3 2
## 100 120 150 160 180 200
## 9 1 4 1 1 23
## 240 250 280 300 332 350
## 2 3 4 24 1 4
## 360 380 400 425 440 450
## 5 1 46 2 2 4
## 460 480 488 500 520 540
## 2 7 1 88 1 1
## 550 560 570 600 608 650
## 2 1 1 77 2 5
## 700 708 720 740 750 800
## 43 1 5 1 19 108
## 808 820 850 858 860 880
## 1 1 228 1 1 2
## 900 920 950 1000 1030 1050
## 56 1 4 99 1 2
## 1070 1090 1100 1200 1290 1300
## 2 1 11 54 1 11
## 1350 1400 1500 1600 1700 1800
## 1 9 63 14 2 7
## 2000 2200 2300 2350 2400 2500
## 10 2 1 1 1 3
## 2600 2800 3000 or more <NA>
## 1 3 8 1391
# p7a1 (sibling 1): floored 99.5th percentile of non-NA values other than -97.
percentile_99.5 <- local({
  reported <- mydata$p7a1
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7a1", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7a1. Hermano / Hermana 1
## -97 0 60 70 97 100 150 160 200 240 250 300 329 350 400 450 460
## 113 8 1 1 1 3 1 2 11 1 3 10 1 3 13 4 1
## 480 500 560 600 620 650 700 720 750 800 850 858 900 908 950 960 1000
## 2 18 1 23 1 3 8 3 5 59 125 1 16 1 3 1 45
## 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2300 2800 3000 <NA>
## 6 44 5 5 14 3 1 3 1 8 1 1 3 2148
## [1] "Frequency table after encoding"
## p7a1. Hermano / Hermana 1
## -97 0 60 70 97 100
## 113 8 1 1 1 3
## 150 160 200 240 250 300
## 1 2 11 1 3 10
## 329 350 400 450 460 480
## 1 3 13 4 1 2
## 500 560 600 620 650 700
## 18 1 23 1 3 8
## 720 750 800 850 858 900
## 3 5 59 125 1 16
## 908 950 960 1000 1100 1200
## 1 3 1 45 6 44
## 1300 1400 1500 1600 1700 1800
## 5 5 14 3 1 3
## 1900 2000 2300 2800 2926 or more <NA>
## 1 8 1 1 3 2148
# p7a2 (sibling 2): floored 99.5th percentile of non-NA values other than -97.
percentile_99.5 <- local({
  reported <- mydata$p7a2
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7a2", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7a2. Hermano / Hermana 2
## -97 0 60 70 100 150 200 220 240 250 280 300 350 400 450 480 500
## 53 5 1 1 2 1 2 1 2 1 1 2 1 4 3 1 10
## 510 600 700 750 800 840 850 900 950 1000 1200 1400 1500 1600 1800 2000 2400
## 1 6 3 6 27 1 60 12 2 12 9 3 5 1 3 2 1
## <NA>
## 2490
## [1] "Frequency table after encoding"
## p7a2. Hermano / Hermana 2
## -97 0 60 70 100 150
## 53 5 1 1 2 1
## 200 220 240 250 280 300
## 2 1 2 1 1 2
## 350 400 450 480 500 510
## 1 4 3 1 10 1
## 600 700 750 800 840 850
## 6 3 6 27 1 60
## 900 950 1000 1200 1400 1500
## 12 2 12 9 3 5
## 1600 1800 2000 2017 or more <NA>
## 1 3 2 1 2490
# p7a3 (sibling 3): floored 99.5th percentile of non-NA values other than -97.
percentile_99.5 <- local({
  reported <- mydata$p7a3
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7a3", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7a3. Hermano / Hermana 3
## -97 0 8 80 120 200 380 400 450 520 600 700 750 800 840 850 900
## 20 1 1 1 1 2 1 1 1 1 1 2 3 3 1 15 4
## 1000 1200 1500 4800 <NA>
## 6 3 2 1 2664
## [1] "Frequency table after encoding"
## p7a3. Hermano / Hermana 3
## -97 0 8 80 120 200
## 20 1 1 1 1 2
## 380 400 450 520 600 700
## 1 1 1 1 1 2
## 750 800 840 850 900 1000
## 3 3 1 15 4 6
## 1200 1500 3975 or more <NA>
## 3 2 1 2664
# p7a4 (sibling 4): floored 99.5th percentile of non-NA values other than -97.
percentile_99.5 <- local({
  reported <- mydata$p7a4
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7a4", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7a4. Hermano / Hermana 4
## -97 0 100 200 500 750 800 850 1000 <NA>
## 9 1 1 1 1 1 2 8 2 2709
## [1] "Frequency table after encoding"
## p7a4. Hermano / Hermana 4
## -97 0 100 200 500 750
## 9 1 1 1 1 1
## 800 850 1000 or more <NA>
## 2 8 2 2709
# p7a5 (sibling 5): floored 99.5th percentile of non-NA values other than -97.
percentile_99.5 <- local({
  reported <- mydata$p7a5
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7a5", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7a5. Hermano / Hermana 5
## -97 520 850 858 1300 <NA>
## 3 1 5 1 1 2724
## [1] "Frequency table after encoding"
## p7a5. Hermano / Hermana 5
## -97 520 850 858 1284 or more <NA>
## 3 1 5 1 1 2724
# p7a6 (sibling 6): floored 99.5th percentile of non-NA values other than -97.
percentile_99.5 <- local({
  reported <- mydata$p7a6
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7a6", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7a6. Hermano / Hermana 6
## 850 1200 <NA>
## 2 1 2732
## [1] "Frequency table after encoding"
## p7a6. Hermano / Hermana 6
## 850 1196 or more <NA>
## 2 1 2732
# p7a7 (sibling 7): floored 99.5th percentile of non-NA values other than -97.
# NOTE(review): the recorded output shows a single reported value, so the
# break point equals that value and only the label changes; confirm this
# gives the intended protection.
percentile_99.5 <- local({
  reported <- mydata$p7a7
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7a7", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7a7. Hermano / Hermana 7
## 850 <NA>
## 1 2734
## [1] "Frequency table after encoding"
## p7a7. Hermano / Hermana 7
## 850 or more <NA>
## 1 2734
# p7b1 (grandparent 1): floored 99.5th percentile of non-NA values other than -97.
percentile_99.5 <- local({
  reported <- mydata$p7b1
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7b1", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7b1. Abuelo / Abuela 1
## -97 0 97 100 200 300 400 500 600 700 750 800 850 900 950 1000 1100
## 19 1 1 1 1 1 2 10 3 3 2 9 17 5 1 6 1
## 1200 1300 1500 1600 1800 2000 3000 6300 <NA>
## 6 1 7 1 3 1 1 1 2631
## [1] "Frequency table after encoding"
## p7b1. Abuelo / Abuela 1
## -97 0 97 100 200 300
## 19 1 1 1 1 1
## 400 500 600 700 750 800
## 2 10 3 3 2 9
## 850 900 950 1000 1100 1200
## 17 5 1 6 1 6
## 1300 1500 1600 1800 2000 3000
## 1 7 1 3 1 1
## 4913 or more <NA>
## 1 2631
# p7b2 (grandparent 2): floored 99.5th percentile of non-NA values other than -97.
percentile_99.5 <- local({
  reported <- mydata$p7b2
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7b2", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7b2. Abuelo / Abuela 2
## -97 0 100 300 400 500 600 700 750 800 850 900 950 1000 1200 1400 1500
## 6 3 1 1 1 1 2 2 1 2 6 2 1 3 2 1 1
## 2000 4000 <NA>
## 1 1 2697
## [1] "Frequency table after encoding"
## p7b2. Abuelo / Abuela 2
## -97 0 100 300 400 500
## 6 3 1 1 1 1
## 600 700 750 800 850 900
## 2 2 1 2 6 2
## 950 1000 1200 1400 1500 2000
## 1 3 2 1 1 1
## 3689 or more <NA>
## 1 2697
# p7c1 (uncle/aunt 1): floored 99.5th percentile of non-NA values other than -97.
percentile_99.5 <- local({
  reported <- mydata$p7c1
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7c1", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7c1. Tío / Tía 1
## -97 97 200 300 320 400 450 480 500 600 700 750 770 800 850 858 900
## 47 3 4 2 1 3 1 1 4 3 10 1 1 19 48 1 13
## 950 1000 1100 1200 1300 1400 1500 1600 1800 2000 2400 2800 3000 <NA>
## 1 13 2 19 3 3 16 1 4 1 1 1 1 2507
## [1] "Frequency table after encoding"
## p7c1. Tío / Tía 1
## -97 97 200 300 320 400
## 47 3 4 2 1 3
## 450 480 500 600 700 750
## 1 1 4 3 10 1
## 770 800 850 858 900 950
## 1 19 48 1 13 1
## 1000 1100 1200 1300 1400 1500
## 13 2 19 3 3 16
## 1600 1800 2000 2400 2800 2819 or more
## 1 4 1 1 1 1
## <NA>
## 2507
# p7c2 (uncle/aunt 2): floored 99.5th percentile of non-NA values other than -97.
percentile_99.5 <- local({
  reported <- mydata$p7c2
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7c2", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7c2. Tío / Tía 2
## -97 0 60 97 500 550 700 750 800 850 858 900 950 1000 1100 1200 1400
## 27 1 1 2 3 1 3 2 9 22 1 5 1 7 2 6 1
## 1500 1800 2000 2500 3500 <NA>
## 4 1 3 2 1 2630
## [1] "Frequency table after encoding"
## p7c2. Tío / Tía 2
## -97 0 60 97 500 550
## 27 1 1 2 3 1
## 700 750 800 850 858 900
## 3 2 9 22 1 5
## 950 1000 1100 1200 1400 1500
## 1 7 2 6 1 4
## 1800 2000 2500 3114 or more <NA>
## 1 3 2 1 2630
# p7c3 (uncle/aunt 3): floored 99.5th percentile of non-NA values other than -97.
percentile_99.5 <- local({
  reported <- mydata$p7c3
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7c3", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7c3. Tío / Tía 3
## -97 97 500 600 700 800 850 900 1000 1100 1500 2200 <NA>
## 11 2 2 1 1 3 6 2 2 1 3 1 2700
## [1] "Frequency table after encoding"
## p7c3. Tío / Tía 3
## -97 97 500 600 700 800
## 11 2 2 1 1 3
## 850 900 1000 1100 1500 2119 or more
## 6 2 2 1 3 1
## <NA>
## 2700
# p7c4 (uncle/aunt 4): floored 99.5th percentile of non-NA values other than -97.
percentile_99.5 <- local({
  reported <- mydata$p7c4
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7c4", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7c4. Tío / Tía 4
## -97 97 850 900 1100 1200 <NA>
## 3 1 1 2 1 2 2725
## [1] "Frequency table after encoding"
## p7c4. Tío / Tía 4
## -97 97 850 900 1100 1200 or more
## 3 1 1 2 1 2
## <NA>
## 2725
# p7c5 (uncle/aunt 5): floored 99.5th percentile of non-NA values other than -97.
percentile_99.5 <- local({
  reported <- mydata$p7c5
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7c5", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7c5. Tío / Tía 5
## -97 600 900 3200 <NA>
## 1 1 1 1 2731
## [1] "Frequency table after encoding"
## p7c5. Tío / Tía 5
## -97 600 900 3177 or more <NA>
## 1 1 1 1 2731
# p7c6 (uncle/aunt 6): floored 99.5th percentile of non-NA values other than -97.
# NOTE(review): the recorded output shows a single reported value, so the
# break point equals that value and only the label changes; confirm this
# gives the intended protection.
percentile_99.5 <- local({
  reported <- mydata$p7c6
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7c6", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7c6. Tío / Tía 6
## -97 1500 <NA>
## 1 1 2733
## [1] "Frequency table after encoding"
## p7c6. Tío / Tía 6
## -97 1500 or more <NA>
## 1 1 2733
# p7d1 (nephew/niece 1): floored 99.5th percentile of non-NA values other than -97.
percentile_99.5 <- local({
  reported <- mydata$p7d1
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7d1", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7d1. Sobrino / Sobrina 1
## -97 850 920 1000 1500 <NA>
## 6 5 1 2 1 2720
## [1] "Frequency table after encoding"
## p7d1. Sobrino / Sobrina 1
## -97 850 920 1000 1480 or more <NA>
## 6 5 1 2 1 2720
# p7d2 (nephew/niece 2): floored 99.5th percentile of non-NA values other than -97.
# NOTE(review): the recorded output shows a single reported value, so the
# break point equals that value and only the label changes; confirm this
# gives the intended protection.
percentile_99.5 <- local({
  reported <- mydata$p7d2
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p7d2", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p7d2. Sobrino / Sobrina 2
## -97 850 <NA>
## 1 1 2733
## [1] "Frequency table after encoding"
## p7d2. Sobrino / Sobrina 2
## -97 850 or more <NA>
## 1 1 2733
# p49 (monthly education spending): floored 99.5th percentile of non-NA
# values other than -97.
percentile_99.5 <- local({
  reported <- mydata$p49
  reported <- reported[!is.na(reported) & reported != -97]
  floor(quantile(reported, probs = 0.995))
})
mydata <- top_recode(variable = "p49", break_point = percentile_99.5, missing = -97)
## [1] "Frequency table before encoding"
## p49. ¿Cuánto gasta cada mes en total en la educación de todos sus hijos que viven en
## 0 1 8 10 20 25 30 33 34 40 45 50 58 60 70 72 75
## 63 1 2 1 16 2 31 1 1 11 2 131 2 30 18 1 1
## 80 84 90 100 108 120 130 140 150 160 170 180 190 200 208 210 215
## 52 1 4 280 3 19 5 1 171 5 1 12 2 333 3 3 1
## 220 230 240 250 260 270 280 282 300 305 308 310 320 330 350 360 372
## 2 2 4 83 1 1 8 1 321 1 2 1 3 3 57 1 1
## 375 380 385 390 400 408 410 440 450 460 473 480 500 508 550 580 600
## 1 2 1 1 192 3 1 1 21 1 1 2 248 2 5 1 105
## 608 650 700 730 750 757 770 800 840 850 900 960 1000 1008 1100 1200 1230
## 3 6 52 1 4 1 1 77 1 4 18 1 109 1 2 26 1
## 1300 1400 1470 1500 1508 1600 1700 1800 2000 2100 2200 2500 2800 3000 3400 3500 4000
## 4 2 1 51 1 2 1 6 35 1 1 7 1 10 1 1 4
## 5000 6000 7000 <NA>
## 1 1 1 5
## [1] "Frequency table after encoding"
## p49. ¿Cuánto gasta cada mes en total en la educación de todos sus hijos que viven en
## 0 1 8 10 20 25
## 63 1 2 1 16 2
## 30 33 34 40 45 50
## 31 1 1 11 2 131
## 58 60 70 72 75 80
## 2 30 18 1 1 52
## 84 90 100 108 120 130
## 1 4 280 3 19 5
## 140 150 160 170 180 190
## 1 171 5 1 12 2
## 200 208 210 215 220 230
## 333 3 3 1 2 2
## 240 250 260 270 280 282
## 4 83 1 1 8 1
## 300 305 308 310 320 330
## 321 1 2 1 3 3
## 350 360 372 375 380 385
## 57 1 1 1 2 1
## 390 400 408 410 440 450
## 1 192 3 1 1 21
## 460 473 480 500 508 550
## 1 1 2 248 2 5
## 580 600 608 650 700 730
## 1 105 3 6 52 1
## 750 757 770 800 840 850
## 4 1 1 77 1 4
## 900 960 1000 1008 1100 1200
## 18 1 109 1 2 26
## 1230 1300 1400 1470 1500 1508
## 1 4 2 1 51 1
## 1600 1700 1800 2000 2100 2200
## 2 1 6 35 1 1
## 2500 2800 3000 or more <NA>
## 7 1 19 5
# Top code number of rooms variables with large and unusual numbers
# p9 (rooms in the dwelling): 7 is the break point for the top category.
mydata <- top_recode(
  variable = "p9",
  break_point = 7,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## p9. ¿Cuántas habitaciones tiene esta vivienda sin incluir cocina, baños, pasillos ni
## 1 2 3 4 5 6 7 8 9 10 11 12 <NA>
## 460 999 717 342 125 57 15 7 3 2 1 2 5
## [1] "Frequency table after encoding"
## p9. ¿Cuántas habitaciones tiene esta vivienda sin incluir cocina, baños, pasillos ni
## 1 2 3 4 5 6 7 or more <NA>
## 460 999 717 342 125 57 30 5
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)
# Names are listed explicitly, grouped by variable-name prefix, in the
# order in which their tables are captured.
indirect_PII <- c(
  "student_female", "hh_gender", "attending_confirm", "grado2016_confirm",
  "p2a", "p2b",
  "p3a1", "p3a2", "p3a3", "p3a4", "p3a5", "p3a6", "p3a7", "p3a8",
  "p3b1", "p3b2", "p3b3",
  "p3c1", "p3c2", "p3c3", "p3c4", "p3c5", "p3c6",
  "p3d1", "p3d2", "p3d3", "p3d4", "p3d5", "p3d6",
  "p26a1", "p26a2", "p26a3", "p26a4", "p26a5", "p26a6", "p26a7", "p26a8",
  "p26c1", "p26c2", "p26c3", "p26c4", "p26c5", "p26c6",
  "p26d1", "p26d2", "p26d3", "p26d4", "p26d5", "p26d6",
  "p4_1", "p4_2",
  "p4a1", "p4a2", "p4a3", "p4a4", "p4a5", "p4a6", "p4a7", "p4a8",
  "p4b1", "p4b2", "p4b3",
  "p4c1", "p4c2", "p4c3", "p4c4", "p4c5", "p4c6",
  "p4d1", "p4d2", "p4d3",
  "p5_aa1", "p5_aa2", "p5_aa3", "p5_aa4", "p5_aa5", "p5_aa6", "p5_aa7", "p5_aa8",
  "p23a", "p23_1", "p23_2",
  "p23a1", "p23a2", "p23a3", "p23a4", "p23a5", "p23a6", "p23a7", "p23a8",
  "p23b1", "p23b2", "p23b3",
  "p23c1", "p23c2", "p23c3", "p23c4", "p23c5", "p23c6",
  "p23d1", "p23d2", "p23d3",
  "p8", "p42", "p43",
  "p44b",
  "p44b_1", "p44b_2", "p44b_3", "p44b_4", "p44b_5", "p44b_6", "p44b_7", "p44b_8",
  "p44b_99",
  "p50", "p53", "p55", "p59"
)
# Pass the indirect-PII variable names to the capture_tables helper.
capture_tables(indirect_PII)
# Recode those with very specific values.
# p4b1 (grandparent 1): one bin per activity code; code 1 is relabelled
# "Otros" and the remaining codes keep a descriptive label.
break_activity <- as.numeric(1:5)
labels_activity <- setNames(
  as.numeric(1:5),
  c("Otros",
    "Estudia y tiene un trabajo remunerado",
    "Trabajo remunerado",
    "Quehaceres del hogar o trabajo no remunerado",
    "No hace nada")
)
mydata <- ordinal_recode(variable = "p4b1", break_points = break_activity,
                         missing = 999999, value_labels = labels_activity)
## [1] "Frequency table before encoding"
## p4b1. Abuelo / Abuela 1
## Estudia
## 3
## Trabajo remunerado
## 104
## Quehaceres del hogar o trabajo no remunerado
## 147
## No hace nada
## 143
## <NA>
## 2338
## recoded
## [1,2) [2,3) [3,4) [4,5) [5,1e+06)
## 1 3 0 0 0 0
## 3 0 0 104 0 0
## 4 0 0 0 147 0
## 5 0 0 0 0 143
## [1] "Frequency table after encoding"
## p4b1. Abuelo / Abuela 1
## Otros
## 3
## Trabajo remunerado
## 104
## Quehaceres del hogar o trabajo no remunerado
## 147
## No hace nada
## 143
## <NA>
## 2338
## [1] "Inspect value labels and relabel as necessary"
## Otros
## 1
## Estudia y tiene un trabajo remunerado
## 2
## Trabajo remunerado
## 3
## Quehaceres del hogar o trabajo no remunerado
## 4
## No hace nada
## 5
# p4c1 (uncle/aunt 1): one bin per activity code; codes 1, 2 and 5 all carry
# the "Otros" label, codes 3 and 4 keep a descriptive label.
break_activity <- as.numeric(1:5)
labels_activity <- setNames(
  as.numeric(1:5),
  c("Otros",
    "Otros",
    "Trabajo remunerado",
    "Quehaceres del hogar o trabajo no remunerado",
    "Otros")
)
mydata <- ordinal_recode(variable = "p4c1", break_points = break_activity,
                         missing = 999999, value_labels = labels_activity)
## [1] "Frequency table before encoding"
## p4c1. Tío / Tía 1
## Estudia
## 19
## Estudia y tiene un trabajo remunerado
## 13
## Trabajo remunerado
## 215
## Quehaceres del hogar o trabajo no remunerado
## 35
## No hace nada
## 17
## <NA>
## 2436
## recoded
## [1,2) [2,3) [3,4) [4,5) [5,1e+06)
## 1 19 0 0 0 0
## 2 0 13 0 0 0
## 3 0 0 215 0 0
## 4 0 0 0 35 0
## 5 0 0 0 0 17
## [1] "Frequency table after encoding"
## p4c1. Tío / Tía 1
## Otros
## 49
## Trabajo remunerado
## 215
## Quehaceres del hogar o trabajo no remunerado
## 35
## <NA>
## 2436
## [1] "Inspect value labels and relabel as necessary"
## Otros
## 1
## Otros
## 2
## Trabajo remunerado
## 3
## Quehaceres del hogar o trabajo no remunerado
## 4
## Otros
## 5
# p4c2 (uncle/aunt 2): one bin per activity code; only code 3 keeps a
# descriptive label, every other code carries the "Otros" label.
break_activity <- as.numeric(1:5)
labels_activity <- setNames(
  as.numeric(1:5),
  c(rep("Otros", 2), "Trabajo remunerado", rep("Otros", 2))
)
mydata <- ordinal_recode(variable = "p4c2", break_points = break_activity,
                         missing = 999999, value_labels = labels_activity)
## [1] "Frequency table before encoding"
## p4c2. Tío / Tía 2
## Estudia
## 7
## Estudia y tiene un trabajo remunerado
## 2
## Trabajo remunerado
## 103
## Quehaceres del hogar o trabajo no remunerado
## 21
## No hace nada
## 4
## <NA>
## 2598
## recoded
## [1,2) [2,3) [3,4) [4,5) [5,1e+06)
## 1 7 0 0 0 0
## 2 0 2 0 0 0
## 3 0 0 103 0 0
## 4 0 0 0 21 0
## 5 0 0 0 0 4
## [1] "Frequency table after encoding"
## p4c2. Tío / Tía 2
## Otros Trabajo remunerado <NA>
## 34 103 2598
## [1] "Inspect value labels and relabel as necessary"
## Otros Otros Trabajo remunerado Otros
## 1 2 3 4
## Otros
## 5
# p4c3 (uncle/aunt 3): one bin per activity code; only code 3 keeps a
# descriptive label, every other code carries the "Otros" label.
break_activity <- as.numeric(1:5)
labels_activity <- setNames(
  as.numeric(1:5),
  c(rep("Otros", 2), "Trabajo remunerado", rep("Otros", 2))
)
mydata <- ordinal_recode(variable = "p4c3", break_points = break_activity,
                         missing = 999999, value_labels = labels_activity)
## [1] "Frequency table before encoding"
## p4c3. Tío / Tía 3
## Estudia
## 3
## Estudia y tiene un trabajo remunerado
## 1
## Trabajo remunerado
## 34
## Quehaceres del hogar o trabajo no remunerado
## 7
## No hace nada
## 2
## <NA>
## 2688
## recoded
## [1,2) [2,3) [3,4) [4,5) [5,1e+06)
## 1 3 0 0 0 0
## 2 0 1 0 0 0
## 3 0 0 34 0 0
## 4 0 0 0 7 0
## 5 0 0 0 0 2
## [1] "Frequency table after encoding"
## p4c3. Tío / Tía 3
## Otros Trabajo remunerado <NA>
## 13 34 2688
## [1] "Inspect value labels and relabel as necessary"
## Otros Otros Trabajo remunerado Otros
## 1 2 3 4
## Otros
## 5
# p4d1 (nephew/niece 1): one bin per activity code; only code 1 keeps its
# "Estudia" label, codes 2-5 all carry the "Otros" label.
break_activity <- as.numeric(1:5)
labels_activity <- setNames(
  as.numeric(1:5),
  c("Estudia", rep("Otros", 4))
)
mydata <- ordinal_recode(variable = "p4d1", break_points = break_activity,
                         missing = 999999, value_labels = labels_activity)
## [1] "Frequency table before encoding"
## p4d1. Sobrino / Sobrina 1
## Estudia Estudia y tiene un trabajo remunerado
## 35 2
## Trabajo remunerado No hace nada
## 13 7
## <NA>
## 2678
## recoded
## [1,2) [2,3) [3,4) [4,5) [5,1e+06)
## 1 35 0 0 0 0
## 2 0 2 0 0 0
## 3 0 0 13 0 0
## 5 0 0 0 0 7
## [1] "Frequency table after encoding"
## p4d1. Sobrino / Sobrina 1
## Estudia Otros <NA>
## 35 22 2678
## [1] "Inspect value labels and relabel as necessary"
## Estudia Otros Otros Otros Otros
## 1 2 3 4 5
# p8 (roof material): one bin per code up to 9, with the last bin open-ended;
# codes 1, 3, 4 and 5 keep a descriptive label and the rest carry "Otro".
break_material <- as.numeric(1:9)
labels_material <- setNames(
  as.numeric(1:9),
  c("Concreto, ladrillos o cemento",
    "Otro",
    "Calamina de metal o metal",
    "Calamina de plastico o plastico",
    "Madera",
    rep("Otro", 4))
)
mydata <- ordinal_recode(variable = "p8", break_points = break_material,
                         missing = 999999, value_labels = labels_material)
## [1] "Frequency table before encoding"
## p8. Material principal de construcción del techo del hogar
## Concreto, ladrillos o cemento Tejas
## 1734 17
## Calamina de metal o metal Calamina de plástico o plástico
## 455 174
## Madera Cartón
## 284 10
## Adobe Paja
## 18 3
## Otro <NA>
## 35 5
## recoded
## [1,2) [2,3) [3,4) [4,5) [5,6) [6,7) [7,8) [8,9) [9,1e+06)
## 1 1734 0 0 0 0 0 0 0 0
## 2 0 17 0 0 0 0 0 0 0
## 3 0 0 455 0 0 0 0 0 0
## 4 0 0 0 174 0 0 0 0 0
## 5 0 0 0 0 284 0 0 0 0
## 6 0 0 0 0 0 10 0 0 0
## 7 0 0 0 0 0 0 18 0 0
## 8 0 0 0 0 0 0 0 3 0
## 99 0 0 0 0 0 0 0 0 35
## [1] "Frequency table after encoding"
## p8. Material principal de construcción del techo del hogar
## Concreto, ladrillos o cemento Otro
## 1734 83
## Calamina de metal o metal Calamina de plastico o plastico
## 455 174
## Madera <NA>
## 284 5
## [1] "Inspect value labels and relabel as necessary"
## Concreto, ladrillos o cemento Otro
## 1 2
## Calamina de metal o metal Calamina de plastico o plastico
## 3 4
## Madera Otro
## 5 6
## Otro Otro
## 7 8
## Otro
## 9
# Categorical key variables used for the k-anonymity check:
# gender, occupation/education and age
##!!! Replace with candidate categorical demo vars
selectedKeyVars <- c("hh_gender", "hh_ageinyears", "p4_1")
# weight variable (add if available)
# selectedWeightVar = c('projwt') ##!!! Replace with weight var
# household id variable (cluster)
# selectedHouseholdID = c('wpid') ##!!! Replace with household id
# Build the sdcMicro object from the key variables, then print its summary
sdcInitial <- createSdcObj(
  dat = mydata,
  keyVars = selectedKeyVars
)
sdcInitial
## The input dataset consists of 2735 rows and 804 variables.
## --> Categorical key variables: hh_gender, hh_ageinyears, p4_1
## ----------------------------------------------------------------------
## Information on categorical key variables:
##
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
## Key Variable Number of categories Mean size Size of smallest (>0)
## hh_gender 3 (3) 1365.000 (1365.000) 482
## hh_ageinyears 22 (22) 130.000 (130.000) 58
## p4_1 6 (6) 354.800 (354.800) 1
##
## (482)
## (58)
## (1)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
##
## Number of observations violating
## - 2-anonymity: 5 (0.183%)
## - 3-anonymity: 7 (0.256%)
## - 5-anonymity: 11 (0.402%)
##
## ----------------------------------------------------------------------
# Show the key-variable values of records that violate k-anonymity
# mydata <- labelDataset(mydata)
# Flag records whose key-variable combination has a sample frequency (fk)
# below 2, i.e. records violating 2-anonymity, and list their key values
notAnon <- sdcInitial@risk$individual[, "fk"] < 2
mydata[notAnon, selectedKeyVars]
## # A tibble: 5 x 3
## hh_gender hh_ageinyears p4_1
## <dbl+lbl> <dbl+lbl> <dbl+lbl>
## 1 1 [Hombre] 10 [39] 5 [No hace nada]
## 2 1 [Hombre] 9 [38] 2 [Estudia y tiene un trabajo remunerado]
## 3 1 [Hombre] 11 [40] 2 [Estudia y tiene un trabajo remunerado]
## 4 1 [Hombre] 16 [45] 4 [Quehaceres del hogar o trabajo no remunerado]
## 5 1 [Hombre] 8 [37] 2 [Estudia y tiene un trabajo remunerado]
# Run local suppression on the key variables (k = 2 is the sdcMicro default)
sdcFinal <- localSuppression(sdcInitial, k = 2)
# Preview the manipulated key variables for the previously flagged records
extractManipData(sdcFinal)[notAnon, selectedKeyVars]
## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first
## element will be used
## hh_gender hh_ageinyears p4_1
## 757 1 NA 5
## 924 1 NA 2
## 2084 1 NA 2
## 2096 1 NA 4
## 2220 1 NA 2
# Blank out gender and p4_1 for the records flagged as violating 2-anonymity.
# NOTE(review): the localSuppression() preview for these records shows
# hh_ageinyears being suppressed rather than hh_gender/p4_1 -- confirm that
# this manual choice is intended.
mydata[notAnon, c("hh_gender", "p4_1")] <- NA
# !!! Identify open-end variables here:
open_ends <- c(
  "hh_parentesco_other",
  "pref65f",
  "pref66f",
  "p44c",
  "p51a",
  "i19a1",
  "p8a"
)
# Hand the open-end variables to the report_open() helper for review
report_open(list_open_ends = open_ends)
# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number
# Every open-end listed above is actually verbatim data in Spanish, so all of
# them are dropped from the data set in a single step
mydata <- mydata[!names(mydata) %in% open_ends]
# Setup map
# !!!No GPS data
# Export the cleaned data set as "<filename>_PU" in Stata (.dta) and SPSS (.sav) formats
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))
# Build the report title dynamically from the input file name
title_var <- sprintf("DOL-ILAB SDC - %s", filename)