Installation

First, install the intsvy package from the eldafani GitHub repository and the PIAAC package, which contains the piaac data, from the pbiecek GitHub repository.

library(devtools)
library(reshape)
library(ggplot2)
# Install the GitHub packages only when they are missing, then attach them
# unconditionally. require() returns FALSE without attaching anything when a
# package is absent, so installing inside `if (!require(...))` alone would
# leave a freshly installed package unloaded for the rest of the session.
if (!requireNamespace("intsvy", quietly = TRUE)) {
  install_github("eldafani/intsvy")
#  install_github("pbiecek/intsvy")
}
library(intsvy)
if (!requireNamespace("PIAAC", quietly = TRUE)) {
  install_github("pbiecek/PIAAC")
}
library(PIAAC)

dim(piaac)
## [1] 152514    610

Contingency tables

The piaac.table() function calculates the proportions of the groups defined by ‘variable’ within the strata defined by the ‘by’ variables. The ‘data’ argument should be a data frame with the column SPFWT0 (the final weights in PIAAC) and the SPFWT.. columns for the BRR weights.

# age distribution in whole dataset
(ptable <- piaac.table(variable="AGEG10LFS", data=piaac))
##    AGEG10LFS  Freq Percentage Std.err.
## 1 24 or less 29242      16.81     0.02
## 2      25-34 28779      20.25     0.03
## 3      35-44 30705      21.20     0.02
## 4      45-54 31338      21.53     0.02
## 5    55 plus 32450      20.22     0.02
# age distribution within countries
head(ptableC <- piaac.table(variable="AGEG10LFS", by="CNTRYID", data=piaac))
##   CNTRYID  AGEG10LFS Freq Percentage Std.err.
## 1 Austria 24 or less  898      16.00     0.04
## 2 Austria      25-34  958      19.11     0.06
## 3 Austria      35-44 1117      22.18     0.07
## 4 Austria      45-54 1188      23.83     0.07
## 5 Austria    55 plus  969      18.89     0.04
## 6 Belgium 24 or less  994      15.33     0.03
# age distribution within countries and gender segments
head(ptableCA <- piaac.table(variable="AGEG10LFS", by=c("CNTRYID", "GENDER_R"), data=piaac))
##   CNTRYID GENDER_R  AGEG10LFS Freq Percentage Std.err.
## 1 Austria   Female 24 or less  450      15.55     0.06
## 2 Austria   Female      25-34  479      19.32     0.07
## 3 Austria   Female      35-44  557      22.09     0.10
## 4 Austria   Female      45-54  607      23.84     0.09
## 5 Austria   Female    55 plus  507      19.20     0.05
## 6 Austria     Male 24 or less  448      16.45     0.07

The output of the piaac.table() function is of class intsvy.table, and plot() is one of the functions overloaded for this class.

# age distribution in whole dataset
plot(ptable)

plot of chunk unnamed-chunk-3

# age distribution within countries
plot(ptableC, stacked=TRUE)

plot of chunk unnamed-chunk-3

# age distribution within countries and gender segments
plot(na.omit(ptableCA), stacked=TRUE)

plot of chunk unnamed-chunk-3

Averages and group averages for plausible values

The piaac.mean.pv() function calculates the averages of the variable given by ‘pvlabel’ within the strata defined by the ‘by’ variables. The ‘data’ argument should be a data frame with the column SPFWT0 (the final weights in PIAAC) and the SPFWT.. columns for the BRR weights.

Note that ‘pvlabel’ is one of ‘LIT’ (for literacy), ‘NUM’ (for numeracy), or ‘PSL’ (for problem solving). In the piaac data there are 10 plausible values for each of these dimensions.

#Table A2.2a from SkillsOutlook2013_ENG_Table_Chapter2

# Country averages
head(pmeansNC <- piaac.mean.pv(pvlabel="NUM", by="CNTRYID", data=piaac, export=FALSE))
##          CNTRYID  Freq  Mean s.e.    SD  s.e
## 1        Austria  5130 275.0 0.88 48.84 0.64
## 2        Belgium  5463 280.4 0.83 49.27 0.67
## 3         Canada 26683 265.2 0.70 55.60 0.54
## 4 Czech Republic  6102 275.7 0.93 43.59 0.78
## 5        Denmark  7328 278.3 0.73 51.13 0.59
## 6        Estonia  7632 273.1 0.53 45.45 0.48
# Country averages for different age groups
head(pmeansNCA <- piaac.mean.pv(pvlabel="NUM", by=c("CNTRYID", "AGEG10LFS"), data=piaac, export=FALSE))
##   CNTRYID  AGEG10LFS Freq  Mean s.e.    SD  s.e
## 1 Austria 24 or less  898 279.3 1.63 46.15 1.82
## 2 Austria      25-34  958 282.1 1.73 49.98 1.63
## 3 Austria      35-44 1117 281.4 2.01 50.26 1.40
## 4 Austria      45-54 1188 274.5 1.67 46.49 1.24
## 5 Austria    55 plus  969 257.5 1.74 46.83 1.47
## 6 Belgium 24 or less  994 282.8 1.74 45.07 1.63
# Country averages for different age and gender groups
head(pmeansNCAG <- piaac.mean.pv(pvlabel="NUM", by=c("CNTRYID", "AGEG10LFS", "GENDER_R"), data=piaac, export=FALSE))
##   CNTRYID  AGEG10LFS GENDER_R Freq  Mean s.e.    SD  s.e
## 1 Austria 24 or less   Female  450 274.3 2.46 44.10 2.12
## 2 Austria 24 or less     Male  448 284.0 2.47 47.53 2.85
## 3 Austria      25-34   Female  479 275.5 2.50 48.82 2.28
## 4 Austria      25-34     Male  479 288.8 2.35 50.26 2.39
## 5 Austria      35-44   Female  557 273.6 2.58 48.85 1.95
## 6 Austria      35-44     Male  560 289.1 2.84 50.47 2.34
# Country averages for different age and gender groups (changed order)
head(pmeansNCGA <- piaac.mean.pv(pvlabel="NUM", by=c("CNTRYID", "GENDER_R", "AGEG10LFS"), data=piaac, export=FALSE))
##   CNTRYID GENDER_R  AGEG10LFS Freq  Mean s.e.    SD  s.e
## 1 Austria   Female 24 or less  450 274.3 2.46 44.10 2.12
## 2 Austria   Female      25-34  479 275.5 2.50 48.82 2.28
## 3 Austria   Female      35-44  557 273.6 2.58 48.85 1.95
## 4 Austria   Female      45-54  607 268.3 2.09 43.44 1.50
## 5 Austria   Female    55 plus  507 250.6 2.25 44.18 2.20
## 6 Austria     Male 24 or less  448 284.0 2.47 47.53 2.85

The output of the piaac.mean.pv() function is of class intsvy.mean, and plot() is one of the functions overloaded for this class.

#
# plotting country average NUM performance 
plot(pmeansNC) + ggtitle("Country performance in NUM")

plot of chunk unnamed-chunk-5

# without se bars, not a good idea
plot(pmeansNC, se=FALSE)

plot of chunk unnamed-chunk-5

# sorted, that's better
plot(pmeansNC, sort=TRUE)

plot of chunk unnamed-chunk-5

#
# plotting country average within 
# age groups NUM performance 
plot(pmeansNCA, sort=TRUE)

plot of chunk unnamed-chunk-5

#
# plotting country average within 
# age and gender groups NUM performance 
plot(na.omit(pmeansNCGA), sort=TRUE)

plot of chunk unnamed-chunk-5

Averages and group averages part 2

The piaac.mean() function calculates the averages of ‘variable’ in the groups defined by the ‘by’ variables. The ‘data’ argument should be a data frame with the column SPFWT0 (the final weights in PIAAC) and the SPFWT.. columns for the BRR weights.

Note that ‘variable’ should be continuous and should not be any of the plausible values (there is a separate function, piaac.mean.pv(), for them).

# average age in different countries
head(pmeansAC <- piaac.mean(variable="AGE_R", by="CNTRYID", data=piaac, export=FALSE))
##          CNTRYID Freq  Mean s.e.
## 1        Austria    0   NaN  NaN
## 2        Belgium 5463 41.78 0.03
## 3         Canada    0   NaN  NaN
## 4 Czech Republic 6102 40.54 0.04
## 5        Denmark 7328 41.03 0.04
## 6        Estonia 7632 40.05 0.03
# average age in different countries and for different genders
head(pmeansACG <- piaac.mean(variable="AGE_R", by=c("CNTRYID","GENDER_R"), data=piaac, export=FALSE))
##   CNTRYID GENDER_R Freq  Mean s.e.
## 1 Austria   Female    0   NaN  NaN
## 2 Austria     Male    0   NaN  NaN
## 3 Belgium   Female 2763 41.70 0.07
## 4 Belgium     Male 2700 41.86 0.07
## 5  Canada   Female    0   NaN  NaN
## 6  Canada     Male    0   NaN  NaN

As with piaac.mean.pv(), the output of piaac.mean() is of class intsvy.mean, and plot() is overloaded for it.

plot(na.omit(pmeansAC), sort=TRUE)

plot of chunk unnamed-chunk-7

plot(na.omit(pmeansACG), sort=TRUE)

plot of chunk unnamed-chunk-7

Regression models with plausible values

The piaac.reg.pv() function runs a linear regression model with ‘pvlabel’ as the dependent variable and the variables ‘x’ as independent variables. The regression models are fitted within the strata defined by the ‘by’ variables. The ‘data’ argument should be a data frame with the column SPFWT0 (the final weights in PIAAC) and the SPFWT.. columns for the BRR weights.

Note that ‘pvlabel’ is one of ‘LIT’ (for literacy), ‘NUM’ (for numeracy), or ‘PSL’ (for problem solving). In the piaac data there are 10 plausible values for each of these dimensions.

#
# LITeracy explained by GENDER in different countries
rmodelLG <- piaac.reg.pv(pvlabel="LIT", x="GENDER_R", by = "CNTRYID", data=piaac, export=FALSE) 
rmodelLG[1:3]
## $Austria
##                Estimate Std. Error t value
## (Intercept)      271.53       1.04  259.90
## GENDER_RFemale    -4.14       1.32   -3.13
## R-squared          0.22       0.14    1.58
## 
## $Belgium
##                Estimate Std. Error t value
## (Intercept)      278.09       0.97  287.08
## GENDER_RFemale    -5.27       1.21   -4.36
## R-squared          0.31       0.15    2.17
## 
## $Canada
##                Estimate Std. Error t value
## (Intercept)      274.49       0.86  317.75
## GENDER_RFemale    -2.30       1.20   -1.92
## R-squared          0.06       0.05    1.04
#
# LITeracy explained by GENDER and level of trust in different countries
rmodelLGI <- piaac.reg.pv(pvlabel="LIT", x=c("GENDER_R", "I_Q06A"), by = "CNTRYID", data=piaac, export=FALSE) 
rmodelLGI[1:3]
## $Austria
##                                  Estimate Std. Error t value
## (Intercept)                        255.24       1.47  173.70
## GENDER_RFemale                      -4.03       1.33   -3.04
## I_Q06AAgree                         22.90       1.81   12.62
## I_Q06ANeither agree nor disagree    15.68       2.52    6.22
## I_Q06ADisagree                      28.18       2.12   13.29
## I_Q06AStrongly disagree             24.47       2.64    9.28
## R-squared                            7.19       0.85    8.47
## 
## $Belgium
##                                  Estimate Std. Error t value
## (Intercept)                        263.52       1.78  147.72
## GENDER_RFemale                      -5.21       1.23   -4.22
## I_Q06AAgree                          9.33       1.92    4.86
## I_Q06ANeither agree nor disagree    23.52       2.20   10.70
## I_Q06ADisagree                      23.83       2.16   11.02
## I_Q06AStrongly disagree             22.75       3.57    6.37
## R-squared                            4.37       0.63    7.00
## 
## $Canada
##                                  Estimate Std. Error t value
## (Intercept)                        260.38       1.62  161.09
## GENDER_RFemale                      -2.78       1.17   -2.38
## I_Q06AAgree                          8.15       1.94    4.21
## I_Q06ANeither agree nor disagree    14.57       1.93    7.55
## I_Q06ADisagree                      26.30       1.72   15.30
## I_Q06AStrongly disagree             27.02       2.79    9.68
## R-squared                            3.84       0.41    9.26

The piaac.reg.pv() function outputs an object of class intsvy.reg, for which the plot() function is overloaded.

# add se intervals
plot(rmodelLG, se=TRUE)

plot of chunk unnamed-chunk-9

# sorted by R-squared
plot(rmodelLG, se=TRUE, sort=TRUE)

plot of chunk unnamed-chunk-9

plot(rmodelLGI, se=TRUE)
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 7.
## Consider specifying shapes manually. if you must have them.
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 7.
## Consider specifying shapes manually. if you must have them.
## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 7.
## Consider specifying shapes manually. if you must have them.

plot of chunk unnamed-chunk-10

Regression models part 2

The piaac.reg() function fits regression models with ‘y’ as the dependent variable and ‘x’ as the independent variables, within the groups defined by the ‘by’ variables. The ‘data’ argument should be a data frame with the column SPFWT0 (the final weights in PIAAC) and the SPFWT.. columns for the BRR weights.

Note that ‘y’ should be continuous and should not be any of the plausible values (there is a separate function, piaac.reg.pv(), for them).

#
# regression of AGE on GENDER per country
rmodelAGC <- piaac.reg(y="AGE_R", x="GENDER_R", by="CNTRYID", data=piaac, export=FALSE) 

As with piaac.reg.pv(), the output of piaac.reg() is of class intsvy.reg, and plot() is overloaded for it.

plot(rmodelAGC, se=TRUE)

plot of chunk unnamed-chunk-12

# sorted by R-squared
plot(rmodelAGC, se=TRUE, sort=TRUE)

plot of chunk unnamed-chunk-12