#load("E:/Dropbox/ICPSRML2020_admin/data/VB.RData")
#dta$ukpartyid
#dta$uspartyid
#dta$uspartyid7
#btp <- dta[,c("country", "ukpartyid", "uspartyid7", "authsub1", "authsub2", "authsub3", "authsub4", "authtrad1_1", "authtrad1_2", "authtrad1_3", "authtrad1_4", "authaggression_1", "authaggression_2", "authaggression_3", "authaggression_4", "roughedup", "sdo_1", "sdo_2", "sdo_3", "sdo_4", "egalitarianism_1", "egalitarianism_2", "egalitarianism_3", "egalitarianism_4", "suspicion_1", "suspicion_2", "suspicion_3", "suspicion_4", "lwa_1", "lwa_2", "lwa_3", "lwa_4", "lwa_5", "lwa_6", "populism_1", "populism_2", "populism_3", "populism_4", "happybrexit_1", "happytrump_1")]
#btp <- btp[complete.cases(btp[,c(-2,-3)]),]
#btp[,-c(1,2,3)] <- mapply(as.numeric, btp[,-c(1,2,3)])
#table(btp$uspartyid7)
#btp$uspartyid7[btp$uspartyid7 %in% 1:3] <- "1"
#btp$uspartyid7[btp$uspartyid7 == 4] <- "2"
#btp$uspartyid7[btp$uspartyid7 %in% 5:7] <- "3"
#
#btp$uspartyid7 <- factor(btp$uspartyid7, labels = c("Democrat", "Independent", "Republican"))
#colnames(btp)[3] <- "uspartyid"
# btp$pid.merge <- rep(NA, nrow(btp))
#
# unique(btp$ukpartyid)
# unique(btp$uspartyid)
#
# btp$pid.merge[btp$ukpartyid == "Conservative Party"] <- "Conservative"
# btp$pid.merge[btp$uspartyid == "Republican"] <- "Conservative"
#
# btp$pid.merge[btp$ukpartyid == "No - none"] <- "Independent"
# btp$pid.merge[btp$uspartyid == "Independent"] <- "Independent"
#
# btp$pid.merge[btp$ukpartyid == "Labour Party"] <- "Liberal"
# btp$pid.merge[btp$uspartyid == "Democrat"] <- "Liberal"
#
# btp$pid.merge <- factor(btp$pid.merge, levels = c("Liberal", "Independent", "Conservative"))
#
# btp <- cbind.data.frame(btp[,1:3], btp$pid.merge, btp[,-c(1,2,3,41)])
#
# colnames(btp)[4] <- "pid.merge"
#saveRDS(btp,"E:/Dropbox/ICPSRML2020_admin/data/BTP_cPCA.RDS")
#library(parallel)
#library(doParallel)
library(tidyverse)
library(ggrepel)
library(ggforce)
library(scPCA)
library(factoextra)
library(readit)
library(fastICA)
set.seed(1995)
cal <- readRDS("E:/Dropbox/ICPSRML2020_admin/data/cal_voters.RDS")
cal.pca <- prcomp(cal[,-1], center=T, scale=T)
cal.pca$rotation[rev(order(abs(cal.pca$rotation[,1]))), 1:3]
## PC1 PC2 PC3
## approve_trump -0.282937920 -0.032175056 0.116244411
## party 0.279404924 -0.030960656 0.012499770
## favor_kavanaugh -0.277073944 -0.012418419 0.086464889
## favor_borderwall -0.265821729 -0.063672032 0.073184115
## house_trump -0.259230391 -0.011741356 0.028396734
## approve_brown 0.249510200 -0.009722059 0.197286925
## favor_localrules 0.247859490 0.019258243 0.017240886
## favor_direction -0.241777885 -0.046857165 0.279004164
## favor_obamacare 0.241512612 -0.012177865 0.086295160
## view_republicans -0.239267618 0.043445322 0.173462971
## view_democrats 0.234410496 0.027585695 0.140412210
## approve_stateleg 0.231547345 -0.029765130 0.261358943
## ideology 0.227100002 -0.078323995 0.001777398
## size_govt -0.220401677 -0.028420446 -0.225419294
## favor_gunregs 0.206142668 -0.081001376 0.023860158
## us_econ -0.173708331 -0.107814861 0.342014750
## approve_congress -0.151919403 0.108367968 0.319014201
## cal_econ 0.110888324 -0.134489164 0.484474444
## race 0.070284757 0.343994993 0.120441277
## gender -0.058645683 -0.024112916 -0.024887836
## house_enthused 0.048871904 -0.364769370 0.256522430
## age 0.034938029 0.287813975 -0.066780424
## income 0.021506910 0.405996392 0.221631887
## education -0.017415657 0.453256040 0.275061542
## interest 0.003286214 -0.476734742 0.122699172
cal.pca$rotation[rev(order(abs(cal.pca$rotation[,2]))), 1:3]
## PC1 PC2 PC3
## interest 0.003286214 -0.476734742 0.122699172
## education -0.017415657 0.453256040 0.275061542
## income 0.021506910 0.405996392 0.221631887
## house_enthused 0.048871904 -0.364769370 0.256522430
## race 0.070284757 0.343994993 0.120441277
## age 0.034938029 0.287813975 -0.066780424
## cal_econ 0.110888324 -0.134489164 0.484474444
## approve_congress -0.151919403 0.108367968 0.319014201
## us_econ -0.173708331 -0.107814861 0.342014750
## favor_gunregs 0.206142668 -0.081001376 0.023860158
## ideology 0.227100002 -0.078323995 0.001777398
## favor_borderwall -0.265821729 -0.063672032 0.073184115
## favor_direction -0.241777885 -0.046857165 0.279004164
## view_republicans -0.239267618 0.043445322 0.173462971
## approve_trump -0.282937920 -0.032175056 0.116244411
## party 0.279404924 -0.030960656 0.012499770
## approve_stateleg 0.231547345 -0.029765130 0.261358943
## size_govt -0.220401677 -0.028420446 -0.225419294
## view_democrats 0.234410496 0.027585695 0.140412210
## gender -0.058645683 -0.024112916 -0.024887836
## favor_localrules 0.247859490 0.019258243 0.017240886
## favor_kavanaugh -0.277073944 -0.012418419 0.086464889
## favor_obamacare 0.241512612 -0.012177865 0.086295160
## house_trump -0.259230391 -0.011741356 0.028396734
## approve_brown 0.249510200 -0.009722059 0.197286925
cal.pca$rotation[order(abs(cal.pca$rotation[,3])), 1:3]
## PC1 PC2 PC3
## ideology 0.227100002 -0.078323995 0.001777398
## party 0.279404924 -0.030960656 0.012499770
## favor_localrules 0.247859490 0.019258243 0.017240886
## favor_gunregs 0.206142668 -0.081001376 0.023860158
## gender -0.058645683 -0.024112916 -0.024887836
## house_trump -0.259230391 -0.011741356 0.028396734
## age 0.034938029 0.287813975 -0.066780424
## favor_borderwall -0.265821729 -0.063672032 0.073184115
## favor_obamacare 0.241512612 -0.012177865 0.086295160
## favor_kavanaugh -0.277073944 -0.012418419 0.086464889
## approve_trump -0.282937920 -0.032175056 0.116244411
## race 0.070284757 0.343994993 0.120441277
## interest 0.003286214 -0.476734742 0.122699172
## view_democrats 0.234410496 0.027585695 0.140412210
## view_republicans -0.239267618 0.043445322 0.173462971
## approve_brown 0.249510200 -0.009722059 0.197286925
## income 0.021506910 0.405996392 0.221631887
## size_govt -0.220401677 -0.028420446 -0.225419294
## house_enthused 0.048871904 -0.364769370 0.256522430
## approve_stateleg 0.231547345 -0.029765130 0.261358943
## education -0.017415657 0.453256040 0.275061542
## favor_direction -0.241777885 -0.046857165 0.279004164
## approve_congress -0.151919403 0.108367968 0.319014201
## us_econ -0.173708331 -0.107814861 0.342014750
## cal_econ 0.110888324 -0.134489164 0.484474444
cal.pca$rotation[order(abs(cal.pca$rotation[,4])), 1:4]
## PC1 PC2 PC3 PC4
## favor_obamacare 0.241512612 -0.012177865 0.086295160 0.007068891
## favor_localrules 0.247859490 0.019258243 0.017240886 -0.010949977
## house_trump -0.259230391 -0.011741356 0.028396734 0.012140154
## favor_gunregs 0.206142668 -0.081001376 0.023860158 0.026534001
## party 0.279404924 -0.030960656 0.012499770 0.034948492
## favor_kavanaugh -0.277073944 -0.012418419 0.086464889 -0.036328379
## ideology 0.227100002 -0.078323995 0.001777398 -0.036967293
## race 0.070284757 0.343994993 0.120441277 -0.049541506
## approve_trump -0.282937920 -0.032175056 0.116244411 0.054213307
## view_republicans -0.239267618 0.043445322 0.173462971 0.061848085
## approve_congress -0.151919403 0.108367968 0.319014201 -0.062560376
## favor_direction -0.241777885 -0.046857165 0.279004164 -0.062835565
## size_govt -0.220401677 -0.028420446 -0.225419294 -0.075338770
## view_democrats 0.234410496 0.027585695 0.140412210 0.096013212
## approve_brown 0.249510200 -0.009722059 0.197286925 -0.100646931
## approve_stateleg 0.231547345 -0.029765130 0.261358943 -0.105586086
## favor_borderwall -0.265821729 -0.063672032 0.073184115 0.125657790
## interest 0.003286214 -0.476734742 0.122699172 0.198577626
## education -0.017415657 0.453256040 0.275061542 0.239855518
## us_econ -0.173708331 -0.107814861 0.342014750 -0.289685588
## house_enthused 0.048871904 -0.364769370 0.256522430 0.338881988
## cal_econ 0.110888324 -0.134489164 0.484474444 -0.354976678
## gender -0.058645683 -0.024112916 -0.024887836 -0.382935838
## income 0.021506910 0.405996392 0.221631887 0.409348270
## age 0.034938029 0.287813975 -0.066780424 -0.435994109
scree.plot.cal <- fviz_eig(cal.pca)
scree.plot.cal
component.plot.cal <- fviz_pca_var(cal.pca,
col.var = "contrib", # Color by contributions to the PC
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), #Choose gradient colors
title = "Component Loadings Plot - PCA", #Change plot title
legend.title = "Contribution", #Change legend title
repel = TRUE) # Repel labels away from each other
component.plot.cal
## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ind.plot.cal <- fviz_pca_ind(cal.pca,
col.ind = cal$vote_house,
palette = c("#FC4E07", "#00AFBB"), #Choose colors for Dems v. Reps
addEllipses = TRUE, #Add group ellipses
legend.title = "Congressional\nVote Choice",
title = "Individual Plot - PCA",
geom = "point",
repel = F)
ind.plot.cal
biplot.cal <- fviz_pca_biplot(cal.pca,
addEllipses = TRUE,
title = "Bi-Plot (Individual and Variable) - PCA",
geom = "point",
gradient.cols = c("springgreen3", "#E7B800", "orchid3"),
palette = c("#FC4E07", "#00AFBB"),
col.var = "contrib",
fill.ind = cal$vote_house, col.ind = "white",
pointshape = 21,
pointsize = 2,
repel = T) +
labs(fill = "Congressional\nVote Choice",
color = "Variable\nContribution")
biplot.cal
## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
table(cal$party)
##
## 1 2 3 4 5 6 7
## 193 62 86 9 82 46 138
cal2 <- cal
cal2$party[cal2$party %in% 1:3] <- 1
cal2$party[cal2$party %in% 4] <- NA
cal2$party[cal2$party %in% 5:7] <- 2
cal2$party <- factor(cal2$party, labels = c("Democrat", "Republican"))
cal2$strength <- rep(NA, nrow(cal2))
cal2$strength[cal$party %in% c(1,7)] <- "Strong"
#cal2$strength[cal$party %in% c(2,6)] <- "Regular"
#cal2$strength[cal$party %in% c(3,5)] <- "Weak"
cal2$strength[cal$party %in% c(2,3,5,6)] <- "Weak"
cal2$strength <- factor(cal2$strength, levels = c("Weak", "Strong"))
cal2 <- na.omit(cal2)
cal2.X <- cal2[, !colnames(cal2) %in% c("party", "strength", "vote_house")]
###Democrats as target group###
cpca.cal.dem <- scPCA(target = cal2.X[cal2$party == "Democrat",],
background = cal2.X[cal2$party == "Republican",],
penalties = 0,
n_centers = 2)
# create a dataframe to be plotted
cpca.df.cal.dem <- cpca.cal.dem$x %>%
as_tibble() %>%
mutate(dem_approval = factor(cal2$view_democrats[cal2$party == "Democrat"], labels = c("Approve", "Disapprove")))
colnames(cpca.df.cal.dem) <- c("cPC1", "cPC2", "Dem_Approval")
# plot the results
cpca.plot.cal.dem <- ggplot(cpca.df.cal.dem, aes(x = cPC1, y = cPC2, colour = Dem_Approval, fill = Dem_Approval)) +
geom_point(alpha = 0.5) +
labs(title = "cPCA Target Group: Democrats", color = "Democratic Party\nApproval", fill = "Democratic Party\nApproval") +
theme_minimal() +
scale_color_manual(values = c("#00AFBB", "#FC4E07")) +
scale_fill_manual(values = c("#00AFBB", "#FC4E07")) +
stat_ellipse(geom = "polygon", alpha = .3)
cpca.plot.cal.dem
summary(lm(as.numeric(Dem_Approval) ~ cPC1 + cPC2, data = cpca.df.cal.dem))
##
## Call:
## lm(formula = as.numeric(Dem_Approval) ~ cPC1 + cPC2, data = cpca.df.cal.dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.46695 -0.11665 -0.02737 0.10608 0.67293
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.234604 0.009485 130.162 <2e-16 ***
## cPC1 0.980700 0.024574 39.908 <2e-16 ***
## cPC2 -0.038585 0.031526 -1.224 0.222
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1752 on 338 degrees of freedom
## Multiple R-squared: 0.8307, Adjusted R-squared: 0.8296
## F-statistic: 828.9 on 2 and 338 DF, p-value: < 2.2e-16
summary(glm(Dem_Approval ~ cPC1 + cPC2, data = cpca.df.cal.dem, family = binomial()))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = Dem_Approval ~ cPC1 + cPC2, family = binomial(),
## data = cpca.df.cal.dem)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.26835 -0.00198 -0.00025 0.00000 2.00474
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.704 4.163 -2.091 0.0365 *
## cPC1 34.381 15.065 2.282 0.0225 *
## cPC2 5.310 3.909 1.358 0.1744
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 371.540 on 340 degrees of freedom
## Residual deviance: 7.915 on 338 degrees of freedom
## AIC: 13.915
##
## Number of Fisher Scoring iterations: 12
loadings.cal.dem <- cpca.cal.dem$rotation
rownames(loadings.cal.dem) <- colnames(cal2.X)
loadings.cal.dem[rev(order(abs(loadings.cal.dem[,1]))),]
## V1 V2
## view_democrats 0.890155737 0.040589255
## favor_kavanaugh -0.264959785 0.303617768
## us_econ 0.151029898 0.120071043
## favor_localrules -0.137919008 0.210545914
## favor_obamacare -0.136937438 -0.077507259
## approve_stateleg -0.128706559 -0.643946518
## favor_borderwall 0.118560352 0.022229060
## approve_brown -0.106716952 0.379300981
## size_govt -0.097344533 -0.339484403
## approve_trump -0.076206332 0.077841920
## house_trump 0.074220667 -0.392534284
## favor_direction -0.072864716 0.034958357
## race -0.066562562 -0.032153841
## cal_econ -0.041148352 -0.033124123
## view_republicans -0.031401837 0.010244219
## ideology -0.022674235 -0.000558741
## interest -0.016966310 -0.023263288
## favor_gunregs 0.015621123 0.001827250
## gender -0.013595153 0.004071648
## education -0.011495691 -0.005540192
## approve_congress -0.010045129 -0.048945424
## income -0.008710733 -0.015248340
## age 0.008543642 -0.014757672
## house_enthused 0.001078784 0.009793473
loadings.cal.dem[rev(order(abs(loadings.cal.dem[,2]))),]
## V1 V2
## approve_stateleg -0.128706559 -0.643946518
## house_trump 0.074220667 -0.392534284
## approve_brown -0.106716952 0.379300981
## size_govt -0.097344533 -0.339484403
## favor_kavanaugh -0.264959785 0.303617768
## favor_localrules -0.137919008 0.210545914
## us_econ 0.151029898 0.120071043
## approve_trump -0.076206332 0.077841920
## favor_obamacare -0.136937438 -0.077507259
## approve_congress -0.010045129 -0.048945424
## view_democrats 0.890155737 0.040589255
## favor_direction -0.072864716 0.034958357
## cal_econ -0.041148352 -0.033124123
## race -0.066562562 -0.032153841
## interest -0.016966310 -0.023263288
## favor_borderwall 0.118560352 0.022229060
## income -0.008710733 -0.015248340
## age 0.008543642 -0.014757672
## view_republicans -0.031401837 0.010244219
## house_enthused 0.001078784 0.009793473
## education -0.011495691 -0.005540192
## gender -0.013595153 0.004071648
## favor_gunregs 0.015621123 0.001827250
## ideology -0.022674235 -0.000558741
### Component Loadings Plot ###
loadings.cal.dem <- data.frame(PA1 = loadings.cal.dem[,1], PA2 = loadings.cal.dem[,2], Contribution = abs(loadings.cal.dem[,1]) + abs(loadings.cal.dem[,2]),
PA01 = rep(0,nrow(loadings.cal.dem)), PA02 = rep(0,nrow(loadings.cal.dem)),
Variable = colnames(cal2.X))
loadings.cal.dem.plot <- ggplot(data = loadings.cal.dem, aes(colour = Contribution)) +
geom_segment(aes(x = PA01, y = PA02, xend = PA1, yend = PA2),
arrow = arrow(length = unit(0.03, "npc"))) +
geom_vline(xintercept = 0, linetype = "dashed") +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_text_repel(aes(x = PA1, y = PA2, label = Variable)) +
geom_circle(aes(x0 = 0, y0 = 0, r = 1), color = "gray") +
coord_fixed(xlim = c(-1, 1), ylim = c(-1,1)) +
labs(x = "cPCA 1", y = "cPCA 2",
title = "Contrastive Principal Component Loadings\nTarget Group: Democrats") +
scale_color_gradient2(low = "#00AFBB", mid = "#E7B800", high = "#FC4E07", na.value = NA, midpoint = .4)
loadings.cal.dem.plot
## Warning: ggrepel: 16 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
###Democrats as target group###
cpca.cal.rep <- scPCA(target = cal2.X[cal2$party == "Republican",],
background = cal2.X[cal2$party == "Democrat",],
penalties = 0,
n_centers = 2)
# create a dataframe to be plotted
cpca.df.cal.rep <- cpca.cal.rep$x %>%
as_tibble() %>%
mutate(wall = factor(cal2$favor_borderwall[cal2$party == "Republican"], labels = c("Approve", "Disapprove")), approve_trump = factor(cal2$approve_trump[cal2$party == "Republican"], labels = c("Approve", "Disapprove")), vote_house = cal2$vote_house[cal2$party == "Republican"])
colnames(cpca.df.cal.rep) <- c("cPC1", "cPC2", "Wall_Approval", "Trump_Approval", "Vote_House")
# Wall Approval Plot
cpca.plot.cal.rep.wall <- ggplot(cpca.df.cal.rep, aes(x = cPC1, y = cPC2, colour = Wall_Approval, fill = Wall_Approval)) +
geom_point(alpha = 0.5) +
labs(title = "cPCA Target Group: Republicans", color = "Border Wall\nApproval", fill = "Border Wall\nApproval") +
theme_minimal() +
scale_color_manual(values = c("#00AFBB", "#FC4E07")) +
scale_fill_manual(values = c("#00AFBB", "#FC4E07")) +
stat_ellipse(geom = "polygon", alpha = .3)
cpca.plot.cal.rep.wall
# Trump Approval Plot
cpca.plot.cal.rep.trump <- ggplot(cpca.df.cal.rep, aes(x = cPC1, y = cPC2, colour = Trump_Approval, fill = Trump_Approval)) +
geom_point(alpha = 0.5) +
labs(title = "cPCA Target Group: Republicans", color = "Trump\nApproval", fill = "Trump\nApproval") +
theme_minimal() +
scale_color_manual(values = c("#00AFBB", "#FC4E07")) +
scale_fill_manual(values = c("#00AFBB", "#FC4E07")) +
stat_ellipse(geom = "polygon", alpha = .3)
cpca.plot.cal.rep.trump
# Congressional vote Plot
cpca.plot.cal.rep.house <- ggplot(cpca.df.cal.rep, aes(x = cPC1, y = cPC2, colour = Vote_House, fill = Vote_House)) +
geom_point(alpha = 0.5) +
labs(title = "cPCA Target Group: Republicans", color = "House Vote", fill = "House Vote") +
theme_minimal() +
scale_color_manual(values = c("#FC4E07", "#00AFBB")) +
scale_fill_manual(values = c("#FC4E07", "#00AFBB")) +
stat_ellipse(geom = "polygon", alpha = .3)
cpca.plot.cal.rep.house
summary(lm(as.numeric(Wall_Approval) ~ cPC1 + cPC2, data = cpca.df.cal.rep))
##
## Call:
## lm(formula = as.numeric(Wall_Approval) ~ cPC1 + cPC2, data = cpca.df.cal.rep)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.70609 -0.09284 0.00041 0.10592 0.63513
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.25564 0.01186 105.84 <2e-16 ***
## cPC1 -0.62215 0.01911 -32.55 <2e-16 ***
## cPC2 0.35129 0.03058 11.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1935 on 263 degrees of freedom
## Multiple R-squared: 0.8055, Adjusted R-squared: 0.804
## F-statistic: 544.5 on 2 and 263 DF, p-value: < 2.2e-16
summary(glm(Wall_Approval ~ cPC1 + cPC2, data = cpca.df.cal.rep, family = binomial()))
##
## Call:
## glm(formula = Wall_Approval ~ cPC1 + cPC2, family = binomial(),
## data = cpca.df.cal.rep)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.60033 -0.04092 -0.01625 0.00285 2.17930
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.2198 0.9959 -4.237 2.26e-05 ***
## cPC1 -10.2438 2.0491 -4.999 5.76e-07 ***
## cPC2 5.4784 1.3848 3.956 7.61e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 302.413 on 265 degrees of freedom
## Residual deviance: 30.953 on 263 degrees of freedom
## AIC: 36.953
##
## Number of Fisher Scoring iterations: 9
loadings.cal.rep <- cpca.cal.rep$rotation
rownames(loadings.cal.rep) <- colnames(cal2.X)
loadings.cal.rep[rev(order(abs(loadings.cal.rep[,1]))),]
## V1 V2
## favor_borderwall -0.758839394 0.376646721
## view_republicans -0.430135247 -0.287753336
## favor_obamacare 0.250772833 0.111912723
## approve_brown 0.179210804 0.010090234
## favor_localrules 0.176983528 0.076818618
## favor_direction 0.154202561 -0.012938780
## approve_congress -0.139572527 -0.513576791
## house_enthused -0.133185879 -0.063515002
## interest -0.119955785 -0.064529695
## favor_gunregs 0.119002911 0.027896284
## house_trump 0.116420624 0.010328517
## education 0.040602510 0.018909800
## us_econ -0.039833199 -0.064297506
## cal_econ 0.037145127 0.064449190
## approve_trump -0.036983699 -0.632461170
## ideology 0.035946539 0.012364788
## size_govt -0.032974872 0.066538253
## gender 0.024122978 0.008415217
## view_democrats 0.019914883 0.030567034
## approve_stateleg 0.016410114 0.023758741
## race -0.007095832 -0.018055134
## age 0.005215688 0.004253755
## income 0.005016890 0.002988666
## favor_kavanaugh -0.002161387 0.261993868
loadings.cal.rep[rev(order(abs(loadings.cal.rep[,2]))),]
## V1 V2
## approve_trump -0.036983699 -0.632461170
## approve_congress -0.139572527 -0.513576791
## favor_borderwall -0.758839394 0.376646721
## view_republicans -0.430135247 -0.287753336
## favor_kavanaugh -0.002161387 0.261993868
## favor_obamacare 0.250772833 0.111912723
## favor_localrules 0.176983528 0.076818618
## size_govt -0.032974872 0.066538253
## interest -0.119955785 -0.064529695
## cal_econ 0.037145127 0.064449190
## us_econ -0.039833199 -0.064297506
## house_enthused -0.133185879 -0.063515002
## view_democrats 0.019914883 0.030567034
## favor_gunregs 0.119002911 0.027896284
## approve_stateleg 0.016410114 0.023758741
## education 0.040602510 0.018909800
## race -0.007095832 -0.018055134
## favor_direction 0.154202561 -0.012938780
## ideology 0.035946539 0.012364788
## house_trump 0.116420624 0.010328517
## approve_brown 0.179210804 0.010090234
## gender 0.024122978 0.008415217
## age 0.005215688 0.004253755
## income 0.005016890 0.002988666
### Component Loadings Plot ###
loadings.cal.rep <- data.frame(PA1 = loadings.cal.rep[,1], PA2 = loadings.cal.rep[,2], Contribution = abs(loadings.cal.rep[,1]) + abs(loadings.cal.rep[,2]),
PA01 = rep(0,nrow(loadings.cal.rep)), PA02 = rep(0,nrow(loadings.cal.rep)),
Variable = colnames(cal2.X))
loadings.cal.rep.plot <- ggplot(data = loadings.cal.rep, aes(colour = Contribution)) +
geom_segment(aes(x = PA01, y = PA02, xend = PA1, yend = PA2),
arrow = arrow(length = unit(0.03, "npc"))) +
geom_vline(xintercept = 0, linetype = "dashed") +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_text_repel(aes(x = PA1, y = PA2, label = Variable)) +
geom_circle(aes(x0 = 0, y0 = 0, r = 1), color = "gray") +
coord_fixed(xlim = c(-1, 1), ylim = c(-1,1)) +
labs(x = "ICA 1", y = "ICA 2",
title = "Contrastive Principal Component Loadings\nTarget Group: Republicans") +
scale_color_gradient2(low = "#00AFBB", mid = "#E7B800", high = "#FC4E07", na.value = NA, midpoint = .4)
loadings.cal.rep.plot
## Warning: ggrepel: 19 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
btp <- readRDS("E:/Dropbox/ICPSRML2020_admin/data/BTP_cPCA.RDS")
btp.X <- btp[,-c(1,2,3,4)]
btp.pca <- prcomp(btp.X, center=T, scale=T)
scree.plot.btp <- fviz_eig(btp.pca)
scree.plot.btp
component.plot.btp <- fviz_pca_var(btp.pca,
col.var = "contrib", # Color by contributions to the PC
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), #Choose gradient colors
title = "Component Loadings Plot - PCA", #Change plot title
legend.title = "Contribution", #Change legend title
repel = TRUE) # Repel labels away from each other
component.plot.btp
## Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ind.plot.btp.country <- fviz_pca_ind(btp.pca,
col.ind = btp$country,
palette = c("#FC4E07", "#00AFBB"), #Choose colors for Dems v. Reps
addEllipses = TRUE, #Add group ellipses
legend.title = "Country",
title = "Individual Plot - PCA",
geom = "point",
repel = F)
ind.plot.btp.country
ind.plot.btp.pid <- fviz_pca_ind(btp.pca,
col.ind = btp$pid.merge,
palette = c("#00AFBB","forestgreen", "#FC4E07"), #Choose colors for Dems v. Reps
addEllipses = TRUE, #Add group ellipses
legend.title = "Party ID",
title = "Individual Plot - PCA",
geom = "point",
repel = F)
ind.plot.btp.pid
## Warning: Removed 733 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
biplot.btp <- fviz_pca_biplot(btp.pca,
addEllipses = TRUE,
title = "Bi-Plot (Individual and Variable) - PCA",
geom = "point",
gradient.cols = c("springgreen3", "#E7B800", "orchid3"),
palette = c("#FC4E07", "#00AFBB"),
col.var = "contrib",
fill.ind = btp$country, col.ind = "white",
pointshape = 21,
pointsize = 2,
repel = T) +
labs(fill = "Country",
color = "Variable\nContribution")
biplot.btp
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
###US as target group###
cpca.btp.us <- scPCA(target = btp.X[btp$country == "US",],
background = btp.X[btp$country == "UK",],
penalties = 0,
n_centers = 2)
# create a dataframe to be plotted
cpca.df.btp.us <- cpca.btp.us$x %>%
as_tibble() %>%
mutate(pid = btp$uspartyid[btp$country == "US"], auth = factor(btp$authsub3[btp$country == "US"], labels = c("Low", "High")))
colnames(cpca.df.btp.us) <- c("cPC1", "cPC2", "Party_ID", "Authoritarian")
cpca.df.btp.us.pid <- na.omit(cpca.df.btp.us)
### Party ID Plot
cpca.plot.btp.us.pid <- ggplot(cpca.df.btp.us.pid, aes(x = cPC1, y = cPC2, colour = Party_ID, fill = Party_ID)) +
geom_point(alpha = 0.5) +
labs(title = "cPCA Target Group: US", color = "Party ID", fill = "Party ID") +
theme_minimal() +
scale_color_manual(values = c("#00AFBB", "forestgreen", "#FC4E07")) +
scale_fill_manual(values = c("#00AFBB", "forestgreen", "#FC4E07")) +
stat_ellipse(geom = "polygon", alpha = .3)
cpca.plot.btp.us.pid
### Authoritarian Plot
cpca.plot.btp.us.auth <- ggplot(cpca.df.btp.us, aes(x = cPC1, y = cPC2, colour = Authoritarian, fill = Authoritarian)) +
geom_point(alpha = 0.5) +
labs(title = "cPCA Target Group: US", color = "Authoritarian") +
theme_minimal() +
scale_color_manual(values = c("#00AFBB", "#FC4E07")) +
scale_fill_manual(values = c("#00AFBB", "#FC4E07")) +
stat_ellipse(geom = "polygon", alpha = .3)
cpca.plot.btp.us.auth
### Both Plot
cpca.plot.btp.us.both <- ggplot(cpca.df.btp.us, aes(x = cPC1, y = cPC2, color = Party_ID, fill = Authoritarian)) +
geom_jitter() +
labs(title = "cPCA Target Group: US", color = "Party ID", fill = "Authoritarianism") +
theme_minimal() +
scale_color_manual(values = c("#00AFBB", "forestgreen", "#FC4E07")) +
scale_fill_manual(values = c("#E7B800", "orchid3")) +
stat_ellipse(aes(x = cPC1, y = cPC2,fill = Authoritarian),geom = "polygon", alpha = .3, inherit.aes = F)
cpca.plot.btp.us.both
## Warning: Removed 53 rows containing missing values (geom_point).
# Party ID regressions
summary(lm(as.numeric(Party_ID) ~ cPC1 + cPC2, data = cpca.df.btp.us))
##
## Call:
## lm(formula = as.numeric(Party_ID) ~ cPC1 + cPC2, data = cpca.df.btp.us)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.21163 -0.54204 -0.08265 0.52173 1.86325
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.045096 0.019432 105.243 < 2e-16 ***
## cPC1 0.209245 0.006909 30.287 < 2e-16 ***
## cPC2 0.140210 0.043703 3.208 0.00137 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7156 on 1353 degrees of freedom
## (53 observations deleted due to missingness)
## Multiple R-squared: 0.4123, Adjusted R-squared: 0.4114
## F-statistic: 474.6 on 2 and 1353 DF, p-value: < 2.2e-16
summary(glm(Party_ID ~ cPC1 + cPC2, data = cpca.df.btp.us, family = binomial()))
##
## Call:
## glm(formula = Party_ID ~ cPC1 + cPC2, family = binomial(), data = cpca.df.btp.us)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0648 -0.7362 0.2021 0.6845 2.6062
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.73377 0.08044 9.122 <2e-16 ***
## cPC1 0.70886 0.03944 17.975 <2e-16 ***
## cPC2 0.39930 0.15943 2.505 0.0123 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1837.1 on 1355 degrees of freedom
## Residual deviance: 1203.0 on 1353 degrees of freedom
## (53 observations deleted due to missingness)
## AIC: 1209
##
## Number of Fisher Scoring iterations: 5
loadings.us <- cpca.btp.us$rotation
rownames(loadings.us) <- colnames(btp.X)
loadings.us[rev(order(abs(loadings.us[,1]))),]
## V1 V2
## lwa_4 -0.406723132 0.0066299212
## happytrump_1 0.333529576 -0.0029147682
## populism_2 -0.319999220 0.0340266398
## lwa_2 -0.310061575 0.0630453070
## authtrad1_2 -0.265541820 0.0003852233
## sdo_2 -0.256601943 0.0360429524
## authaggression_3 0.208637535 0.0110961265
## lwa_1 -0.201022357 -0.0173206422
## sdo_3 -0.188627447 0.0697628062
## sdo_1 -0.185576240 -0.0146080101
## authaggression_1 0.167486500 -0.0437943194
## suspicion_1 0.151510152 0.0393740291
## roughedup 0.147412845 0.0301092678
## lwa_3 0.139966372 -0.0139270685
## authaggression_4 -0.128805088 0.0267190314
## happybrexit_1 -0.125252324 0.0062415136
## egalitarianism_1 -0.115100323 0.0447251380
## egalitarianism_4 -0.114289273 -0.0300705423
## egalitarianism_2 0.113998047 0.0099779582
## authsub3 -0.101487020 -0.5826853195
## populism_3 0.095450220 -0.0134094672
## authtrad1_3 0.094087030 0.0035035386
## sdo_4 -0.087097102 -0.0281026067
## authtrad1_1 -0.084630391 0.0124571266
## lwa_6 -0.081147476 0.0812410290
## authtrad1_4 0.073157073 -0.0376456895
## suspicion_4 0.071386439 -0.0564709183
## lwa_5 0.065196604 -0.0178370884
## suspicion_2 0.047296291 -0.0425524843
## authsub1 -0.040956272 -0.3952575847
## populism_1 0.040480026 -0.0390836658
## populism_4 0.016711284 0.0167922932
## suspicion_3 0.015868337 0.0134571269
## authsub2 -0.010664363 0.4481403968
## authaggression_2 0.008119384 -0.0110600909
## authsub4 -0.002812553 0.5160920436
## egalitarianism_3 0.001531861 -0.0130372594
loadings.us[rev(order(abs(loadings.us[,2]))),]
## V1 V2
## authsub3 -0.101487020 -0.5826853195
## authsub4 -0.002812553 0.5160920436
## authsub2 -0.010664363 0.4481403968
## authsub1 -0.040956272 -0.3952575847
## lwa_6 -0.081147476 0.0812410290
## sdo_3 -0.188627447 0.0697628062
## lwa_2 -0.310061575 0.0630453070
## suspicion_4 0.071386439 -0.0564709183
## egalitarianism_1 -0.115100323 0.0447251380
## authaggression_1 0.167486500 -0.0437943194
## suspicion_2 0.047296291 -0.0425524843
## suspicion_1 0.151510152 0.0393740291
## populism_1 0.040480026 -0.0390836658
## authtrad1_4 0.073157073 -0.0376456895
## sdo_2 -0.256601943 0.0360429524
## populism_2 -0.319999220 0.0340266398
## roughedup 0.147412845 0.0301092678
## egalitarianism_4 -0.114289273 -0.0300705423
## sdo_4 -0.087097102 -0.0281026067
## authaggression_4 -0.128805088 0.0267190314
## lwa_5 0.065196604 -0.0178370884
## lwa_1 -0.201022357 -0.0173206422
## populism_4 0.016711284 0.0167922932
## sdo_1 -0.185576240 -0.0146080101
## lwa_3 0.139966372 -0.0139270685
## suspicion_3 0.015868337 0.0134571269
## populism_3 0.095450220 -0.0134094672
## egalitarianism_3 0.001531861 -0.0130372594
## authtrad1_1 -0.084630391 0.0124571266
## authaggression_3 0.208637535 0.0110961265
## authaggression_2 0.008119384 -0.0110600909
## egalitarianism_2 0.113998047 0.0099779582
## lwa_4 -0.406723132 0.0066299212
## happybrexit_1 -0.125252324 0.0062415136
## authtrad1_3 0.094087030 0.0035035386
## happytrump_1 0.333529576 -0.0029147682
## authtrad1_2 -0.265541820 0.0003852233
### Component Loadings Plot ###
loadings.us <- data.frame(PA1 = loadings.us[,1], PA2 = loadings.us[,2], Contribution = abs(loadings.us[,1]) + abs(loadings.us[,2]),
PA01 = rep(0,nrow(loadings.us)), PA02 = rep(0,nrow(loadings.us)),
Variable = colnames(btp.X))
loadings.us.plot <- ggplot(data = loadings.us, aes(colour = Contribution)) +
geom_segment(aes(x = PA01, y = PA02, xend = PA1, yend = PA2),
arrow = arrow(length = unit(0.03, "npc"))) +
geom_vline(xintercept = 0, linetype = "dashed") +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_text_repel(aes(x = PA1, y = PA2, label = Variable)) +
geom_circle(aes(x0 = 0, y0 = 0, r = 1), color = "gray") +
coord_fixed(xlim = c(-1, 1), ylim = c(-1,1)) +
labs(x = "cPCA 1", y = "cPCA 2",
title = "Contrastive Principal Component Loadings\nTarget Group: US Respondents") +
scale_color_gradient2(low = "#00AFBB", mid = "#E7B800", high = "#FC4E07", na.value = NA, midpoint = .4)
loadings.us.plot
## Warning: ggrepel: 33 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
###UK as target group###
cpca.btp.uk <- scPCA(target = btp.X[btp$country == "UK",],
background = btp.X[btp$country == "US",],
penalties = 0,
n_centers = 2)
# create a dataframe to be plotted
cpca.df.btp.uk <- cpca.btp.uk$x %>%
as_tibble() %>%
mutate(brexit = btp$happybrexit_1[btp$country == "UK"], brexit.cat = factor(btp$happybrexit_1[btp$country == "UK"], labels = c("Low","Low","Low","Low","Medium", "Medium", "Medium", "High", "High", "High", "High")))
colnames(cpca.df.btp.uk) <- c("cPC1", "cPC2", "Brexit_Support", "Brexit.cat")
# plot the results
cpca.plot.btp.uk <- ggplot(cpca.df.btp.uk, aes(x = cPC1, y = cPC2, colour = Brexit_Support, fill = Brexit.cat)) +
geom_point(alpha = 0.5) +
labs(title = "cPCA Target Group: UK", color = "Brexit\nSupport", fill = "Brexit\nSupport\n(Categorical)") +
theme_minimal() + scale_color_gradient2(low = "#00AFBB", mid = "#E7B800", high = "#FC4E07", midpoint = 5) +
scale_fill_manual(values = c("#00AFBB","gold", "#FC4E07")) +
stat_ellipse(geom = "polygon", alpha = .3)
cpca.plot.btp.uk
summary(lm(Brexit_Support ~ cPC1 + cPC2, data = cpca.df.btp.uk))
##
## Call:
## lm(formula = Brexit_Support ~ cPC1 + cPC2, data = cpca.df.btp.uk)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9934 -0.8550 -0.0550 0.7715 4.7209
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.869543 0.028532 170.67 <2e-16 ***
## cPC1 1.247196 0.009907 125.89 <2e-16 ***
## cPC2 1.130860 0.043351 26.09 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.251 on 1921 degrees of freedom
## Multiple R-squared: 0.8992, Adjusted R-squared: 0.8991
## F-statistic: 8566 on 2 and 1921 DF, p-value: < 2.2e-16
summary(glm(Brexit_Support ~ cPC1 + cPC2, data = cpca.df.btp.uk))
##
## Call:
## glm(formula = Brexit_Support ~ cPC1 + cPC2, data = cpca.df.btp.uk)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.9934 -0.8550 -0.0550 0.7715 4.7209
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.869543 0.028532 170.67 <2e-16 ***
## cPC1 1.247196 0.009907 125.89 <2e-16 ***
## cPC2 1.130860 0.043351 26.09 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1.566232)
##
## Null deviance: 29842.3 on 1923 degrees of freedom
## Residual deviance: 3008.7 on 1921 degrees of freedom
## AIC: 6328.3
##
## Number of Fisher Scoring iterations: 2
loadings.uk <- cpca.btp.uk$rotation
rownames(loadings.uk) <- colnames(btp.X)
loadings.uk[rev(order(abs(loadings.uk[,1]))),]
## V1 V2
## happybrexit_1 0.749839745 2.701631e-02
## lwa_3 -0.334838210 4.671154e-02
## lwa_4 0.268863492 -1.286241e-01
## happytrump_1 -0.207652779 1.858911e-02
## authaggression_4 -0.176150035 6.652192e-02
## populism_3 -0.163668835 5.679898e-02
## authaggression_1 -0.156119792 -7.214861e-03
## populism_4 0.136800007 2.488584e-01
## authaggression_2 0.134329946 5.573821e-02
## sdo_4 -0.132081068 3.213242e-02
## roughedup -0.128679690 2.097051e-02
## egalitarianism_3 -0.094974741 1.266431e-02
## sdo_2 0.080156310 3.484483e-02
## authtrad1_4 0.075817842 -3.403901e-02
## populism_1 0.073163399 -3.479017e-02
## sdo_3 -0.064866130 6.804666e-02
## authaggression_3 0.056758293 -1.219197e-01
## egalitarianism_1 0.056312323 -6.502809e-02
## sdo_1 0.049984361 -9.189162e-03
## egalitarianism_4 -0.048043500 3.718706e-02
## egalitarianism_2 -0.045274475 -2.235646e-03
## authtrad1_2 -0.043579612 2.711178e-02
## lwa_1 0.042242056 6.555546e-02
## populism_2 0.038565617 8.616577e-05
## authsub1 0.036613787 5.986237e-01
## suspicion_2 -0.035309228 -1.047677e-01
## authtrad1_1 0.034304286 -5.242067e-03
## suspicion_4 -0.030679040 1.291797e-03
## lwa_2 0.029531777 1.333303e-02
## suspicion_1 0.025594384 -1.150830e-01
## lwa_5 -0.020712192 3.706571e-02
## authsub2 -0.020192759 -6.417635e-01
## authtrad1_3 -0.019529206 -5.089239e-03
## authsub4 0.016309102 2.317781e-01
## suspicion_3 -0.012326195 -4.672539e-02
## authsub3 -0.012325480 1.344138e-01
## lwa_6 0.008349547 4.477272e-02
loadings.uk[rev(order(abs(loadings.uk[,2]))),]
## V1 V2
## authsub2 -0.020192759 -6.417635e-01
## authsub1 0.036613787 5.986237e-01
## populism_4 0.136800007 2.488584e-01
## authsub4 0.016309102 2.317781e-01
## authsub3 -0.012325480 1.344138e-01
## lwa_4 0.268863492 -1.286241e-01
## authaggression_3 0.056758293 -1.219197e-01
## suspicion_1 0.025594384 -1.150830e-01
## suspicion_2 -0.035309228 -1.047677e-01
## sdo_3 -0.064866130 6.804666e-02
## authaggression_4 -0.176150035 6.652192e-02
## lwa_1 0.042242056 6.555546e-02
## egalitarianism_1 0.056312323 -6.502809e-02
## populism_3 -0.163668835 5.679898e-02
## authaggression_2 0.134329946 5.573821e-02
## suspicion_3 -0.012326195 -4.672539e-02
## lwa_3 -0.334838210 4.671154e-02
## lwa_6 0.008349547 4.477272e-02
## egalitarianism_4 -0.048043500 3.718706e-02
## lwa_5 -0.020712192 3.706571e-02
## sdo_2 0.080156310 3.484483e-02
## populism_1 0.073163399 -3.479017e-02
## authtrad1_4 0.075817842 -3.403901e-02
## sdo_4 -0.132081068 3.213242e-02
## authtrad1_2 -0.043579612 2.711178e-02
## happybrexit_1 0.749839745 2.701631e-02
## roughedup -0.128679690 2.097051e-02
## happytrump_1 -0.207652779 1.858911e-02
## lwa_2 0.029531777 1.333303e-02
## egalitarianism_3 -0.094974741 1.266431e-02
## sdo_1 0.049984361 -9.189162e-03
## authaggression_1 -0.156119792 -7.214861e-03
## authtrad1_1 0.034304286 -5.242067e-03
## authtrad1_3 -0.019529206 -5.089239e-03
## egalitarianism_2 -0.045274475 -2.235646e-03
## suspicion_4 -0.030679040 1.291797e-03
## populism_2 0.038565617 8.616577e-05
###Principal Component Loadings###
loadings.uk <- data.frame(PA1 = loadings.uk[,1], PA2 = loadings.uk[,2], Contribution = abs(loadings.uk[,1]) + abs(loadings.uk[,2]),
PA01 = rep(0,nrow(loadings.uk)), PA02 = rep(0,nrow(loadings.uk)),
Variable = colnames(btp.X))
loadings.uk.plot <- ggplot(data = loadings.uk, aes(colour = Contribution)) +
geom_segment(aes(x = PA01, y = PA02, xend = PA1, yend = PA2),
arrow = arrow(length = unit(0.03, "npc"))) +
geom_vline(xintercept = 0, linetype = "dashed") +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_text_repel(aes(x = PA1, y = PA2, label = Variable)) +
geom_circle(aes(x0 = 0, y0 = 0, r = 1), color = "gray") +
coord_fixed(xlim = c(-1, 1), ylim = c(-1,1)) +
labs(x = "cPCA 1", y = "cPCA 2",
title = "Contrastive Principal Component Loadings\nTarget Group: UK Respondents") +
scale_color_gradient2(low = "#00AFBB", mid = "#E7B800", high = "#FC4E07", na.value = NA, midpoint = .4)
loadings.uk.plot
## Warning: ggrepel: 32 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ica.btp <- fastICA(btp.X, 2, "deflation", "logcosh", tol = 0.00001, row.norm = T, verbose = T)
## Centering
## Whitening
## Deflation FastICA using logcosh approx. to neg-entropy function
## Component 1
## Iteration 1 tol = 0.2051195
## Iteration 2 tol = 0.2005046
## Iteration 3 tol = 0.1970449
## Iteration 4 tol = 0.1925211
## Iteration 5 tol = 0.1890349
## Iteration 6 tol = 0.1845145
## Iteration 7 tol = 0.1809467
## Iteration 8 tol = 0.1763612
## Iteration 9 tol = 0.1726652
## Iteration 10 tol = 0.16796
## Iteration 11 tol = 0.1640965
## Iteration 12 tol = 0.1592283
## Iteration 13 tol = 0.1551649
## Iteration 14 tol = 0.150102
## Iteration 15 tol = 0.1458151
## Iteration 16 tol = 0.1405394
## Iteration 17 tol = 0.1360168
## Iteration 18 tol = 0.1305265
## Iteration 19 tol = 0.1257717
## Iteration 20 tol = 0.1200855
## Iteration 21 tol = 0.1151213
## Iteration 22 tol = 0.1092821
## Iteration 23 tol = 0.1041548
## Iteration 24 tol = 0.09823166
## Iteration 25 tol = 0.09301263
## Iteration 26 tol = 0.08709983
## Iteration 27 tol = 0.08188361
## Iteration 28 tol = 0.0760954
## Iteration 29 tol = 0.07099334
## Iteration 30 tol = 0.06545341
## Iteration 31 tol = 0.0605826
## Iteration 32 tol = 0.05540997
## Iteration 33 tol = 0.05087934
## Iteration 34 tol = 0.04617358
## Iteration 35 tol = 0.04207046
## Iteration 36 tol = 0.03789982
## Iteration 37 tol = 0.03428033
## Iteration 38 tol = 0.03067592
## Iteration 39 tol = 0.02756161
## Iteration 40 tol = 0.02451851
## Iteration 41 tol = 0.02189914
## Iteration 42 tol = 0.01938313
## Iteration 43 tol = 0.01722392
## Iteration 44 tol = 0.0151811
## Iteration 45 tol = 0.01343168
## Iteration 46 tol = 0.01179827
## Iteration 47 tol = 0.01040124
## Iteration 48 tol = 0.00911154
## Iteration 49 tol = 0.008009116
## Iteration 50 tol = 0.007001115
## Iteration 51 tol = 0.006139481
## Iteration 52 tol = 0.005358002
## Iteration 53 tol = 0.004689698
## Iteration 54 tol = 0.004087667
## Iteration 55 tol = 0.003572435
## Iteration 56 tol = 0.003110922
## Iteration 57 tol = 0.002715572
## Iteration 58 tol = 0.002363117
## Iteration 59 tol = 0.002060866
## Iteration 60 tol = 0.001792474
## Iteration 61 tol = 0.001562056
## Iteration 62 tol = 0.001358125
## Iteration 63 tol = 0.001182852
## Iteration 64 tol = 0.001028157
## Iteration 65 tol = 0.000895057
## Iteration 66 tol = 0.0007778563
## Iteration 67 tol = 0.0006769135
## Iteration 68 tol = 0.0005882022
## Iteration 69 tol = 0.0005117238
## Iteration 70 tol = 0.0004446233
## Iteration 71 tol = 0.0003867248
## Iteration 72 tol = 0.0003359968
## Iteration 73 tol = 0.0002921903
## Iteration 74 tol = 0.0002538544
## Iteration 75 tol = 0.0002207253
## Iteration 76 tol = 0.0001917625
## Iteration 77 tol = 0.0001667171
## Iteration 78 tol = 0.0001448402
## Iteration 79 tol = 0.0001259112
## Iteration 80 tol = 0.000109389
## Iteration 81 tol = 9.508578e-05
## Iteration 82 tol = 8.260895e-05
## Iteration 83 tol = 7.180289e-05
## Iteration 84 tol = 6.238169e-05
## Iteration 85 tol = 5.421878e-05
## Iteration 86 tol = 4.710526e-05
## Iteration 87 tol = 4.09396e-05
## Iteration 88 tol = 3.55687e-05
## Iteration 89 tol = 3.091199e-05
## Iteration 90 tol = 2.685692e-05
## Iteration 91 tol = 2.33401e-05
## Iteration 92 tol = 2.027853e-05
## Iteration 93 tol = 1.76227e-05
## Iteration 94 tol = 1.531125e-05
## Iteration 95 tol = 1.330571e-05
## Iteration 96 tol = 1.15606e-05
## Iteration 97 tol = 1.004616e-05
## Iteration 98 tol = 8.728639e-06
## Component 2
## Iteration 1 tol = 0
ica.X <- t(ica.btp$A) # Estimated matrix
rownames(ica.X) <- colnames(btp.X)
colnames(ica.X) <- c("IC1", "IC2")
ica.X[rev(order(abs(ica.X[,1]))),1:2] #Factor/Component loadings
## IC1 IC2
## authtrad1_3 0.68490180 0.13089438
## authtrad1_4 0.68167633 0.07680311
## authtrad1_1 0.66626428 0.10268743
## happytrump_1 0.65330678 0.20916839
## lwa_2 -0.63959067 0.30735530
## happybrexit_1 0.56894282 0.17811329
## authtrad1_2 -0.55968342 0.33643366
## egalitarianism_2 0.55694820 0.43817681
## egalitarianism_4 -0.51434772 0.03936232
## roughedup 0.50628236 0.35895608
## authaggression_3 0.48590738 0.11277559
## sdo_2 -0.46516595 0.15168546
## egalitarianism_3 0.46495268 0.49183156
## authsub1 0.46402278 -0.07320862
## authaggression_2 0.46012148 0.10798396
## authaggression_4 -0.44966986 0.21432098
## lwa_1 -0.40659577 0.46117866
## egalitarianism_1 -0.38950833 -0.01955628
## authsub2 0.38203297 0.14013115
## authsub4 0.34988129 0.16482354
## lwa_4 -0.33342333 0.32084164
## sdo_1 0.32222211 0.67102921
## authaggression_1 -0.27176177 0.04818908
## authsub3 0.26974796 0.19480693
## suspicion_4 0.17833443 0.02289231
## lwa_5 0.16019212 0.22214459
## sdo_3 0.15061466 0.76036962
## lwa_3 0.14797780 -0.08557332
## sdo_4 0.14107600 0.76355368
## populism_1 0.13572520 0.49886966
## populism_3 0.12297414 0.31852355
## populism_4 0.10950240 -0.09214674
## lwa_6 0.09806222 0.52869531
## populism_2 -0.09744551 0.43427427
## suspicion_3 0.09410816 -0.23564341
## suspicion_2 0.01907737 -0.28135923
## suspicion_1 0.01153922 -0.38524179
ica.X[rev(order(abs(ica.X[,2]))),1:2] #Factor/Component loadings
## IC1 IC2
## sdo_4 0.14107600 0.76355368
## sdo_3 0.15061466 0.76036962
## sdo_1 0.32222211 0.67102921
## lwa_6 0.09806222 0.52869531
## populism_1 0.13572520 0.49886966
## egalitarianism_3 0.46495268 0.49183156
## lwa_1 -0.40659577 0.46117866
## egalitarianism_2 0.55694820 0.43817681
## populism_2 -0.09744551 0.43427427
## suspicion_1 0.01153922 -0.38524179
## roughedup 0.50628236 0.35895608
## authtrad1_2 -0.55968342 0.33643366
## lwa_4 -0.33342333 0.32084164
## populism_3 0.12297414 0.31852355
## lwa_2 -0.63959067 0.30735530
## suspicion_2 0.01907737 -0.28135923
## suspicion_3 0.09410816 -0.23564341
## lwa_5 0.16019212 0.22214459
## authaggression_4 -0.44966986 0.21432098
## happytrump_1 0.65330678 0.20916839
## authsub3 0.26974796 0.19480693
## happybrexit_1 0.56894282 0.17811329
## authsub4 0.34988129 0.16482354
## sdo_2 -0.46516595 0.15168546
## authsub2 0.38203297 0.14013115
## authtrad1_3 0.68490180 0.13089438
## authaggression_3 0.48590738 0.11277559
## authaggression_2 0.46012148 0.10798396
## authtrad1_1 0.66626428 0.10268743
## populism_4 0.10950240 -0.09214674
## lwa_3 0.14797780 -0.08557332
## authtrad1_4 0.68167633 0.07680311
## authsub1 0.46402278 -0.07320862
## authaggression_1 -0.27176177 0.04818908
## egalitarianism_4 -0.51434772 0.03936232
## suspicion_4 0.17833443 0.02289231
## egalitarianism_1 -0.38950833 -0.01955628
#### Plotting Individual Loadings ####
# create a dataframe to be plotted
ica.df.btp <- ica.btp$S %>%
as_tibble() %>%
mutate(brexit = btp$happybrexit_1, trump = btp$happytrump_1, pid = btp$pid.merge, country = btp$country, brexit.cat = factor(btp$happybrexit_1, labels = c("Low","Low","Low","Low","Medium", "Medium", "Medium", "High", "High", "High", "High")), trump.cat = factor(btp$happybrexit_1, labels = c("Low","Low","Low","Low","Medium", "Medium", "Medium", "High", "High", "High", "High")))
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
colnames(ica.df.btp) <- c("IC1", "IC2", "Brexit_Support", "Trump_Support", "Party_ID", "Country", "Brexit.cat", "Trump.cat")
# plot Brexit Results
ica.plot.btp.brexit <- ggplot(ica.df.btp, aes(x = IC1, y = IC2, color = Brexit_Support, fill = Brexit.cat)) +
geom_point(alpha = 0.5) +
labs(title = "ICA BtP", color = "Brexit\nSupport", fill = "Brexit\nSupport\n(Categorical)") +
theme_minimal() + scale_color_gradient2(low = "#00AFBB", mid = "#E7B800", high = "#FC4E07", midpoint = 5) +
scale_fill_manual(values = c("#00AFBB","gold", "#FC4E07")) +
stat_ellipse( geom = "polygon", alpha = .3)
ica.plot.btp.brexit
# plot Trump Results
ica.plot.btp.trump <- ggplot(ica.df.btp, aes(x = IC1, y = IC2, color = Trump_Support, fill = Brexit.cat)) +
geom_point(alpha = 0.5) +
labs(title = "ICA BtP", color = "Trump\nSupport", fill = "Trump\nSupport\n(Categorical)") +
theme_minimal() + scale_color_gradient2(low = "#00AFBB", mid = "#E7B800", high = "#FC4E07", midpoint = 5) +
scale_fill_manual(values = c("#00AFBB","gold", "#FC4E07")) +
stat_ellipse( geom = "polygon", alpha = .3)
ica.plot.btp.trump
# plot PID Results
ica.plot.btp.pid <- ggplot(ica.df.btp, aes(x = IC1, y = IC2, color = Party_ID, fill = Party_ID)) +
geom_point(alpha = 0.5) +
labs(title = "ICA BtP", color = "Party\nID") +
theme_minimal() + scale_color_manual(values = c("#00AFBB", "forestgreen", "#FC4E07", "white")) +
scale_fill_manual(values = c("#00AFBB", "forestgreen", "#FC4E07", "white")) +
stat_ellipse(geom = "polygon", alpha = .3)
ica.plot.btp.pid
## Warning: Removed 733 rows containing missing values (geom_point).
# # plot Country Results
# ica.plot.btp.country <- ggplot(ica.df.btp, aes(x = IC1, y = IC2, color = Country)) +
# geom_point(alpha = 0.5) +
# labs(title = "ICA BtP", color = "Country") +
# theme_minimal() #+ scale_color_manual(values = c("#00AFBB", "forestgreen", "#FC4E07", "white"))
# ica.plot.btp.country
# IC Regressions
summary(lm(Brexit_Support ~ IC1 + IC2, data = ica.df.btp))
##
## Call:
## lm(formula = Brexit_Support ~ IC1 + IC2, data = ica.df.btp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.7925 -2.0306 0.0036 2.0472 10.2891
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.94959 0.04951 99.97 <2e-16 ***
## IC1 2.02522 0.04951 40.90 <2e-16 ***
## IC2 0.63402 0.04951 12.80 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.858 on 3330 degrees of freedom
## Multiple R-squared: 0.3555, Adjusted R-squared: 0.3551
## F-statistic: 918.5 on 2 and 3330 DF, p-value: < 2.2e-16
summary(glm(Brexit.cat ~ IC1 + IC2, data = ica.df.btp, family = binomial()))
##
## Call:
## glm(formula = Brexit.cat ~ IC1 + IC2, family = binomial(), data = ica.df.btp)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8889 -0.7886 0.4695 0.7665 2.5861
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.68798 0.04365 15.762 <2e-16 ***
## IC1 1.45241 0.05893 24.648 <2e-16 ***
## IC2 0.41595 0.04524 9.195 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4389.2 on 3332 degrees of freedom
## Residual deviance: 3296.0 on 3330 degrees of freedom
## AIC: 3302
##
## Number of Fisher Scoring iterations: 5
summary(lm(Trump_Support ~ IC1 + IC2, data = ica.df.btp))
##
## Call:
## lm(formula = Trump_Support ~ IC1 + IC2, data = ica.df.btp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.4322 -2.0227 0.1637 1.8774 11.0039
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.1809 0.0465 89.91 <2e-16 ***
## IC1 2.4099 0.0465 51.83 <2e-16 ***
## IC2 0.7716 0.0465 16.59 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.685 on 3330 degrees of freedom
## Multiple R-squared: 0.4707, Adjusted R-squared: 0.4704
## F-statistic: 1481 on 2 and 3330 DF, p-value: < 2.2e-16
summary(glm(Trump.cat ~ IC1 + IC2, data = ica.df.btp, family = binomial()))
##
## Call:
## glm(formula = Trump.cat ~ IC1 + IC2, family = binomial(), data = ica.df.btp)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8889 -0.7886 0.4695 0.7665 2.5861
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.68798 0.04365 15.762 <2e-16 ***
## IC1 1.45241 0.05893 24.648 <2e-16 ***
## IC2 0.41595 0.04524 9.195 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4389.2 on 3332 degrees of freedom
## Residual deviance: 3296.0 on 3330 degrees of freedom
## AIC: 3302
##
## Number of Fisher Scoring iterations: 5
summary(lm(as.numeric(Party_ID) ~ IC1 + IC2, data = ica.df.btp))
##
## Call:
## lm(formula = as.numeric(Party_ID) ~ IC1 + IC2, data = ica.df.btp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.02326 -0.71877 0.04374 0.67676 2.14258
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.96731 0.01537 128.032 <2e-16 ***
## IC1 0.49491 0.01522 32.511 <2e-16 ***
## IC2 0.03421 0.01495 2.288 0.0222 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7823 on 2597 degrees of freedom
## (733 observations deleted due to missingness)
## Multiple R-squared: 0.2894, Adjusted R-squared: 0.2889
## F-statistic: 528.9 on 2 and 2597 DF, p-value: < 2.2e-16
ica.df.btp.pid <- ica.df.btp
ica.df.btp.pid$Party_ID[ica.df.btp.pid$Party_ID == "Independent"] <- NA
ica.df.btp.pid <- na.omit(ica.df.btp.pid)
ica.df.btp.pid$Party_ID <- factor(ica.df.btp.pid$Party_ID, labels = c("Liberal", "Conservative"))
summary(glm(Party_ID ~ IC1 + IC2, data = ica.df.btp.pid, family = binomial()))
##
## Call:
## glm(formula = Party_ID ~ IC1 + IC2, family = binomial(), data = ica.df.btp.pid)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7278 -0.8782 -0.1191 0.8380 2.9135
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.16756 0.05389 -3.109 0.00188 **
## IC1 1.81162 0.08396 21.577 < 2e-16 ***
## IC2 0.12267 0.04978 2.464 0.01372 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3101.0 on 2236 degrees of freedom
## Residual deviance: 2197.2 on 2234 degrees of freedom
## AIC: 2203.2
##
## Number of Fisher Scoring iterations: 5
#####Component Loadings Plot#######
loadings.ica.btp <- t(ica.btp$A)
#Total <- rowSums(abs(loadings.dat%*%diag(c(.406,.064),2,2)))
loadings.ica.btp <- data.frame(PA1 = loadings.ica.btp[,1], PA2 = loadings.ica.btp[,2], Contribution = abs(loadings.ica.btp[,1]) + abs(loadings.ica.btp[,2]),
PA01 = rep(0,nrow(loadings.ica.btp)), PA02 = rep(0,nrow(loadings.ica.btp)),
Variable = colnames(btp.X))
loadings.ica.btp.plot <- ggplot(data = loadings.ica.btp, aes(colour = Contribution)) +
geom_segment(aes(x = PA01, y = PA02, xend = PA1, yend = PA2),
arrow = arrow(length = unit(0.03, "npc"))) +
geom_vline(xintercept = 0, linetype = "dashed") +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_text_repel(aes(x = PA1, y = PA2, label = Variable)) +
geom_circle(aes(x0 = 0, y0 = 0, r = 1), color = "gray") +
coord_fixed(xlim = c(-1, 1), ylim = c(-1,1)) +
labs(x = "ICA 1", y = "ICA 2",
title = "Independent Component Loadings") +
scale_color_gradient2(low = "#00AFBB", mid = "#E7B800", high = "#FC4E07", na.value = NA, midpoint = .5)
loadings.ica.btp.plot
## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps