#load("E:/Dropbox/ICPSRML2020_admin/data/VB.RData")
#dta$ukpartyid
#dta$uspartyid
#dta$uspartyid7

#btp <- dta[,c("country", "ukpartyid", "uspartyid7", "authsub1", "authsub2", "authsub3", "authsub4", "authtrad1_1", "authtrad1_2", "authtrad1_3", "authtrad1_4", "authaggression_1", "authaggression_2", "authaggression_3", "authaggression_4", "roughedup", "sdo_1", "sdo_2", "sdo_3", "sdo_4", "egalitarianism_1", "egalitarianism_2", "egalitarianism_3", "egalitarianism_4", "suspicion_1", "suspicion_2", "suspicion_3", "suspicion_4", "lwa_1", "lwa_2", "lwa_3", "lwa_4", "lwa_5", "lwa_6", "populism_1", "populism_2", "populism_3", "populism_4", "happybrexit_1", "happytrump_1")]
#btp <- btp[complete.cases(btp[,c(-2,-3)]),]
#btp[,-c(1,2,3)] <- mapply(as.numeric, btp[,-c(1,2,3)])

#table(btp$uspartyid7)

#btp$uspartyid7[btp$uspartyid7 %in% 1:3] <- "1"
#btp$uspartyid7[btp$uspartyid7 == 4] <- "2"
#btp$uspartyid7[btp$uspartyid7 %in% 5:7] <- "3"
#
#btp$uspartyid7 <- factor(btp$uspartyid7, labels = c("Democrat", "Independent", "Republican"))

#colnames(btp)[3] <- "uspartyid" 

# btp$pid.merge <- rep(NA, nrow(btp))
# 
# unique(btp$ukpartyid)
# unique(btp$uspartyid)
# 
# btp$pid.merge[btp$ukpartyid == "Conservative Party"] <- "Conservative"
# btp$pid.merge[btp$uspartyid == "Republican"] <- "Conservative"
# 
# btp$pid.merge[btp$ukpartyid == "No - none"] <- "Independent"
# btp$pid.merge[btp$uspartyid == "Independent"] <- "Independent"
# 
# btp$pid.merge[btp$ukpartyid == "Labour Party"] <- "Liberal"
# btp$pid.merge[btp$uspartyid == "Democrat"] <- "Liberal"
# 
# btp$pid.merge <- factor(btp$pid.merge, levels = c("Liberal", "Independent", "Conservative"))
# 
# btp <- cbind.data.frame(btp[,1:3], btp$pid.merge, btp[,-c(1,2,3,41)])
# 
# colnames(btp)[4] <- "pid.merge"

#saveRDS(btp,"E:/Dropbox/ICPSRML2020_admin/data/BTP_cPCA.RDS")

#library(parallel)
#library(doParallel)

library(tidyverse)
library(ggrepel)
library(ggforce)


library(scPCA)
library(factoextra)
library(readit)
library(fastICA)

set.seed(1995)

PCA

cal <- readRDS("E:/Dropbox/ICPSRML2020_admin/data/cal_voters.RDS")

cal.pca <- prcomp(cal[,-1], center=T, scale=T)

cal.pca$rotation[rev(order(abs(cal.pca$rotation[,1]))), 1:3]

##                           PC1          PC2          PC3
## approve_trump    -0.282937920 -0.032175056  0.116244411
## party             0.279404924 -0.030960656  0.012499770
## favor_kavanaugh  -0.277073944 -0.012418419  0.086464889
## favor_borderwall -0.265821729 -0.063672032  0.073184115
## house_trump      -0.259230391 -0.011741356  0.028396734
## approve_brown     0.249510200 -0.009722059  0.197286925
## favor_localrules  0.247859490  0.019258243  0.017240886
## favor_direction  -0.241777885 -0.046857165  0.279004164
## favor_obamacare   0.241512612 -0.012177865  0.086295160
## view_republicans -0.239267618  0.043445322  0.173462971
## view_democrats    0.234410496  0.027585695  0.140412210
## approve_stateleg  0.231547345 -0.029765130  0.261358943
## ideology          0.227100002 -0.078323995  0.001777398
## size_govt        -0.220401677 -0.028420446 -0.225419294
## favor_gunregs     0.206142668 -0.081001376  0.023860158
## us_econ          -0.173708331 -0.107814861  0.342014750
## approve_congress -0.151919403  0.108367968  0.319014201
## cal_econ          0.110888324 -0.134489164  0.484474444
## race              0.070284757  0.343994993  0.120441277
## gender           -0.058645683 -0.024112916 -0.024887836
## house_enthused    0.048871904 -0.364769370  0.256522430
## age               0.034938029  0.287813975 -0.066780424
## income            0.021506910  0.405996392  0.221631887
## education        -0.017415657  0.453256040  0.275061542
## interest          0.003286214 -0.476734742  0.122699172

cal.pca$rotation[rev(order(abs(cal.pca$rotation[,2]))), 1:3]

##                           PC1          PC2          PC3
## interest          0.003286214 -0.476734742  0.122699172
## education        -0.017415657  0.453256040  0.275061542
## income            0.021506910  0.405996392  0.221631887
## house_enthused    0.048871904 -0.364769370  0.256522430
## race              0.070284757  0.343994993  0.120441277
## age               0.034938029  0.287813975 -0.066780424
## cal_econ          0.110888324 -0.134489164  0.484474444
## approve_congress -0.151919403  0.108367968  0.319014201
## us_econ          -0.173708331 -0.107814861  0.342014750
## favor_gunregs     0.206142668 -0.081001376  0.023860158
## ideology          0.227100002 -0.078323995  0.001777398
## favor_borderwall -0.265821729 -0.063672032  0.073184115
## favor_direction  -0.241777885 -0.046857165  0.279004164
## view_republicans -0.239267618  0.043445322  0.173462971
## approve_trump    -0.282937920 -0.032175056  0.116244411
## party             0.279404924 -0.030960656  0.012499770
## approve_stateleg  0.231547345 -0.029765130  0.261358943
## size_govt        -0.220401677 -0.028420446 -0.225419294
## view_democrats    0.234410496  0.027585695  0.140412210
## gender           -0.058645683 -0.024112916 -0.024887836
## favor_localrules  0.247859490  0.019258243  0.017240886
## favor_kavanaugh  -0.277073944 -0.012418419  0.086464889
## favor_obamacare   0.241512612 -0.012177865  0.086295160
## house_trump      -0.259230391 -0.011741356  0.028396734
## approve_brown     0.249510200 -0.009722059  0.197286925

cal.pca$rotation[order(abs(cal.pca$rotation[,3])), 1:3]

##                           PC1          PC2          PC3
## ideology          0.227100002 -0.078323995  0.001777398
## party             0.279404924 -0.030960656  0.012499770
## favor_localrules  0.247859490  0.019258243  0.017240886
## favor_gunregs     0.206142668 -0.081001376  0.023860158
## gender           -0.058645683 -0.024112916 -0.024887836
## house_trump      -0.259230391 -0.011741356  0.028396734
## age               0.034938029  0.287813975 -0.066780424
## favor_borderwall -0.265821729 -0.063672032  0.073184115
## favor_obamacare   0.241512612 -0.012177865  0.086295160
## favor_kavanaugh  -0.277073944 -0.012418419  0.086464889
## approve_trump    -0.282937920 -0.032175056  0.116244411
## race              0.070284757  0.343994993  0.120441277
## interest          0.003286214 -0.476734742  0.122699172
## view_democrats    0.234410496  0.027585695  0.140412210
## view_republicans -0.239267618  0.043445322  0.173462971
## approve_brown     0.249510200 -0.009722059  0.197286925
## income            0.021506910  0.405996392  0.221631887
## size_govt        -0.220401677 -0.028420446 -0.225419294
## house_enthused    0.048871904 -0.364769370  0.256522430
## approve_stateleg  0.231547345 -0.029765130  0.261358943
## education        -0.017415657  0.453256040  0.275061542
## favor_direction  -0.241777885 -0.046857165  0.279004164
## approve_congress -0.151919403  0.108367968  0.319014201
## us_econ          -0.173708331 -0.107814861  0.342014750
## cal_econ          0.110888324 -0.134489164  0.484474444

cal.pca$rotation[order(abs(cal.pca$rotation[,4])), 1:4]

##                           PC1          PC2          PC3          PC4
## favor_obamacare   0.241512612 -0.012177865  0.086295160  0.007068891
## favor_localrules  0.247859490  0.019258243  0.017240886 -0.010949977
## house_trump      -0.259230391 -0.011741356  0.028396734  0.012140154
## favor_gunregs     0.206142668 -0.081001376  0.023860158  0.026534001
## party             0.279404924 -0.030960656  0.012499770  0.034948492
## favor_kavanaugh  -0.277073944 -0.012418419  0.086464889 -0.036328379
## ideology          0.227100002 -0.078323995  0.001777398 -0.036967293
## race              0.070284757  0.343994993  0.120441277 -0.049541506
## approve_trump    -0.282937920 -0.032175056  0.116244411  0.054213307
## view_republicans -0.239267618  0.043445322  0.173462971  0.061848085
## approve_congress -0.151919403  0.108367968  0.319014201 -0.062560376
## favor_direction  -0.241777885 -0.046857165  0.279004164 -0.062835565
## size_govt        -0.220401677 -0.028420446 -0.225419294 -0.075338770
## view_democrats    0.234410496  0.027585695  0.140412210  0.096013212
## approve_brown     0.249510200 -0.009722059  0.197286925 -0.100646931
## approve_stateleg  0.231547345 -0.029765130  0.261358943 -0.105586086
## favor_borderwall -0.265821729 -0.063672032  0.073184115  0.125657790
## interest          0.003286214 -0.476734742  0.122699172  0.198577626
## education        -0.017415657  0.453256040  0.275061542  0.239855518
## us_econ          -0.173708331 -0.107814861  0.342014750 -0.289685588
## house_enthused    0.048871904 -0.364769370  0.256522430  0.338881988
## cal_econ          0.110888324 -0.134489164  0.484474444 -0.354976678
## gender           -0.058645683 -0.024112916 -0.024887836 -0.382935838
## income            0.021506910  0.405996392  0.221631887  0.409348270
## age               0.034938029  0.287813975 -0.066780424 -0.435994109

PCA Plots

scree.plot.cal <- fviz_eig(cal.pca)
scree.plot.cal

component.plot.cal <- fviz_pca_var(cal.pca,
                    col.var = "contrib", # Color by contributions to the PC
                    gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), #Choose gradient colors
                    title = "Component Loadings Plot - PCA", #Change plot title
                    legend.title = "Contribution", #Change legend title
                    repel = TRUE) # Repel labels away from each other

component.plot.cal

## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

ind.plot.cal <- fviz_pca_ind(cal.pca,
             col.ind = cal$vote_house,
             palette = c("#FC4E07", "#00AFBB"), #Choose colors for Dems v. Reps
             addEllipses = TRUE, #Add group ellipses
             legend.title = "Congressional\nVote Choice",
             title = "Individual Plot - PCA",
             geom = "point",
             repel = F)

ind.plot.cal

biplot.cal <- fviz_pca_biplot(cal.pca,
             addEllipses = TRUE,
             title = "Bi-Plot (Individual and Variable) - PCA",
             geom = "point",
             gradient.cols = c("springgreen3", "#E7B800", "orchid3"),
             palette = c("#FC4E07", "#00AFBB"),
             col.var = "contrib",
             fill.ind = cal$vote_house, col.ind = "white",
             pointshape = 21, 
             pointsize = 2,
             repel = T) + 
          labs(fill = "Congressional\nVote Choice", 
               color = "Variable\nContribution")

biplot.cal

## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

cPCA

table(cal$party)

## 
##   1   2   3   4   5   6   7 
## 193  62  86   9  82  46 138

cal2 <- cal

cal2$party[cal2$party %in% 1:3] <- 1
cal2$party[cal2$party %in% 4] <- NA
cal2$party[cal2$party %in% 5:7] <- 2
cal2$party <- factor(cal2$party, labels = c("Democrat", "Republican"))

cal2$strength <- rep(NA, nrow(cal2))
cal2$strength[cal$party %in% c(1,7)] <- "Strong"  
#cal2$strength[cal$party %in% c(2,6)] <- "Regular"
#cal2$strength[cal$party %in% c(3,5)] <- "Weak"
cal2$strength[cal$party %in% c(2,3,5,6)] <- "Weak"
cal2$strength <- factor(cal2$strength, levels = c("Weak", "Strong"))

cal2 <- na.omit(cal2)

cal2.X <- cal2[, !colnames(cal2) %in% c("party", "strength", "vote_house")]

###Democrats as target group###
cpca.cal.dem <- scPCA(target = cal2.X[cal2$party == "Democrat",],
                  background = cal2.X[cal2$party == "Republican",],
                  penalties = 0,
                  n_centers = 2)

# create a dataframe to be plotted
cpca.df.cal.dem <- cpca.cal.dem$x %>%
  as_tibble() %>%
  mutate(dem_approval = factor(cal2$view_democrats[cal2$party == "Democrat"], labels = c("Approve", "Disapprove")))
colnames(cpca.df.cal.dem) <- c("cPC1", "cPC2", "Dem_Approval")

# plot the results
cpca.plot.cal.dem <- ggplot(cpca.df.cal.dem, aes(x = cPC1, y = cPC2, colour = Dem_Approval, fill = Dem_Approval)) +
  geom_point(alpha = 0.5) +
  labs(title = "cPCA Target Group: Democrats", color = "Democratic Party\nApproval", fill = "Democratic Party\nApproval") +
  theme_minimal() + 
  scale_color_manual(values = c("#00AFBB", "#FC4E07")) +
  scale_fill_manual(values = c("#00AFBB", "#FC4E07")) + 
  stat_ellipse(geom = "polygon", alpha = .3)
cpca.plot.cal.dem

summary(lm(as.numeric(Dem_Approval) ~ cPC1 + cPC2, data = cpca.df.cal.dem))

## 
## Call:
## lm(formula = as.numeric(Dem_Approval) ~ cPC1 + cPC2, data = cpca.df.cal.dem)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.46695 -0.11665 -0.02737  0.10608  0.67293 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.234604   0.009485 130.162   <2e-16 ***
## cPC1         0.980700   0.024574  39.908   <2e-16 ***
## cPC2        -0.038585   0.031526  -1.224    0.222    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1752 on 338 degrees of freedom
## Multiple R-squared:  0.8307, Adjusted R-squared:  0.8296 
## F-statistic: 828.9 on 2 and 338 DF,  p-value: < 2.2e-16

summary(glm(Dem_Approval ~ cPC1 + cPC2, data = cpca.df.cal.dem, family = binomial()))

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## 
## Call:
## glm(formula = Dem_Approval ~ cPC1 + cPC2, family = binomial(), 
##     data = cpca.df.cal.dem)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.26835  -0.00198  -0.00025   0.00000   2.00474  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   -8.704      4.163  -2.091   0.0365 *
## cPC1          34.381     15.065   2.282   0.0225 *
## cPC2           5.310      3.909   1.358   0.1744  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 371.540  on 340  degrees of freedom
## Residual deviance:   7.915  on 338  degrees of freedom
## AIC: 13.915
## 
## Number of Fisher Scoring iterations: 12

loadings.cal.dem <- cpca.cal.dem$rotation

rownames(loadings.cal.dem) <- colnames(cal2.X)
loadings.cal.dem[rev(order(abs(loadings.cal.dem[,1]))),]

##                            V1           V2
## view_democrats    0.890155737  0.040589255
## favor_kavanaugh  -0.264959785  0.303617768
## us_econ           0.151029898  0.120071043
## favor_localrules -0.137919008  0.210545914
## favor_obamacare  -0.136937438 -0.077507259
## approve_stateleg -0.128706559 -0.643946518
## favor_borderwall  0.118560352  0.022229060
## approve_brown    -0.106716952  0.379300981
## size_govt        -0.097344533 -0.339484403
## approve_trump    -0.076206332  0.077841920
## house_trump       0.074220667 -0.392534284
## favor_direction  -0.072864716  0.034958357
## race             -0.066562562 -0.032153841
## cal_econ         -0.041148352 -0.033124123
## view_republicans -0.031401837  0.010244219
## ideology         -0.022674235 -0.000558741
## interest         -0.016966310 -0.023263288
## favor_gunregs     0.015621123  0.001827250
## gender           -0.013595153  0.004071648
## education        -0.011495691 -0.005540192
## approve_congress -0.010045129 -0.048945424
## income           -0.008710733 -0.015248340
## age               0.008543642 -0.014757672
## house_enthused    0.001078784  0.009793473

loadings.cal.dem[rev(order(abs(loadings.cal.dem[,2]))),]

##                            V1           V2
## approve_stateleg -0.128706559 -0.643946518
## house_trump       0.074220667 -0.392534284
## approve_brown    -0.106716952  0.379300981
## size_govt        -0.097344533 -0.339484403
## favor_kavanaugh  -0.264959785  0.303617768
## favor_localrules -0.137919008  0.210545914
## us_econ           0.151029898  0.120071043
## approve_trump    -0.076206332  0.077841920
## favor_obamacare  -0.136937438 -0.077507259
## approve_congress -0.010045129 -0.048945424
## view_democrats    0.890155737  0.040589255
## favor_direction  -0.072864716  0.034958357
## cal_econ         -0.041148352 -0.033124123
## race             -0.066562562 -0.032153841
## interest         -0.016966310 -0.023263288
## favor_borderwall  0.118560352  0.022229060
## income           -0.008710733 -0.015248340
## age               0.008543642 -0.014757672
## view_republicans -0.031401837  0.010244219
## house_enthused    0.001078784  0.009793473
## education        -0.011495691 -0.005540192
## gender           -0.013595153  0.004071648
## favor_gunregs     0.015621123  0.001827250
## ideology         -0.022674235 -0.000558741

### Component Loadings Plot ###
loadings.cal.dem <- data.frame(PA1 = loadings.cal.dem[,1], PA2 = loadings.cal.dem[,2], Contribution = abs(loadings.cal.dem[,1]) + abs(loadings.cal.dem[,2]),
                           PA01 = rep(0,nrow(loadings.cal.dem)), PA02 = rep(0,nrow(loadings.cal.dem)),
                           Variable = colnames(cal2.X))


loadings.cal.dem.plot <- ggplot(data = loadings.cal.dem, aes(colour = Contribution)) + 
  geom_segment(aes(x = PA01, y = PA02, xend = PA1, yend = PA2), 
               arrow = arrow(length = unit(0.03, "npc"))) + 
  geom_vline(xintercept = 0, linetype = "dashed") + 
  geom_hline(yintercept = 0, linetype = "dashed") + 
  geom_text_repel(aes(x = PA1, y = PA2, label = Variable)) + 
  geom_circle(aes(x0 = 0, y0 = 0, r = 1), color = "gray") + 
  coord_fixed(xlim = c(-1, 1), ylim = c(-1,1)) + 
  labs(x = "cPCA 1", y = "cPCA 2", 
       title = "Contrastive Principal Component Loadings\nTarget Group: Democrats") + 
  scale_color_gradient2(low = "#00AFBB", mid = "#E7B800", high = "#FC4E07", na.value = NA, midpoint = .4)

loadings.cal.dem.plot

## Warning: ggrepel: 16 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

###Democrats as target group###
cpca.cal.rep <- scPCA(target = cal2.X[cal2$party == "Republican",],
                  background = cal2.X[cal2$party == "Democrat",],
                  penalties = 0,
                  n_centers = 2)

# create a dataframe to be plotted
cpca.df.cal.rep <- cpca.cal.rep$x %>%
  as_tibble() %>%
  mutate(wall = factor(cal2$favor_borderwall[cal2$party == "Republican"], labels = c("Approve", "Disapprove")), approve_trump = factor(cal2$approve_trump[cal2$party == "Republican"], labels = c("Approve", "Disapprove")), vote_house = cal2$vote_house[cal2$party == "Republican"])
colnames(cpca.df.cal.rep) <- c("cPC1", "cPC2", "Wall_Approval", "Trump_Approval", "Vote_House")

# Wall Approval Plot
cpca.plot.cal.rep.wall <- ggplot(cpca.df.cal.rep, aes(x = cPC1, y = cPC2, colour = Wall_Approval, fill = Wall_Approval)) +
  geom_point(alpha = 0.5) +
  labs(title = "cPCA Target Group: Republicans", color = "Border Wall\nApproval", fill = "Border Wall\nApproval") +
  theme_minimal() +
  scale_color_manual(values = c("#00AFBB", "#FC4E07")) +
  scale_fill_manual(values = c("#00AFBB", "#FC4E07")) + 
  stat_ellipse(geom = "polygon", alpha = .3)
cpca.plot.cal.rep.wall

# Trump Approval Plot
cpca.plot.cal.rep.trump <- ggplot(cpca.df.cal.rep, aes(x = cPC1, y = cPC2, colour = Trump_Approval, fill = Trump_Approval)) +
  geom_point(alpha = 0.5) +
  labs(title = "cPCA Target Group: Republicans", color = "Trump\nApproval", fill = "Trump\nApproval") +
  theme_minimal() +
  scale_color_manual(values = c("#00AFBB", "#FC4E07")) +
  scale_fill_manual(values = c("#00AFBB", "#FC4E07")) + 
  stat_ellipse(geom = "polygon", alpha = .3)
cpca.plot.cal.rep.trump

# Congressional vote Plot
cpca.plot.cal.rep.house <- ggplot(cpca.df.cal.rep, aes(x = cPC1, y = cPC2, colour = Vote_House, fill = Vote_House)) +
  geom_point(alpha = 0.5) +
  labs(title = "cPCA Target Group: Republicans", color = "House Vote", fill = "House Vote") +
  theme_minimal() +
  scale_color_manual(values = c("#FC4E07", "#00AFBB")) +
  scale_fill_manual(values = c("#FC4E07", "#00AFBB")) + 
  stat_ellipse(geom = "polygon", alpha = .3)
cpca.plot.cal.rep.house

summary(lm(as.numeric(Wall_Approval) ~ cPC1 + cPC2, data = cpca.df.cal.rep))

## 
## Call:
## lm(formula = as.numeric(Wall_Approval) ~ cPC1 + cPC2, data = cpca.df.cal.rep)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.70609 -0.09284  0.00041  0.10592  0.63513 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.25564    0.01186  105.84   <2e-16 ***
## cPC1        -0.62215    0.01911  -32.55   <2e-16 ***
## cPC2         0.35129    0.03058   11.49   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1935 on 263 degrees of freedom
## Multiple R-squared:  0.8055, Adjusted R-squared:  0.804 
## F-statistic: 544.5 on 2 and 263 DF,  p-value: < 2.2e-16

summary(glm(Wall_Approval ~ cPC1 + cPC2, data = cpca.df.cal.rep, family = binomial()))

## 
## Call:
## glm(formula = Wall_Approval ~ cPC1 + cPC2, family = binomial(), 
##     data = cpca.df.cal.rep)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.60033  -0.04092  -0.01625   0.00285   2.17930  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -4.2198     0.9959  -4.237 2.26e-05 ***
## cPC1        -10.2438     2.0491  -4.999 5.76e-07 ***
## cPC2          5.4784     1.3848   3.956 7.61e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 302.413  on 265  degrees of freedom
## Residual deviance:  30.953  on 263  degrees of freedom
## AIC: 36.953
## 
## Number of Fisher Scoring iterations: 9

loadings.cal.rep <- cpca.cal.rep$rotation

rownames(loadings.cal.rep) <- colnames(cal2.X)
loadings.cal.rep[rev(order(abs(loadings.cal.rep[,1]))),]

##                            V1           V2
## favor_borderwall -0.758839394  0.376646721
## view_republicans -0.430135247 -0.287753336
## favor_obamacare   0.250772833  0.111912723
## approve_brown     0.179210804  0.010090234
## favor_localrules  0.176983528  0.076818618
## favor_direction   0.154202561 -0.012938780
## approve_congress -0.139572527 -0.513576791
## house_enthused   -0.133185879 -0.063515002
## interest         -0.119955785 -0.064529695
## favor_gunregs     0.119002911  0.027896284
## house_trump       0.116420624  0.010328517
## education         0.040602510  0.018909800
## us_econ          -0.039833199 -0.064297506
## cal_econ          0.037145127  0.064449190
## approve_trump    -0.036983699 -0.632461170
## ideology          0.035946539  0.012364788
## size_govt        -0.032974872  0.066538253
## gender            0.024122978  0.008415217
## view_democrats    0.019914883  0.030567034
## approve_stateleg  0.016410114  0.023758741
## race             -0.007095832 -0.018055134
## age               0.005215688  0.004253755
## income            0.005016890  0.002988666
## favor_kavanaugh  -0.002161387  0.261993868

loadings.cal.rep[rev(order(abs(loadings.cal.rep[,2]))),]

##                            V1           V2
## approve_trump    -0.036983699 -0.632461170
## approve_congress -0.139572527 -0.513576791
## favor_borderwall -0.758839394  0.376646721
## view_republicans -0.430135247 -0.287753336
## favor_kavanaugh  -0.002161387  0.261993868
## favor_obamacare   0.250772833  0.111912723
## favor_localrules  0.176983528  0.076818618
## size_govt        -0.032974872  0.066538253
## interest         -0.119955785 -0.064529695
## cal_econ          0.037145127  0.064449190
## us_econ          -0.039833199 -0.064297506
## house_enthused   -0.133185879 -0.063515002
## view_democrats    0.019914883  0.030567034
## favor_gunregs     0.119002911  0.027896284
## approve_stateleg  0.016410114  0.023758741
## education         0.040602510  0.018909800
## race             -0.007095832 -0.018055134
## favor_direction   0.154202561 -0.012938780
## ideology          0.035946539  0.012364788
## house_trump       0.116420624  0.010328517
## approve_brown     0.179210804  0.010090234
## gender            0.024122978  0.008415217
## age               0.005215688  0.004253755
## income            0.005016890  0.002988666

### Component Loadings Plot ###
loadings.cal.rep <- data.frame(PA1 = loadings.cal.rep[,1], PA2 = loadings.cal.rep[,2], Contribution = abs(loadings.cal.rep[,1]) + abs(loadings.cal.rep[,2]),
                           PA01 = rep(0,nrow(loadings.cal.rep)), PA02 = rep(0,nrow(loadings.cal.rep)),
                           Variable = colnames(cal2.X))


loadings.cal.rep.plot <- ggplot(data = loadings.cal.rep, aes(colour = Contribution)) + 
  geom_segment(aes(x = PA01, y = PA02, xend = PA1, yend = PA2), 
               arrow = arrow(length = unit(0.03, "npc"))) + 
  geom_vline(xintercept = 0, linetype = "dashed") + 
  geom_hline(yintercept = 0, linetype = "dashed") + 
  geom_text_repel(aes(x = PA1, y = PA2, label = Variable)) + 
  geom_circle(aes(x0 = 0, y0 = 0, r = 1), color = "gray") + 
  coord_fixed(xlim = c(-1, 1), ylim = c(-1,1)) + 
  labs(x = "ICA 1", y = "ICA 2", 
       title = "Contrastive Principal Component Loadings\nTarget Group: Republicans") + 
  scale_color_gradient2(low = "#00AFBB", mid = "#E7B800", high = "#FC4E07", na.value = NA, midpoint = .4)

loadings.cal.rep.plot

## Warning: ggrepel: 19 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

BtP Dataset

btp <- readRDS("E:/Dropbox/ICPSRML2020_admin/data/BTP_cPCA.RDS")
btp.X <- btp[,-c(1,2,3,4)]


btp.pca <- prcomp(btp.X, center=T, scale=T)

PCA Plots

scree.plot.btp <- fviz_eig(btp.pca)
scree.plot.btp

component.plot.btp <- fviz_pca_var(btp.pca,
                    col.var = "contrib", # Color by contributions to the PC
                    gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), #Choose gradient colors
                    title = "Component Loadings Plot - PCA", #Change plot title
                    legend.title = "Contribution", #Change legend title
                    repel = TRUE) # Repel labels away from each other

component.plot.btp

## Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

ind.plot.btp.country <- fviz_pca_ind(btp.pca,
             col.ind = btp$country,
             palette = c("#FC4E07", "#00AFBB"), #Choose colors for Dems v. Reps
             addEllipses = TRUE, #Add group ellipses
             legend.title = "Country",
             title = "Individual Plot - PCA",
             geom = "point",
             repel = F)

ind.plot.btp.country

ind.plot.btp.pid <- fviz_pca_ind(btp.pca,
             col.ind = btp$pid.merge,
             palette = c("#00AFBB","forestgreen", "#FC4E07"), #Choose colors for Dems v. Reps
             addEllipses = TRUE, #Add group ellipses
             legend.title = "Party ID",
             title = "Individual Plot - PCA",
             geom = "point",
             repel = F)

ind.plot.btp.pid

## Warning: Removed 733 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_point).

biplot.btp <- fviz_pca_biplot(btp.pca,
             addEllipses = TRUE,
             title = "Bi-Plot (Individual and Variable) - PCA",
             geom = "point",
             gradient.cols = c("springgreen3", "#E7B800", "orchid3"),
             palette = c("#FC4E07", "#00AFBB"),
             col.var = "contrib",
             fill.ind = btp$country, col.ind = "white",
             pointshape = 21, 
             pointsize = 2,
             repel = T) + 
          labs(fill = "Country", 
               color = "Variable\nContribution")

biplot.btp

## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

###US as target group###
cpca.btp.us <- scPCA(target = btp.X[btp$country == "US",],
                  background = btp.X[btp$country == "UK",],
                  penalties = 0,
                  n_centers = 2)

# create a dataframe to be plotted
cpca.df.btp.us <- cpca.btp.us$x %>%
  as_tibble() %>%
  mutate(pid = btp$uspartyid[btp$country == "US"], auth = factor(btp$authsub3[btp$country == "US"], labels = c("Low", "High")))
colnames(cpca.df.btp.us) <- c("cPC1", "cPC2", "Party_ID", "Authoritarian")
cpca.df.btp.us.pid <- na.omit(cpca.df.btp.us)

### Party ID Plot
cpca.plot.btp.us.pid <- ggplot(cpca.df.btp.us.pid, aes(x = cPC1, y = cPC2, colour = Party_ID, fill = Party_ID)) +
  geom_point(alpha = 0.5) +
  labs(title = "cPCA Target Group: US", color = "Party ID", fill = "Party ID") +
  theme_minimal() +
  scale_color_manual(values = c("#00AFBB", "forestgreen", "#FC4E07")) +
  scale_fill_manual(values = c("#00AFBB", "forestgreen", "#FC4E07")) + 
  stat_ellipse(geom = "polygon", alpha = .3)
cpca.plot.btp.us.pid

### Authoritarian Plot
cpca.plot.btp.us.auth <- ggplot(cpca.df.btp.us, aes(x = cPC1, y = cPC2, colour = Authoritarian, fill = Authoritarian)) +
  geom_point(alpha = 0.5) +
  labs(title = "cPCA Target Group: US", color = "Authoritarian") +
  theme_minimal() + 
  scale_color_manual(values = c("#00AFBB", "#FC4E07")) +
  scale_fill_manual(values = c("#00AFBB", "#FC4E07")) + 
  stat_ellipse(geom = "polygon", alpha = .3)
cpca.plot.btp.us.auth

### Both Plot
cpca.plot.btp.us.both <- ggplot(cpca.df.btp.us, aes(x = cPC1, y = cPC2, color = Party_ID, fill = Authoritarian)) +
  geom_jitter() +
  labs(title = "cPCA Target Group: US", color = "Party ID", fill = "Authoritarianism") +
  theme_minimal() + 
  scale_color_manual(values = c("#00AFBB", "forestgreen", "#FC4E07")) +
  scale_fill_manual(values = c("#E7B800", "orchid3")) +
  stat_ellipse(aes(x = cPC1, y = cPC2,fill = Authoritarian),geom = "polygon", alpha = .3, inherit.aes = F)
cpca.plot.btp.us.both

## Warning: Removed 53 rows containing missing values (geom_point).

# Party ID regressions

summary(lm(as.numeric(Party_ID) ~ cPC1 + cPC2, data = cpca.df.btp.us))

## 
## Call:
## lm(formula = as.numeric(Party_ID) ~ cPC1 + cPC2, data = cpca.df.btp.us)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.21163 -0.54204 -0.08265  0.52173  1.86325 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.045096   0.019432 105.243  < 2e-16 ***
## cPC1        0.209245   0.006909  30.287  < 2e-16 ***
## cPC2        0.140210   0.043703   3.208  0.00137 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7156 on 1353 degrees of freedom
##   (53 observations deleted due to missingness)
## Multiple R-squared:  0.4123, Adjusted R-squared:  0.4114 
## F-statistic: 474.6 on 2 and 1353 DF,  p-value: < 2.2e-16

summary(glm(Party_ID ~ cPC1 + cPC2, data = cpca.df.btp.us, family = binomial()))

## 
## Call:
## glm(formula = Party_ID ~ cPC1 + cPC2, family = binomial(), data = cpca.df.btp.us)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0648  -0.7362   0.2021   0.6845   2.6062  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.73377    0.08044   9.122   <2e-16 ***
## cPC1         0.70886    0.03944  17.975   <2e-16 ***
## cPC2         0.39930    0.15943   2.505   0.0123 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1837.1  on 1355  degrees of freedom
## Residual deviance: 1203.0  on 1353  degrees of freedom
##   (53 observations deleted due to missingness)
## AIC: 1209
## 
## Number of Fisher Scoring iterations: 5

loadings.us <- cpca.btp.us$rotation

rownames(loadings.us) <- colnames(btp.X)
loadings.us[rev(order(abs(loadings.us[,1]))),]

##                            V1            V2
## lwa_4            -0.406723132  0.0066299212
## happytrump_1      0.333529576 -0.0029147682
## populism_2       -0.319999220  0.0340266398
## lwa_2            -0.310061575  0.0630453070
## authtrad1_2      -0.265541820  0.0003852233
## sdo_2            -0.256601943  0.0360429524
## authaggression_3  0.208637535  0.0110961265
## lwa_1            -0.201022357 -0.0173206422
## sdo_3            -0.188627447  0.0697628062
## sdo_1            -0.185576240 -0.0146080101
## authaggression_1  0.167486500 -0.0437943194
## suspicion_1       0.151510152  0.0393740291
## roughedup         0.147412845  0.0301092678
## lwa_3             0.139966372 -0.0139270685
## authaggression_4 -0.128805088  0.0267190314
## happybrexit_1    -0.125252324  0.0062415136
## egalitarianism_1 -0.115100323  0.0447251380
## egalitarianism_4 -0.114289273 -0.0300705423
## egalitarianism_2  0.113998047  0.0099779582
## authsub3         -0.101487020 -0.5826853195
## populism_3        0.095450220 -0.0134094672
## authtrad1_3       0.094087030  0.0035035386
## sdo_4            -0.087097102 -0.0281026067
## authtrad1_1      -0.084630391  0.0124571266
## lwa_6            -0.081147476  0.0812410290
## authtrad1_4       0.073157073 -0.0376456895
## suspicion_4       0.071386439 -0.0564709183
## lwa_5             0.065196604 -0.0178370884
## suspicion_2       0.047296291 -0.0425524843
## authsub1         -0.040956272 -0.3952575847
## populism_1        0.040480026 -0.0390836658
## populism_4        0.016711284  0.0167922932
## suspicion_3       0.015868337  0.0134571269
## authsub2         -0.010664363  0.4481403968
## authaggression_2  0.008119384 -0.0110600909
## authsub4         -0.002812553  0.5160920436
## egalitarianism_3  0.001531861 -0.0130372594

loadings.us[rev(order(abs(loadings.us[,2]))),]

##                            V1            V2
## authsub3         -0.101487020 -0.5826853195
## authsub4         -0.002812553  0.5160920436
## authsub2         -0.010664363  0.4481403968
## authsub1         -0.040956272 -0.3952575847
## lwa_6            -0.081147476  0.0812410290
## sdo_3            -0.188627447  0.0697628062
## lwa_2            -0.310061575  0.0630453070
## suspicion_4       0.071386439 -0.0564709183
## egalitarianism_1 -0.115100323  0.0447251380
## authaggression_1  0.167486500 -0.0437943194
## suspicion_2       0.047296291 -0.0425524843
## suspicion_1       0.151510152  0.0393740291
## populism_1        0.040480026 -0.0390836658
## authtrad1_4       0.073157073 -0.0376456895
## sdo_2            -0.256601943  0.0360429524
## populism_2       -0.319999220  0.0340266398
## roughedup         0.147412845  0.0301092678
## egalitarianism_4 -0.114289273 -0.0300705423
## sdo_4            -0.087097102 -0.0281026067
## authaggression_4 -0.128805088  0.0267190314
## lwa_5             0.065196604 -0.0178370884
## lwa_1            -0.201022357 -0.0173206422
## populism_4        0.016711284  0.0167922932
## sdo_1            -0.185576240 -0.0146080101
## lwa_3             0.139966372 -0.0139270685
## suspicion_3       0.015868337  0.0134571269
## populism_3        0.095450220 -0.0134094672
## egalitarianism_3  0.001531861 -0.0130372594
## authtrad1_1      -0.084630391  0.0124571266
## authaggression_3  0.208637535  0.0110961265
## authaggression_2  0.008119384 -0.0110600909
## egalitarianism_2  0.113998047  0.0099779582
## lwa_4            -0.406723132  0.0066299212
## happybrexit_1    -0.125252324  0.0062415136
## authtrad1_3       0.094087030  0.0035035386
## happytrump_1      0.333529576 -0.0029147682
## authtrad1_2      -0.265541820  0.0003852233

### Component Loadings Plot ###


loadings.us <- data.frame(PA1 = loadings.us[,1], PA2 = loadings.us[,2], Contribution = abs(loadings.us[,1]) + abs(loadings.us[,2]),
                           PA01 = rep(0,nrow(loadings.us)), PA02 = rep(0,nrow(loadings.us)),
                           Variable = colnames(btp.X))


loadings.us.plot <- ggplot(data = loadings.us, aes(colour = Contribution)) + 
  geom_segment(aes(x = PA01, y = PA02, xend = PA1, yend = PA2), 
               arrow = arrow(length = unit(0.03, "npc"))) + 
  geom_vline(xintercept = 0, linetype = "dashed") + 
  geom_hline(yintercept = 0, linetype = "dashed") + 
  geom_text_repel(aes(x = PA1, y = PA2, label = Variable)) + 
  geom_circle(aes(x0 = 0, y0 = 0, r = 1), color = "gray") + 
  coord_fixed(xlim = c(-1, 1), ylim = c(-1,1)) + 
  labs(x = "cPCA 1", y = "cPCA 2", 
       title = "Contrastive Principal Component Loadings\nTarget Group: US Respondents") + 
  scale_color_gradient2(low = "#00AFBB", mid = "#E7B800", high = "#FC4E07", na.value = NA, midpoint = .4)

loadings.us.plot

## Warning: ggrepel: 33 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

###UK as target group###

cpca.btp.uk <- scPCA(target = btp.X[btp$country == "UK",],
                  background = btp.X[btp$country == "US",],
                  penalties = 0,
                  n_centers = 2)

# create a dataframe to be plotted
cpca.df.btp.uk <- cpca.btp.uk$x %>%
  as_tibble() %>%
  mutate(brexit = btp$happybrexit_1[btp$country == "UK"], brexit.cat = factor(btp$happybrexit_1[btp$country == "UK"], labels = c("Low","Low","Low","Low","Medium", "Medium", "Medium", "High", "High", "High", "High")))
colnames(cpca.df.btp.uk) <- c("cPC1", "cPC2", "Brexit_Support", "Brexit.cat")

# plot the results
cpca.plot.btp.uk <- ggplot(cpca.df.btp.uk, aes(x = cPC1, y = cPC2, colour = Brexit_Support, fill = Brexit.cat)) +
  geom_point(alpha = 0.5) +
  labs(title = "cPCA Target Group: UK", color = "Brexit\nSupport", fill = "Brexit\nSupport\n(Categorical)") +
  theme_minimal() + scale_color_gradient2(low = "#00AFBB", mid = "#E7B800", high = "#FC4E07", midpoint = 5) +
  scale_fill_manual(values = c("#00AFBB","gold", "#FC4E07")) + 
  stat_ellipse(geom = "polygon", alpha = .3)
cpca.plot.btp.uk

summary(lm(Brexit_Support ~ cPC1 + cPC2, data = cpca.df.btp.uk))

## 
## Call:
## lm(formula = Brexit_Support ~ cPC1 + cPC2, data = cpca.df.btp.uk)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9934 -0.8550 -0.0550  0.7715  4.7209 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.869543   0.028532  170.67   <2e-16 ***
## cPC1        1.247196   0.009907  125.89   <2e-16 ***
## cPC2        1.130860   0.043351   26.09   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.251 on 1921 degrees of freedom
## Multiple R-squared:  0.8992, Adjusted R-squared:  0.8991 
## F-statistic:  8566 on 2 and 1921 DF,  p-value: < 2.2e-16

summary(glm(Brexit_Support ~ cPC1 + cPC2, data = cpca.df.btp.uk))

## 
## Call:
## glm(formula = Brexit_Support ~ cPC1 + cPC2, data = cpca.df.btp.uk)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.9934  -0.8550  -0.0550   0.7715   4.7209  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.869543   0.028532  170.67   <2e-16 ***
## cPC1        1.247196   0.009907  125.89   <2e-16 ***
## cPC2        1.130860   0.043351   26.09   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 1.566232)
## 
##     Null deviance: 29842.3  on 1923  degrees of freedom
## Residual deviance:  3008.7  on 1921  degrees of freedom
## AIC: 6328.3
## 
## Number of Fisher Scoring iterations: 2

loadings.uk <- cpca.btp.uk$rotation

rownames(loadings.uk) <- colnames(btp.X)
loadings.uk[rev(order(abs(loadings.uk[,1]))),]

##                            V1            V2
## happybrexit_1     0.749839745  2.701631e-02
## lwa_3            -0.334838210  4.671154e-02
## lwa_4             0.268863492 -1.286241e-01
## happytrump_1     -0.207652779  1.858911e-02
## authaggression_4 -0.176150035  6.652192e-02
## populism_3       -0.163668835  5.679898e-02
## authaggression_1 -0.156119792 -7.214861e-03
## populism_4        0.136800007  2.488584e-01
## authaggression_2  0.134329946  5.573821e-02
## sdo_4            -0.132081068  3.213242e-02
## roughedup        -0.128679690  2.097051e-02
## egalitarianism_3 -0.094974741  1.266431e-02
## sdo_2             0.080156310  3.484483e-02
## authtrad1_4       0.075817842 -3.403901e-02
## populism_1        0.073163399 -3.479017e-02
## sdo_3            -0.064866130  6.804666e-02
## authaggression_3  0.056758293 -1.219197e-01
## egalitarianism_1  0.056312323 -6.502809e-02
## sdo_1             0.049984361 -9.189162e-03
## egalitarianism_4 -0.048043500  3.718706e-02
## egalitarianism_2 -0.045274475 -2.235646e-03
## authtrad1_2      -0.043579612  2.711178e-02
## lwa_1             0.042242056  6.555546e-02
## populism_2        0.038565617  8.616577e-05
## authsub1          0.036613787  5.986237e-01
## suspicion_2      -0.035309228 -1.047677e-01
## authtrad1_1       0.034304286 -5.242067e-03
## suspicion_4      -0.030679040  1.291797e-03
## lwa_2             0.029531777  1.333303e-02
## suspicion_1       0.025594384 -1.150830e-01
## lwa_5            -0.020712192  3.706571e-02
## authsub2         -0.020192759 -6.417635e-01
## authtrad1_3      -0.019529206 -5.089239e-03
## authsub4          0.016309102  2.317781e-01
## suspicion_3      -0.012326195 -4.672539e-02
## authsub3         -0.012325480  1.344138e-01
## lwa_6             0.008349547  4.477272e-02

loadings.uk[rev(order(abs(loadings.uk[,2]))),]

##                            V1            V2
## authsub2         -0.020192759 -6.417635e-01
## authsub1          0.036613787  5.986237e-01
## populism_4        0.136800007  2.488584e-01
## authsub4          0.016309102  2.317781e-01
## authsub3         -0.012325480  1.344138e-01
## lwa_4             0.268863492 -1.286241e-01
## authaggression_3  0.056758293 -1.219197e-01
## suspicion_1       0.025594384 -1.150830e-01
## suspicion_2      -0.035309228 -1.047677e-01
## sdo_3            -0.064866130  6.804666e-02
## authaggression_4 -0.176150035  6.652192e-02
## lwa_1             0.042242056  6.555546e-02
## egalitarianism_1  0.056312323 -6.502809e-02
## populism_3       -0.163668835  5.679898e-02
## authaggression_2  0.134329946  5.573821e-02
## suspicion_3      -0.012326195 -4.672539e-02
## lwa_3            -0.334838210  4.671154e-02
## lwa_6             0.008349547  4.477272e-02
## egalitarianism_4 -0.048043500  3.718706e-02
## lwa_5            -0.020712192  3.706571e-02
## sdo_2             0.080156310  3.484483e-02
## populism_1        0.073163399 -3.479017e-02
## authtrad1_4       0.075817842 -3.403901e-02
## sdo_4            -0.132081068  3.213242e-02
## authtrad1_2      -0.043579612  2.711178e-02
## happybrexit_1     0.749839745  2.701631e-02
## roughedup        -0.128679690  2.097051e-02
## happytrump_1     -0.207652779  1.858911e-02
## lwa_2             0.029531777  1.333303e-02
## egalitarianism_3 -0.094974741  1.266431e-02
## sdo_1             0.049984361 -9.189162e-03
## authaggression_1 -0.156119792 -7.214861e-03
## authtrad1_1       0.034304286 -5.242067e-03
## authtrad1_3      -0.019529206 -5.089239e-03
## egalitarianism_2 -0.045274475 -2.235646e-03
## suspicion_4      -0.030679040  1.291797e-03
## populism_2        0.038565617  8.616577e-05

###Principal Component Loadings###

loadings.uk <- data.frame(PA1 = loadings.uk[,1], PA2 = loadings.uk[,2], Contribution = abs(loadings.uk[,1]) + abs(loadings.uk[,2]),
                           PA01 = rep(0,nrow(loadings.uk)), PA02 = rep(0,nrow(loadings.uk)),
                           Variable = colnames(btp.X))


loadings.uk.plot <- ggplot(data = loadings.uk, aes(colour = Contribution)) + 
  geom_segment(aes(x = PA01, y = PA02, xend = PA1, yend = PA2), 
               arrow = arrow(length = unit(0.03, "npc"))) + 
  geom_vline(xintercept = 0, linetype = "dashed") + 
  geom_hline(yintercept = 0, linetype = "dashed") + 
  geom_text_repel(aes(x = PA1, y = PA2, label = Variable)) + 
  geom_circle(aes(x0 = 0, y0 = 0, r = 1), color = "gray") + 
  coord_fixed(xlim = c(-1, 1), ylim = c(-1,1)) + 
  labs(x = "cPCA 1", y = "cPCA 2", 
       title = "Contrastive Principal Component Loadings\nTarget Group: UK Respondents") + 
  scale_color_gradient2(low = "#00AFBB", mid = "#E7B800", high = "#FC4E07", na.value = NA, midpoint = .4)

loadings.uk.plot

## Warning: ggrepel: 32 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

ICA Test Using BtP Data

ica.btp <- fastICA(btp.X, 2, "deflation", "logcosh", tol = 0.00001, row.norm = T, verbose = T)

## Centering

## Whitening

## Deflation FastICA using logcosh approx. to neg-entropy function

## Component 1

## Iteration 1 tol = 0.2051195

## Iteration 2 tol = 0.2005046

## Iteration 3 tol = 0.1970449

## Iteration 4 tol = 0.1925211

## Iteration 5 tol = 0.1890349

## Iteration 6 tol = 0.1845145

## Iteration 7 tol = 0.1809467

## Iteration 8 tol = 0.1763612

## Iteration 9 tol = 0.1726652

## Iteration 10 tol = 0.16796

## Iteration 11 tol = 0.1640965

## Iteration 12 tol = 0.1592283

## Iteration 13 tol = 0.1551649

## Iteration 14 tol = 0.150102

## Iteration 15 tol = 0.1458151

## Iteration 16 tol = 0.1405394

## Iteration 17 tol = 0.1360168

## Iteration 18 tol = 0.1305265

## Iteration 19 tol = 0.1257717

## Iteration 20 tol = 0.1200855

## Iteration 21 tol = 0.1151213

## Iteration 22 tol = 0.1092821

## Iteration 23 tol = 0.1041548

## Iteration 24 tol = 0.09823166

## Iteration 25 tol = 0.09301263

## Iteration 26 tol = 0.08709983

## Iteration 27 tol = 0.08188361

## Iteration 28 tol = 0.0760954

## Iteration 29 tol = 0.07099334

## Iteration 30 tol = 0.06545341

## Iteration 31 tol = 0.0605826

## Iteration 32 tol = 0.05540997

## Iteration 33 tol = 0.05087934

## Iteration 34 tol = 0.04617358

## Iteration 35 tol = 0.04207046

## Iteration 36 tol = 0.03789982

## Iteration 37 tol = 0.03428033

## Iteration 38 tol = 0.03067592

## Iteration 39 tol = 0.02756161

## Iteration 40 tol = 0.02451851

## Iteration 41 tol = 0.02189914

## Iteration 42 tol = 0.01938313

## Iteration 43 tol = 0.01722392

## Iteration 44 tol = 0.0151811

## Iteration 45 tol = 0.01343168

## Iteration 46 tol = 0.01179827

## Iteration 47 tol = 0.01040124

## Iteration 48 tol = 0.00911154

## Iteration 49 tol = 0.008009116

## Iteration 50 tol = 0.007001115

## Iteration 51 tol = 0.006139481

## Iteration 52 tol = 0.005358002

## Iteration 53 tol = 0.004689698

## Iteration 54 tol = 0.004087667

## Iteration 55 tol = 0.003572435

## Iteration 56 tol = 0.003110922

## Iteration 57 tol = 0.002715572

## Iteration 58 tol = 0.002363117

## Iteration 59 tol = 0.002060866

## Iteration 60 tol = 0.001792474

## Iteration 61 tol = 0.001562056

## Iteration 62 tol = 0.001358125

## Iteration 63 tol = 0.001182852

## Iteration 64 tol = 0.001028157

## Iteration 65 tol = 0.000895057

## Iteration 66 tol = 0.0007778563

## Iteration 67 tol = 0.0006769135

## Iteration 68 tol = 0.0005882022

## Iteration 69 tol = 0.0005117238

## Iteration 70 tol = 0.0004446233

## Iteration 71 tol = 0.0003867248

## Iteration 72 tol = 0.0003359968

## Iteration 73 tol = 0.0002921903

## Iteration 74 tol = 0.0002538544

## Iteration 75 tol = 0.0002207253

## Iteration 76 tol = 0.0001917625

## Iteration 77 tol = 0.0001667171

## Iteration 78 tol = 0.0001448402

## Iteration 79 tol = 0.0001259112

## Iteration 80 tol = 0.000109389

## Iteration 81 tol = 9.508578e-05

## Iteration 82 tol = 8.260895e-05

## Iteration 83 tol = 7.180289e-05

## Iteration 84 tol = 6.238169e-05

## Iteration 85 tol = 5.421878e-05

## Iteration 86 tol = 4.710526e-05

## Iteration 87 tol = 4.09396e-05

## Iteration 88 tol = 3.55687e-05

## Iteration 89 tol = 3.091199e-05

## Iteration 90 tol = 2.685692e-05

## Iteration 91 tol = 2.33401e-05

## Iteration 92 tol = 2.027853e-05

## Iteration 93 tol = 1.76227e-05

## Iteration 94 tol = 1.531125e-05

## Iteration 95 tol = 1.330571e-05

## Iteration 96 tol = 1.15606e-05

## Iteration 97 tol = 1.004616e-05

## Iteration 98 tol = 8.728639e-06

## Component 2

## Iteration 1 tol = 0

ica.X <- t(ica.btp$A) # Estimated matrix

rownames(ica.X) <- colnames(btp.X)

colnames(ica.X) <- c("IC1", "IC2")

ica.X[rev(order(abs(ica.X[,1]))),1:2] #Factor/Component loadings

##                          IC1         IC2
## authtrad1_3       0.68490180  0.13089438
## authtrad1_4       0.68167633  0.07680311
## authtrad1_1       0.66626428  0.10268743
## happytrump_1      0.65330678  0.20916839
## lwa_2            -0.63959067  0.30735530
## happybrexit_1     0.56894282  0.17811329
## authtrad1_2      -0.55968342  0.33643366
## egalitarianism_2  0.55694820  0.43817681
## egalitarianism_4 -0.51434772  0.03936232
## roughedup         0.50628236  0.35895608
## authaggression_3  0.48590738  0.11277559
## sdo_2            -0.46516595  0.15168546
## egalitarianism_3  0.46495268  0.49183156
## authsub1          0.46402278 -0.07320862
## authaggression_2  0.46012148  0.10798396
## authaggression_4 -0.44966986  0.21432098
## lwa_1            -0.40659577  0.46117866
## egalitarianism_1 -0.38950833 -0.01955628
## authsub2          0.38203297  0.14013115
## authsub4          0.34988129  0.16482354
## lwa_4            -0.33342333  0.32084164
## sdo_1             0.32222211  0.67102921
## authaggression_1 -0.27176177  0.04818908
## authsub3          0.26974796  0.19480693
## suspicion_4       0.17833443  0.02289231
## lwa_5             0.16019212  0.22214459
## sdo_3             0.15061466  0.76036962
## lwa_3             0.14797780 -0.08557332
## sdo_4             0.14107600  0.76355368
## populism_1        0.13572520  0.49886966
## populism_3        0.12297414  0.31852355
## populism_4        0.10950240 -0.09214674
## lwa_6             0.09806222  0.52869531
## populism_2       -0.09744551  0.43427427
## suspicion_3       0.09410816 -0.23564341
## suspicion_2       0.01907737 -0.28135923
## suspicion_1       0.01153922 -0.38524179

ica.X[rev(order(abs(ica.X[,2]))),1:2] #Factor/Component loadings

##                          IC1         IC2
## sdo_4             0.14107600  0.76355368
## sdo_3             0.15061466  0.76036962
## sdo_1             0.32222211  0.67102921
## lwa_6             0.09806222  0.52869531
## populism_1        0.13572520  0.49886966
## egalitarianism_3  0.46495268  0.49183156
## lwa_1            -0.40659577  0.46117866
## egalitarianism_2  0.55694820  0.43817681
## populism_2       -0.09744551  0.43427427
## suspicion_1       0.01153922 -0.38524179
## roughedup         0.50628236  0.35895608
## authtrad1_2      -0.55968342  0.33643366
## lwa_4            -0.33342333  0.32084164
## populism_3        0.12297414  0.31852355
## lwa_2            -0.63959067  0.30735530
## suspicion_2       0.01907737 -0.28135923
## suspicion_3       0.09410816 -0.23564341
## lwa_5             0.16019212  0.22214459
## authaggression_4 -0.44966986  0.21432098
## happytrump_1      0.65330678  0.20916839
## authsub3          0.26974796  0.19480693
## happybrexit_1     0.56894282  0.17811329
## authsub4          0.34988129  0.16482354
## sdo_2            -0.46516595  0.15168546
## authsub2          0.38203297  0.14013115
## authtrad1_3       0.68490180  0.13089438
## authaggression_3  0.48590738  0.11277559
## authaggression_2  0.46012148  0.10798396
## authtrad1_1       0.66626428  0.10268743
## populism_4        0.10950240 -0.09214674
## lwa_3             0.14797780 -0.08557332
## authtrad1_4       0.68167633  0.07680311
## authsub1          0.46402278 -0.07320862
## authaggression_1 -0.27176177  0.04818908
## egalitarianism_4 -0.51434772  0.03936232
## suspicion_4       0.17833443  0.02289231
## egalitarianism_1 -0.38950833 -0.01955628

#### Plotting Individual Loadings ####

# create a dataframe to be plotted
ica.df.btp <- ica.btp$S %>%
  as_tibble() %>%
  mutate(brexit = btp$happybrexit_1, trump = btp$happytrump_1, pid = btp$pid.merge, country = btp$country, brexit.cat = factor(btp$happybrexit_1, labels = c("Low","Low","Low","Low","Medium", "Medium", "Medium", "High", "High", "High", "High")), trump.cat = factor(btp$happybrexit_1, labels = c("Low","Low","Low","Low","Medium", "Medium", "Medium", "High", "High", "High", "High")))

## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.

colnames(ica.df.btp) <- c("IC1", "IC2", "Brexit_Support", "Trump_Support", "Party_ID", "Country", "Brexit.cat", "Trump.cat")

# plot Brexit Results
ica.plot.btp.brexit <- ggplot(ica.df.btp, aes(x = IC1, y = IC2, color = Brexit_Support, fill = Brexit.cat)) +
  geom_point(alpha = 0.5) +
  labs(title = "ICA BtP", color = "Brexit\nSupport", fill = "Brexit\nSupport\n(Categorical)") +
  theme_minimal() + scale_color_gradient2(low = "#00AFBB", mid = "#E7B800", high = "#FC4E07", midpoint = 5) + 
  scale_fill_manual(values = c("#00AFBB","gold", "#FC4E07")) + 
  stat_ellipse( geom = "polygon", alpha = .3)
ica.plot.btp.brexit

# plot Trump Results
ica.plot.btp.trump <- ggplot(ica.df.btp, aes(x = IC1, y = IC2, color = Trump_Support, fill = Brexit.cat)) +
  geom_point(alpha = 0.5) +
  labs(title = "ICA BtP", color = "Trump\nSupport", fill = "Trump\nSupport\n(Categorical)") +
  theme_minimal() + scale_color_gradient2(low = "#00AFBB", mid = "#E7B800", high = "#FC4E07", midpoint = 5) +
  scale_fill_manual(values = c("#00AFBB","gold", "#FC4E07")) + 
  stat_ellipse( geom = "polygon", alpha = .3)
ica.plot.btp.trump

# plot PID Results
ica.plot.btp.pid <- ggplot(ica.df.btp, aes(x = IC1, y = IC2, color = Party_ID, fill = Party_ID)) +
  geom_point(alpha = 0.5) +
  labs(title = "ICA BtP", color = "Party\nID") +
  theme_minimal() + scale_color_manual(values = c("#00AFBB", "forestgreen", "#FC4E07", "white")) +
  scale_fill_manual(values = c("#00AFBB", "forestgreen", "#FC4E07", "white")) + 
  stat_ellipse(geom = "polygon", alpha = .3)
ica.plot.btp.pid

## Warning: Removed 733 rows containing missing values (geom_point).

# # plot Country Results
# ica.plot.btp.country <- ggplot(ica.df.btp, aes(x = IC1, y = IC2, color = Country)) +
#   geom_point(alpha = 0.5) +
#   labs(title = "ICA BtP", color = "Country") +
#   theme_minimal() #+ scale_color_manual(values = c("#00AFBB", "forestgreen", "#FC4E07", "white"))
# ica.plot.btp.country


# IC Regressions


summary(lm(Brexit_Support ~ IC1 + IC2, data = ica.df.btp))

## 
## Call:
## lm(formula = Brexit_Support ~ IC1 + IC2, data = ica.df.btp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.7925 -2.0306  0.0036  2.0472 10.2891 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.94959    0.04951   99.97   <2e-16 ***
## IC1          2.02522    0.04951   40.90   <2e-16 ***
## IC2          0.63402    0.04951   12.80   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.858 on 3330 degrees of freedom
## Multiple R-squared:  0.3555, Adjusted R-squared:  0.3551 
## F-statistic: 918.5 on 2 and 3330 DF,  p-value: < 2.2e-16

summary(glm(Brexit.cat ~ IC1 + IC2, data = ica.df.btp, family = binomial()))

## 
## Call:
## glm(formula = Brexit.cat ~ IC1 + IC2, family = binomial(), data = ica.df.btp)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8889  -0.7886   0.4695   0.7665   2.5861  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.68798    0.04365  15.762   <2e-16 ***
## IC1          1.45241    0.05893  24.648   <2e-16 ***
## IC2          0.41595    0.04524   9.195   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4389.2  on 3332  degrees of freedom
## Residual deviance: 3296.0  on 3330  degrees of freedom
## AIC: 3302
## 
## Number of Fisher Scoring iterations: 5

summary(lm(Trump_Support ~ IC1 + IC2, data = ica.df.btp))

## 
## Call:
## lm(formula = Trump_Support ~ IC1 + IC2, data = ica.df.btp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.4322 -2.0227  0.1637  1.8774 11.0039 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.1809     0.0465   89.91   <2e-16 ***
## IC1           2.4099     0.0465   51.83   <2e-16 ***
## IC2           0.7716     0.0465   16.59   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.685 on 3330 degrees of freedom
## Multiple R-squared:  0.4707, Adjusted R-squared:  0.4704 
## F-statistic:  1481 on 2 and 3330 DF,  p-value: < 2.2e-16

summary(glm(Trump.cat ~ IC1 + IC2, data = ica.df.btp, family = binomial()))

## 
## Call:
## glm(formula = Trump.cat ~ IC1 + IC2, family = binomial(), data = ica.df.btp)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8889  -0.7886   0.4695   0.7665   2.5861  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.68798    0.04365  15.762   <2e-16 ***
## IC1          1.45241    0.05893  24.648   <2e-16 ***
## IC2          0.41595    0.04524   9.195   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4389.2  on 3332  degrees of freedom
## Residual deviance: 3296.0  on 3330  degrees of freedom
## AIC: 3302
## 
## Number of Fisher Scoring iterations: 5

summary(lm(as.numeric(Party_ID) ~ IC1 + IC2, data = ica.df.btp))

## 
## Call:
## lm(formula = as.numeric(Party_ID) ~ IC1 + IC2, data = ica.df.btp)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.02326 -0.71877  0.04374  0.67676  2.14258 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.96731    0.01537 128.032   <2e-16 ***
## IC1          0.49491    0.01522  32.511   <2e-16 ***
## IC2          0.03421    0.01495   2.288   0.0222 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7823 on 2597 degrees of freedom
##   (733 observations deleted due to missingness)
## Multiple R-squared:  0.2894, Adjusted R-squared:  0.2889 
## F-statistic: 528.9 on 2 and 2597 DF,  p-value: < 2.2e-16

ica.df.btp.pid <- ica.df.btp
ica.df.btp.pid$Party_ID[ica.df.btp.pid$Party_ID == "Independent"] <- NA  
ica.df.btp.pid <- na.omit(ica.df.btp.pid)
ica.df.btp.pid$Party_ID <- factor(ica.df.btp.pid$Party_ID, labels = c("Liberal", "Conservative"))


summary(glm(Party_ID ~ IC1 + IC2, data = ica.df.btp.pid, family = binomial()))

## 
## Call:
## glm(formula = Party_ID ~ IC1 + IC2, family = binomial(), data = ica.df.btp.pid)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7278  -0.8782  -0.1191   0.8380   2.9135  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.16756    0.05389  -3.109  0.00188 ** 
## IC1          1.81162    0.08396  21.577  < 2e-16 ***
## IC2          0.12267    0.04978   2.464  0.01372 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 3101.0  on 2236  degrees of freedom
## Residual deviance: 2197.2  on 2234  degrees of freedom
## AIC: 2203.2
## 
## Number of Fisher Scoring iterations: 5

#####Component Loadings Plot#######

loadings.ica.btp <- t(ica.btp$A)

#Total <- rowSums(abs(loadings.dat%*%diag(c(.406,.064),2,2)))

loadings.ica.btp <- data.frame(PA1 = loadings.ica.btp[,1], PA2 = loadings.ica.btp[,2], Contribution = abs(loadings.ica.btp[,1]) + abs(loadings.ica.btp[,2]),
                           PA01 = rep(0,nrow(loadings.ica.btp)), PA02 = rep(0,nrow(loadings.ica.btp)),
                           Variable = colnames(btp.X))


loadings.ica.btp.plot <- ggplot(data = loadings.ica.btp, aes(colour = Contribution)) + 
  geom_segment(aes(x = PA01, y = PA02, xend = PA1, yend = PA2), 
               arrow = arrow(length = unit(0.03, "npc"))) + 
  geom_vline(xintercept = 0, linetype = "dashed") + 
  geom_hline(yintercept = 0, linetype = "dashed") + 
  geom_text_repel(aes(x = PA1, y = PA2, label = Variable)) + 
  geom_circle(aes(x0 = 0, y0 = 0, r = 1), color = "gray") + 
  coord_fixed(xlim = c(-1, 1), ylim = c(-1,1)) + 
  labs(x = "ICA 1", y = "ICA 2", 
       title = "Independent Component Loadings") + 
  scale_color_gradient2(low = "#00AFBB", mid = "#E7B800", high = "#FC4E07", na.value = NA, midpoint = .5)

loadings.ica.btp.plot

## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

R Notebook

PCA

PCA Plots

cPCA

BtP Dataset

PCA Plots

ICA Test Using BtP Data