8  Regresi lanjutan

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)
library(stargazer)

Please cite as: 
 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 
library(fastDummies)

sptk21 <- read.csv("sptk2021.csv")
pandemic <- 
  data.frame(prov = sptk21$R101_NAMA,
             urban = sptk21$R105,
             sex = sptk21$B4K4,
             age = sptk21$B4K5,
             marital = sptk21$B4K6,
             education = sptk21$B4K7,
             length_stay = sptk21$LAMA_TINGG,
             employment = sptk21$R601A,
             sat_health = sptk21$R708,
             sat_social = sptk21$R1005,
             happy_during = sptk21$R1501,
             happy_before = sptk21$R1502) %>% 
  mutate(happy_diff = happy_during-happy_before,
         happy_increase = ifelse(happy_diff>0, 1, 0))
str(pandemic)
'data.frame':   74684 obs. of  14 variables:
 $ prov          : chr  "Aceh" "Aceh" "Aceh" "Aceh" ...
 $ urban         : int  2 2 2 2 2 2 2 2 2 2 ...
 $ sex           : int  2 1 2 2 1 1 1 2 1 1 ...
 $ age           : int  95 45 70 60 33 30 43 32 43 36 ...
 $ marital       : int  4 2 4 4 2 2 2 3 2 2 ...
 $ education     : int  2 3 3 3 5 5 4 5 3 10 ...
 $ length_stay   : int  90 40 65 56 30 8 43 32 15 35 ...
 $ employment    : int  2 1 2 1 1 1 1 1 1 1 ...
 $ sat_health    : int  4 7 8 7 8 9 8 8 8 8 ...
 $ sat_social    : int  7 8 8 9 8 9 8 9 6 8 ...
 $ happy_during  : int  7 7 7 8 8 8 8 7 8 9 ...
 $ happy_before  : int  5 6 5 5 6 7 6 5 5 7 ...
 $ happy_diff    : int  2 1 2 3 2 1 2 2 3 2 ...
 $ happy_increase: num  1 1 1 1 1 1 1 1 1 1 ...
# during - before
## if + = during surplus = happy increase
## if - = before surplus = happy decrease
dat <- pandemic %>% dummy_cols(c("urban","sex","marital","employment"))
dat <- dat %>% select(urban_1:sex_2, age, marital_1:marital_4, education:length_stay, 
                      employment_1:employment_2, sat_health:sat_social, happy_during:happy_increase)
str(dat)
'data.frame':   74684 obs. of  19 variables:
 $ urban_1       : int  0 0 0 0 0 0 0 0 0 0 ...
 $ urban_2       : int  1 1 1 1 1 1 1 1 1 1 ...
 $ sex_1         : int  0 1 0 0 1 1 1 0 1 1 ...
 $ sex_2         : int  1 0 1 1 0 0 0 1 0 0 ...
 $ age           : int  95 45 70 60 33 30 43 32 43 36 ...
 $ marital_1     : int  0 0 0 0 0 0 0 0 0 0 ...
 $ marital_2     : int  0 1 0 0 1 1 1 0 1 1 ...
 $ marital_3     : int  0 0 0 0 0 0 0 1 0 0 ...
 $ marital_4     : int  1 0 1 1 0 0 0 0 0 0 ...
 $ education     : int  2 3 3 3 5 5 4 5 3 10 ...
 $ length_stay   : int  90 40 65 56 30 8 43 32 15 35 ...
 $ employment_1  : int  0 1 0 1 1 1 1 1 1 1 ...
 $ employment_2  : int  1 0 1 0 0 0 0 0 0 0 ...
 $ sat_health    : int  4 7 8 7 8 9 8 8 8 8 ...
 $ sat_social    : int  7 8 8 9 8 9 8 9 6 8 ...
 $ happy_during  : int  7 7 7 8 8 8 8 7 8 9 ...
 $ happy_before  : int  5 6 5 5 6 7 6 5 5 7 ...
 $ happy_diff    : int  2 1 2 3 2 1 2 2 3 2 ...
 $ happy_increase: num  1 1 1 1 1 1 1 1 1 1 ...
stargazer(dat, type="latex")

% Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
% Date and time: Sun, Apr 23, 2023 - 01:52:07
\begin{table}[!htbp] \centering 
  \caption{} 
  \label{} 
\begin{tabular}{@{\extracolsep{5pt}}lccccc} 
\\[-1.8ex]\hline 
\hline \\[-1.8ex] 
Statistic & \multicolumn{1}{c}{N} & \multicolumn{1}{c}{Mean} & \multicolumn{1}{c}{St. Dev.} & \multicolumn{1}{c}{Min} & \multicolumn{1}{c}{Max} \\ 
\hline \\[-1.8ex] 
urban\_1 & 74,684 & 0.431 & 0.495 & 0 & 1 \\ 
urban\_2 & 74,684 & 0.569 & 0.495 & 0 & 1 \\ 
sex\_1 & 74,684 & 0.489 & 0.500 & 0 & 1 \\ 
sex\_2 & 74,684 & 0.511 & 0.500 & 0 & 1 \\ 
age & 74,684 & 47.430 & 13.500 & 14 & 98 \\ 
marital\_1 & 74,684 & 0.024 & 0.154 & 0 & 1 \\ 
marital\_2 & 74,684 & 0.813 & 0.390 & 0 & 1 \\ 
marital\_3 & 74,684 & 0.033 & 0.179 & 0 & 1 \\ 
marital\_4 & 74,684 & 0.130 & 0.336 & 0 & 1 \\ 
education & 74,684 & 4.007 & 1.957 & 1 & 10 \\ 
length\_stay & 74,684 & 30.384 & 19.515 & 0 & 98 \\ 
employment\_1 & 74,684 & 0.715 & 0.452 & 0 & 1 \\ 
employment\_2 & 74,684 & 0.285 & 0.452 & 0 & 1 \\ 
sat\_health & 74,684 & 7.651 & 1.496 & 0 & 10 \\ 
sat\_social & 74,684 & 7.974 & 1.371 & 0 & 10 \\ 
happy\_during & 74,684 & 7.761 & 1.333 & 0 & 10 \\ 
happy\_before & 74,684 & 8.052 & 1.410 & 0 & 10 \\ 
happy\_diff & 74,684 & $-$0.291 & 1.164 & $-$10 & 10 \\ 
happy\_increase & 74,684 & 0.111 & 0.314 & 0 & 1 \\ 
\hline \\[-1.8ex] 
\end{tabular} 
\end{table} 
pandemic2 <- 
  data.frame(province = sptk21$R101_NAMA,
             urban = ifelse(sptk21$R105==1, 1, 0),
             female = ifelse(sptk21$B4K4==2, 1, 0),
             age = sptk21$B4K5,
             marital = sptk21$B4K6,
             education = sptk21$B4K7,
             length_stay = sptk21$LAMA_TINGG,
             unemployed = ifelse(sptk21$R601A==2, 1, 0),
             sat_health = sptk21$R708,
             sat_social = sptk21$R1005,
             happy_during = sptk21$R1501,
             happy_before = sptk21$R1502) %>% 
  mutate(happy_diff = happy_during-happy_before,
         happy_increase = ifelse(happy_diff>0, 1, 0),
         marital = factor(recode(marital, `1`="not married yet", `2`="married", `3`="divorced", `4`="widowed"),
                          levels=c("not married yet","married","divorced","widowed")))

#,"sat_health","sat_social"
pandemic3 <- pandemic2 %>% select(!province)
happy_during <- lm(happy_during~., pandemic3 %>% select(!c("happy_before","happy_diff","happy_increase")))
happy_before <- lm(happy_before~., pandemic3 %>% select(!c("happy_during","happy_diff","happy_increase")))
happy_diff <- lm(happy_diff~., pandemic3 %>% select(!c("happy_during","happy_before","happy_increase")))
happy_increase <- glm(happy_increase~., family=binomial, data=pandemic3 %>% select(!c("happy_during","happy_before","happy_diff")))
summary(happy_during)

Call:
lm(formula = happy_during ~ ., data = pandemic3 %>% select(!c("happy_before", 
    "happy_diff", "happy_increase")))

Residuals:
    Min      1Q  Median      3Q     Max 
-9.2122 -0.6399  0.0626  0.7262  5.6659 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)      3.1149093  0.0463708  67.174  < 2e-16 ***
urban           -0.0454867  0.0093076  -4.887 1.03e-06 ***
female           0.0855087  0.0098220   8.706  < 2e-16 ***
age              0.0057016  0.0004359  13.079  < 2e-16 ***
maritalmarried   0.2559212  0.0285139   8.975  < 2e-16 ***
maritaldivorced -0.2007229  0.0370534  -5.417 6.08e-08 ***
maritalwidowed   0.0330822  0.0320129   1.033    0.301    
education        0.0715472  0.0024724  28.938  < 2e-16 ***
length_stay     -0.0013099  0.0002859  -4.582 4.61e-06 ***
unemployed       0.0784491  0.0104993   7.472 7.99e-14 ***
sat_health       0.2716369  0.0031148  87.207  < 2e-16 ***
sat_social       0.2255160  0.0032723  68.917  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.184 on 74672 degrees of freedom
Multiple R-squared:  0.2111,    Adjusted R-squared:  0.211 
F-statistic:  1816 on 11 and 74672 DF,  p-value: < 2.2e-16
summary(happy_before)

Call:
lm(formula = happy_before ~ ., data = pandemic3 %>% select(!c("happy_during", 
    "happy_diff", "happy_increase")))

Residuals:
    Min      1Q  Median      3Q     Max 
-8.9686 -0.6139  0.1010  0.8173  5.4675 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)      3.4295006  0.0496021  69.140  < 2e-16 ***
urban            0.1757702  0.0099562  17.654  < 2e-16 ***
female           0.1266096  0.0105064  12.051  < 2e-16 ***
age              0.0040274  0.0004663   8.637  < 2e-16 ***
maritalmarried   0.2178123  0.0305009   7.141 9.34e-13 ***
maritaldivorced -0.1747493  0.0396355  -4.409 1.04e-05 ***
maritalwidowed  -0.0341905  0.0342437  -0.998 0.318067    
education        0.0663321  0.0026447  25.081  < 2e-16 ***
length_stay     -0.0011691  0.0003058  -3.823 0.000132 ***
unemployed       0.0493670  0.0112309   4.396 1.11e-05 ***
sat_health       0.2445975  0.0033319  73.411  < 2e-16 ***
sat_social       0.2518427  0.0035003  71.949  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.266 on 74672 degrees of freedom
Multiple R-squared:  0.1933,    Adjusted R-squared:  0.1932 
F-statistic:  1627 on 11 and 74672 DF,  p-value: < 2.2e-16
summary(happy_diff)

Call:
lm(formula = happy_diff ~ ., data = pandemic3 %>% select(!c("happy_during", 
    "happy_before", "happy_increase")))

Residuals:
    Min      1Q  Median      3Q     Max 
-9.8036 -0.6340  0.1908  0.3891 10.5309 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)     -0.3145913  0.0453839  -6.932 4.19e-12 ***
urban           -0.2212570  0.0091095 -24.289  < 2e-16 ***
female          -0.0411009  0.0096129  -4.276 1.91e-05 ***
age              0.0016741  0.0004266   3.924 8.72e-05 ***
maritalmarried   0.0381089  0.0279071   1.366  0.17208    
maritaldivorced -0.0259736  0.0362648  -0.716  0.47386    
maritalwidowed   0.0672727  0.0313316   2.147  0.03179 *  
education        0.0052150  0.0024198   2.155  0.03115 *  
length_stay     -0.0001408  0.0002798  -0.503  0.61490    
unemployed       0.0290821  0.0102758   2.830  0.00465 ** 
sat_health       0.0270394  0.0030486   8.870  < 2e-16 ***
sat_social      -0.0263268  0.0032026  -8.220  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.159 on 74672 degrees of freedom
Multiple R-squared:  0.01028,   Adjusted R-squared:  0.01013 
F-statistic:  70.5 on 11 and 74672 DF,  p-value: < 2.2e-16
summary(happy_increase)

Call:
glm(formula = happy_increase ~ ., family = binomial, data = pandemic3 %>% 
    select(!c("happy_during", "happy_before", "happy_diff")))

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.8054  -0.5097  -0.4710  -0.4321   2.3774  

Coefficients:
                  Estimate Std. Error z value Pr(>|z|)    
(Intercept)     -0.9894196  0.1245823  -7.942 1.99e-15 ***
urban           -0.2949294  0.0256084 -11.517  < 2e-16 ***
female          -0.1311063  0.0266211  -4.925 8.44e-07 ***
age             -0.0026332  0.0011724  -2.246  0.02471 *  
maritalmarried   0.1620582  0.0809362   2.002  0.04525 *  
maritaldivorced -0.0030579  0.1062969  -0.029  0.97705    
maritalwidowed   0.2447544  0.0900995   2.716  0.00660 ** 
education        0.0032011  0.0067282   0.476  0.63423    
length_stay     -0.0023426  0.0007719  -3.035  0.00241 ** 
unemployed       0.0359051  0.0286366   1.254  0.20991    
sat_health      -0.0169927  0.0082930  -2.049  0.04046 *  
sat_social      -0.0972900  0.0082010 -11.863  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 52070  on 74683  degrees of freedom
Residual deviance: 51716  on 74672  degrees of freedom
AIC: 51740

Number of Fisher Scoring iterations: 5
stargazer(happy_before, happy_during, happy_diff, happy_increase)

% Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
% Date and time: Sun, Apr 23, 2023 - 01:52:08
\begin{table}[!htbp] \centering 
  \caption{} 
  \label{} 
\begin{tabular}{@{\extracolsep{5pt}}lcccc} 
\\[-1.8ex]\hline 
\hline \\[-1.8ex] 
 & \multicolumn{4}{c}{\textit{Dependent variable:}} \\ 
\cline{2-5} 
\\[-1.8ex] & happy\_before & happy\_during & happy\_diff & happy\_increase \\ 
\\[-1.8ex] & \textit{OLS} & \textit{OLS} & \textit{OLS} & \textit{logistic} \\ 
\\[-1.8ex] & (1) & (2) & (3) & (4)\\ 
\hline \\[-1.8ex] 
 urban & 0.176$^{***}$ & $-$0.045$^{***}$ & $-$0.221$^{***}$ & $-$0.295$^{***}$ \\ 
  & (0.010) & (0.009) & (0.009) & (0.026) \\ 
  & & & & \\ 
 female & 0.127$^{***}$ & 0.086$^{***}$ & $-$0.041$^{***}$ & $-$0.131$^{***}$ \\ 
  & (0.011) & (0.010) & (0.010) & (0.027) \\ 
  & & & & \\ 
 age & 0.004$^{***}$ & 0.006$^{***}$ & 0.002$^{***}$ & $-$0.003$^{**}$ \\ 
  & (0.0005) & (0.0004) & (0.0004) & (0.001) \\ 
  & & & & \\ 
 maritalmarried & 0.218$^{***}$ & 0.256$^{***}$ & 0.038 & 0.162$^{**}$ \\ 
  & (0.031) & (0.029) & (0.028) & (0.081) \\ 
  & & & & \\ 
 maritaldivorced & $-$0.175$^{***}$ & $-$0.201$^{***}$ & $-$0.026 & $-$0.003 \\ 
  & (0.040) & (0.037) & (0.036) & (0.106) \\ 
  & & & & \\ 
 maritalwidowed & $-$0.034 & 0.033 & 0.067$^{**}$ & 0.245$^{***}$ \\ 
  & (0.034) & (0.032) & (0.031) & (0.090) \\ 
  & & & & \\ 
 education & 0.066$^{***}$ & 0.072$^{***}$ & 0.005$^{**}$ & 0.003 \\ 
  & (0.003) & (0.002) & (0.002) & (0.007) \\ 
  & & & & \\ 
 length\_stay & $-$0.001$^{***}$ & $-$0.001$^{***}$ & $-$0.0001 & $-$0.002$^{***}$ \\ 
  & (0.0003) & (0.0003) & (0.0003) & (0.001) \\ 
  & & & & \\ 
 unemployed & 0.049$^{***}$ & 0.078$^{***}$ & 0.029$^{***}$ & 0.036 \\ 
  & (0.011) & (0.010) & (0.010) & (0.029) \\ 
  & & & & \\ 
 sat\_health & 0.245$^{***}$ & 0.272$^{***}$ & 0.027$^{***}$ & $-$0.017$^{**}$ \\ 
  & (0.003) & (0.003) & (0.003) & (0.008) \\ 
  & & & & \\ 
 sat\_social & 0.252$^{***}$ & 0.226$^{***}$ & $-$0.026$^{***}$ & $-$0.097$^{***}$ \\ 
  & (0.004) & (0.003) & (0.003) & (0.008) \\ 
  & & & & \\ 
 Constant & 3.430$^{***}$ & 3.115$^{***}$ & $-$0.315$^{***}$ & $-$0.989$^{***}$ \\ 
  & (0.050) & (0.046) & (0.045) & (0.125) \\ 
  & & & & \\ 
\hline \\[-1.8ex] 
Observations & 74,684 & 74,684 & 74,684 & 74,684 \\ 
R$^{2}$ & 0.193 & 0.211 & 0.010 &  \\ 
Adjusted R$^{2}$ & 0.193 & 0.211 & 0.010 &  \\ 
Log Likelihood &  &  &  & $-$25,858.220 \\ 
Akaike Inf. Crit. &  &  &  & 51,740.440 \\ 
Residual Std. Error (df = 74672) & 1.266 & 1.184 & 1.159 &  \\ 
F Statistic (df = 11; 74672) & 1,626.859$^{***}$ & 1,816.459$^{***}$ & 70.496$^{***}$ &  \\ 
\hline 
\hline \\[-1.8ex] 
\textit{Note:}  & \multicolumn{4}{r}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\ 
\end{tabular} 
\end{table}