Lecture 2: Diagnostics and Model Evaluation
Colin Rundel
1/23/2018
Some more linear models
Linear model and data

ggplot(d, aes(x=x, y=y)) +
  geom_line() +
  geom_smooth(method="lm", color="blue", se = FALSE)

[Figure: observed series (black line) with fitted linear regression line (blue); y runs roughly -2.5 to 7.5 over x = 0 to 100]
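The data frame d is not defined anywhere on these slides. A minimal sketch of data with a similar shape (the seed, intercept, slope, and noise level here are assumptions chosen to roughly match the fitted coefficients below, not the actual values used in lecture):

set.seed(20180123)                           # assumed seed, for reproducibility only
d = data.frame(x = 1:100)
d$y = -1 + 0.07 * d$x + rnorm(100, sd = 1.5) # noise scale matches the residual SE below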
Linear model

l = lm(y ~ x, data=d)
summary(l)

## 
## Call:
## lm(formula = y ~ x, data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6041 -1.2142 -0.1973  1.1969  3.7072 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.030315   0.310326   -3.32  0.00126 ** 
## x            0.068409   0.005335   12.82  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.54 on 98 degrees of freedom
## Multiple R-squared:  0.6266, Adjusted R-squared:  0.6227 
## F-statistic: 164.4 on 1 and 98 DF,  p-value: < 2.2e-16
Bayesian model specification (JAGS)

model = 
"model{
  # Likelihood
  for(i in 1:length(y)){
    mu[i] = beta[1] + beta[2]*x[i]
    y[i] ~ dnorm(mu[i], tau)   # JAGS' dnorm is parameterized by precision (tau), not variance
  }

  # Prior for beta
  for(j in 1:2){
    beta[j] ~ dnorm(0, 1/100)
  }

  # Prior for sigma2 / tau
  tau ~ dgamma(1, 1)
  sigma2 = 1/tau
}"
Bayesian model fitting (JAGS)

n_burn = 1000; n_iter = 5000

m = rjags::jags.model(
  textConnection(model), data=d,
  quiet=TRUE, n.chains = 4
)
update(m, n.iter=n_burn, progress.bar="none")
samp = rjags::coda.samples(
  m, variable.names=c("beta","sigma2"),
  n.iter=n_iter, progress.bar="none"
)

str(samp, max.level=1)

## List of 4
##  $ : mcmc [1:5000, 1:3] -1.051 -1.154 -1.363 -0.961 -0.775 ...
##   ..- attr(*, "dimnames")=List of 2
##   ..- attr(*, "mcpar")= num [1:3] 1001 6000 1
##  $ : mcmc [1:5000, 1:3] -0.927 -1.5 -1.591 -1.726 -1.445 ...
##   ..- attr(*, "dimnames")=List of 2
##   ..- attr(*, "mcpar")= num [1:3] 1001 6000 1
##  $ : mcmc [1:5000, 1:3] -1.161 -1.179 -1.089 -1.099 -0.927 ...
##   ..- attr(*, "dimnames")=List of 2
##   ..- attr(*, "mcpar")= num [1:3] 1001 6000 1
##  $ : mcmc [1:5000, 1:3] -0.602 -0.175 -0.397 -0.555 -0.54 ...
##   ..- attr(*, "dimnames")=List of 2
##   ..- attr(*, "mcpar")= num [1:3] 1001 6000 1
##  - attr(*, "class")= chr "mcmc.list"
coda

plot(samp)

[Figure: trace plots (left) and posterior density plots (right) for beta[1], beta[2], and sigma2, iterations 1001-6000]
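Beyond eyeballing the trace plots, the coda package also provides numeric convergence diagnostics. This step is not on the slides, but it is a common follow-up; a sketch using the samp object from the previous slide:

# Gelman-Rubin potential scale reduction factor across the 4 chains
# (values near 1 suggest the chains have converged to the same distribution)
coda::gelman.diag(samp)

# effective sample size, discounting autocorrelation within each chain
coda::effectiveSize(samp)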
tidybayes

df_mcmc = tidybayes::gather_samples(samp, beta[i], sigma2) %>%
  mutate(parameter = paste0(term, ifelse(is.na(i), "", paste0("[",i,"]")))) %>%
  group_by(parameter, .chain)

head(df_mcmc)

## # A tibble: 6 x 6
## # Groups:   parameter, .chain [2]
##   .chain .iteration     i term  estimate parameter
##    <int>      <int> <int> <chr>    <dbl> <chr>    
## 1      1          1     1 beta   -1.05   beta[1]  
## 2      1          1     2 beta    0.0713 beta[2]  
## 3      1          2     1 beta   -1.15   beta[1]  
## 4      1          2     2 beta    0.0645 beta[2]  
## 5      1          3     1 beta   -1.36   beta[1]  
## 6      1          3     2 beta    0.0726 beta[2]  

tail(df_mcmc)

## # A tibble: 6 x 6
## # Groups:   parameter, .chain [1]
##   .chain .iteration     i term   estimate parameter
##    <int>      <int> <int> <chr>     <dbl> <chr>    
## 1      4       4995    NA sigma2     1.67 sigma2   
## 2      4       4996    NA sigma2     2.68 sigma2   
## 3      4       4997    NA sigma2     2.58 sigma2   
## 4      4       4998    NA sigma2     1.93 sigma2   
## 5      4       4999    NA sigma2     2.05 sigma2   
## 6      4       5000    NA sigma2     2.00 sigma2   
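Note: gather_samples comes from an early tidybayes release and has since been superseded by gather_draws, which returns .variable / .value columns instead of term / estimate. A sketch of a roughly equivalent call (the renames are an assumption made so the later slides keep working unchanged):

df_mcmc = tidybayes::gather_draws(samp, beta[i], sigma2) %>%
  rename(term = .variable, estimate = .value) %>%       # match the old column names
  mutate(parameter = paste0(term, ifelse(is.na(i), "", paste0("[", i, "]")))) %>%
  group_by(parameter, .chain)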
Posterior plots

ggplot(df_mcmc, aes(fill=as.factor(.chain), group=.chain, x=estimate)) +
  geom_density(alpha=0.5, color=NA) +
  facet_wrap(~parameter, scales = "free")

[Figure: posterior density plots of beta[1], beta[2], and sigma2, faceted by parameter with free scales, filled by chain]
Trace plots

df_mcmc %>% filter(.iteration <= 500) %>%
  ggplot(aes(x=.iteration, y=estimate, color=as.factor(.chain))) +
    geom_line(alpha=0.5) +
    geom_smooth(method="loess") +
    facet_grid(parameter~.chain, scale="free_y") +
    labs(color="chain")

[Figure: trace plots of the first 500 iterations, faceted by parameter (rows: beta[1], beta[2], sigma2) and chain (columns: 1-4), with loess smooths]
Credible Intervals

df_ci = tidybayes::mean_hdi(df_mcmc, estimate, .prob=c(0.8, 0.95))

df_ci

## # A tibble: 24 x 6
## # Groups:   parameter, .chain [12]
##    parameter .chain estimate conf.low conf.high .prob
##    <chr>      <int>    <dbl>    <dbl>     <dbl> <dbl>
##  1 beta[1]        1  -1.02    -1.40    -0.587   0.800
##  2 beta[1]        2  -1.01    -1.44    -0.634   0.800
##  3 beta[1]        3  -1.04    -1.44    -0.656   0.800
##  4 beta[1]        4  -1.04    -1.44    -0.656   0.800
##  5 beta[2]        1   0.0683   0.0613   0.0755  0.800
##  6 beta[2]        2   0.0681   0.0613   0.0752  0.800
##  7 beta[2]        3   0.0686   0.0621   0.0753  0.800
##  8 beta[2]        4   0.0686   0.0621   0.0755  0.800
##  9 sigma2         1   2.40     1.96     2.81    0.800
## 10 sigma2         2   2.40     1.95     2.80    0.800
## # ... with 14 more rows
Aside - mean_qi vs mean_hdi

These differ in the interval they report: mean_qi returns the quantile (equal-tailed) interval, while mean_hdi returns the highest-density interval.

[Figure: densities of dist_1, dist_2, and dist_3 with 0.5 and 0.95 probability intervals, hpd (top row) vs qi (bottom row)]
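To see the difference numerically, here is a small sketch (not from the slides) using a right-skewed sample, where the two interval types disagree; coda::HPDinterval computes the highest-density interval:

x = rgamma(1e5, shape = 2, rate = 1)   # right-skewed example distribution

quantile(x, c(0.025, 0.975))           # 95% quantile (equal-tailed) interval
coda::HPDinterval(coda::as.mcmc(x))    # 95% highest-density interval, shifted toward the mode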
Caterpillar Plots

df_ci %>%
  ggplot(aes(x=estimate, y=.chain, color=as.factor(.chain))) +
    tidybayes::geom_pointintervalh() +
    facet_grid(parameter~.) +
    ylim(0.5, 4.5)

[Figure: point estimates with 80% and 95% intervals for beta[1], beta[2], and sigma2, one row per chain]
Prediction
Revised model

model_pred = 
"model{
  # Likelihood
  for(i in 1:length(y)){
    mu[i] = beta[1] + beta[2]*x[i]
    y[i] ~ dnorm(mu[i], tau)
    y_pred[i] ~ dnorm(mu[i], tau)   # posterior predictive draw at x[i]
  }

  # Prior for beta
  for(j in 1:2){
    beta[j] ~ dnorm(0, 1/100)
  }

  # Prior for sigma2 / tau
  tau ~ dgamma(1, 1)
  sigma2 = 1/tau
}"
Revised fitting

n_burn = 1000; n_iter = 5000

m = rjags::jags.model(
  textConnection(model_pred), data=d,
  quiet=TRUE, n.chains = 1
)
update(m, n.iter=n_burn, progress.bar="none")
pred = rjags::coda.samples(
  m, variable.names=c("beta","sigma2","mu","y_pred","y","x"),
  n.iter=n_iter, progress.bar="none"
)

Monitoring y and x simply records the (fixed) data alongside each draw, which makes the tidy joins on the following slides straightforward.
Predictions

df_pred = tidybayes::spread_samples(pred, y_pred[i], y[i], x[i], mu[i]) %>%
  mutate(resid = y - mu)

df_pred

## # A tibble: 500,000 x 8
## # Groups:   i [100]
##    .chain .iteration     i y_pred       y     x      mu   resid
##     <int>      <int> <int>  <dbl>   <dbl> <dbl>   <dbl>   <dbl>
##  1      1          1     1 -0.858  -3.24   1.00 -0.554  -2.69  
##  2      1          1     2 -0.638  -2.00   2.00 -0.496  -1.51  
##  3      1          1     3  0.340  -2.59   3.00 -0.438  -2.16  
##  4      1          1     4 -2.69   -3.07   4.00 -0.380  -2.69  
##  5      1          1     5 -1.29   -1.88   5.00 -0.322  -1.56  
##  6      1          1     6  0.758  -0.807  6.00 -0.264  -0.543 
##  7      1          1     7  1.93   -2.09   7.00 -0.206  -1.89  
##  8      1          1     8  3.00   -0.227  8.00 -0.148  -0.0794
##  9      1          1     9 -1.20    0.333  9.00 -0.0900  0.423 
## 10      1          1    10 -0.515   1.98  10.0  -0.0320  2.01  
## # ... with 499,990 more rows
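One use of these draws that is not on the slides: checking posterior predictive calibration, i.e. what fraction of the observed y fall inside their 95% predictive intervals (roughly 95% if the model is well calibrated). A sketch using the df_pred object from above:

df_pred %>%
  group_by(i) %>%
  summarize(
    y     = first(y),                 # observed value (constant within each i)
    lower = quantile(y_pred, 0.025),  # pointwise 95% predictive interval
    upper = quantile(y_pred, 0.975)
  ) %>%
  ungroup() %>%
  summarize(coverage = mean(y >= lower & y <= upper))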
μ vs y_pred

df_pred %>% ungroup() %>%
  filter(i %in% c(1, 50, 100)) %>%
  select(i, mu, y_pred) %>%
  tidyr::gather(param, val, -i) %>%
  ggplot(aes(x=val, fill=param)) +
    geom_density(alpha=0.5) +
    facet_wrap(~i)

[Figure: posterior densities of mu and y_pred at i = 1, 50, and 100; the y_pred densities are much wider than the corresponding mu densities]
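The extra spread in y_pred is the observation noise: conditional on a draw of mu and tau, y_pred adds dnorm noise on top of the line. A quick numeric check (a sketch, not on the slides; i = 50 is an arbitrary choice):

# sd(y_pred)^2 should be roughly sd(mu)^2 plus the posterior mean of sigma2
df_pred %>% ungroup() %>%
  filter(i == 50) %>%
  summarize(sd_mu = sd(mu), sd_y_pred = sd(y_pred))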
Predictions

df_pred %>% ungroup() %>% filter(.iteration <= 200) %>%
  ggplot(aes(x=x)) +
    geom_line(aes(y=mu, group=.iteration), color="blue", alpha=0.1) +
    geom_point(data=d, aes(y=y))

[Figure: observed data (points) with 200 posterior draws of the regression line mu (semi-transparent blue lines)]
Posterior distribution (μ)

df_pred %>% ungroup() %>%
  ggplot(aes(x=x)) +
    tidybayes::stat_lineribbon(aes(y=mu), alpha=0.5) +
    geom_point(data=d, aes(y=y))

[Figure: posterior of mu as a line with 0.5, 0.8, and 0.95 ribbons, overlaid on the observed data]
Posterior predictive distribution (y_pred)

df_pred %>% ungroup() %>%
  ggplot(aes(x=x)) +
    tidybayes::stat_lineribbon(aes(y=y_pred), alpha=0.5) +
    geom_point(data=d, aes(y=y))

[Figure: posterior predictive distribution of y_pred as a line with 0.5, 0.8, and 0.95 ribbons, overlaid on the observed data]
Residual plot

df_pred %>% ungroup() %>%
  ggplot(aes(x=x, y=resid)) +
    geom_boxplot(aes(group=x), outlier.alpha = 0.2)

[Figure: boxplots of the posterior residuals (y - mu) at each x, ranging roughly from -2 to 4]
Model Evaluation
Model assessment?

If we think back to our first regression class, one common option is $R^2$.

Quick review:

$$
\underbrace{\sum_{i=1}^n (Y_i - \bar{Y})^2}_{\text{Total}}
= \underbrace{\sum_{i=1}^n (\hat{Y}_i - \bar{Y})^2}_{\text{Model}}
+ \underbrace{\sum_{i=1}^n (Y_i - \hat{Y}_i)^2}_{\text{Error}}
$$

$$
R^2
= \frac{\sum_{i=1}^n (\hat{Y}_i - \bar{Y})^2}{\sum_{i=1}^n (Y_i - \bar{Y})^2}
= \frac{\text{Var}(\hat{\mathbf{Y}})}{\text{Var}(\mathbf{Y})}
= \text{Cor}(\mathbf{Y}, \hat{\mathbf{Y}})^2
$$

which gives us the variability in $Y$ explained by our model.
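Since the Bayesian fit gives a full posterior rather than a single fitted line, one natural extension (a sketch, not from the slides) is to compute $R^2$ separately for each posterior draw of mu, using the df_pred object from the prediction slides, which yields a posterior distribution for $R^2$:

# squared correlation between the observed y and the fitted mu, per draw
df_r2 = df_pred %>%
  ungroup() %>%
  group_by(.chain, .iteration) %>%
  summarize(R2 = cor(y, mu)^2) %>%
  ungroup()

mean(df_r2$R2)                        # posterior mean R^2
quantile(df_r2$R2, c(0.025, 0.975))   # 95% interval for R^2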