#### EXERCISE 8 - OLIDAN POCIUS ####

#### GALILEO ####
# Fits polynomial regressions of increasing degree (0 to 3) of h.d on init.h
# and compares the nested fits with an F-test.
# NOTE(review): presumably Galileo's experiment data (release height vs.
# horizontal distance travelled) -- confirm the source and units.
init.h <- c(600, 700, 800, 950, 1100, 1300, 1500)
h.d <- c(253, 337, 395, 451, 495, 534, 573)
plot(h.d ~ init.h)

## Linear models

# Degree 0: intercept only (null model).
modelo.0 <- lm(h.d ~ 1)
summary(modelo.0)
abline(modelo.0)

# Degree 1: straight line, drawn in green.
modelo.1 <- lm(h.d ~ init.h)
abline(modelo.1, col = "green")

# Degree 2: quadratic. abline() only uses the first two coefficients, so the
# fitted curve is drawn from the coefficients with curve() instead.
modelo.2 <- lm(h.d ~ init.h + I(init.h^2))
cf.m2 <- coef(modelo.2)
curve(
  cf.m2[1] + cf.m2[2] * x + cf.m2[3] * x^2,
  add = TRUE, lty = 2, col = "red"
)

# Degree 3: cubic, drawn in blue.
modelo.3 <- lm(h.d ~ init.h + I(init.h^2) + I(init.h^3))
cf.m3 <- coef(modelo.3)
curve(
  cf.m3[1] + cf.m3[2] * x + cf.m3[3] * x^2 + cf.m3[4] * x^3,
  add = TRUE, lty = 2, col = "blue"
)

# Sequential F-tests between the nested models.
anova(modelo.0, modelo.1, modelo.2, modelo.3)
# Recorded output:
# Analysis of Variance Table
# Model 1: h.d ~ 1
# Model 2: h.d ~ init.h
# Model 3: h.d ~ init.h + I(init.h^2)
# Model 4: h.d ~ init.h + I(init.h^2) + I(init.h^3)
#   Res.Df   RSS Df Sum of Sq       F    Pr(>F)
# 1      6 77022
# 2      5  5671  1     71351 4435.98 7.458e-06 ***
# 3      4   744  1      4927  306.33 0.0004065 ***
# 4      3    48  1       696   43.26 0.0071503 **

### Adding one more term significantly improves the model each time,
summary(modelo.1)
# Multiple R-squared: 0.9264, Adjusted R-squared: 0.9116
summary(modelo.2)
# Multiple R-squared: 0.9903, Adjusted R-squared: 0.9855
summary(modelo.3)
# Multiple R-squared: 0.9994, Adjusted R-squared: 0.9987
### but the gain of the cubic model is small: just over 1% of explained
### variation compared with model 2.

############################################################
### Newborn birth mass
# Reads babies.txt, drops rows carrying sentinel codes, fits single-predictor
# and additive multi-predictor linear models for bwt, and compares the
# nested additive models with an F-test.
RN.original <- read.table("babies.txt", header = TRUE, as.is = TRUE)
RN.original
str(RN.original)

# Keep only rows where no variable equals its sentinel code.
# NOTE(review): 999 / 99 / 9 are presumably the "unknown" codes of the data
# set -- confirm against the data dictionary.
RN <- RN.original[
  with(
    RN.original,
    bwt != 999 & gestation != 999 & parity != 9 & height != 99 &
      smoke != 9 & age != 99 & weight != 999
  ),
]
str(RN)

# Treat parity as a logical indicator (appears as `parityTRUE` in the fits).
RN$parity <- as.logical(RN$parity)

## Single-predictor models (recorded output kept as comments)

m.1 <- lm(bwt ~ gestation, data = RN)
summary(m.1)
# Multiple R-squared: 0.1661, Adjusted R-squared: 0.1654
# F-statistic: 233.4 on 1 and 1172 DF, p-value: < 2.2e-16

m.2 <- lm(bwt ~ parity, data = RN)
summary(m.2)
# Residual standard error: 18.32 on 1172 degrees of freedom
# Multiple R-squared: 0.001928, Adjusted R-squared: 0.001076
### Not significant.
# F-statistic: 2.264 on 1 and 1172 DF, p-value: 0.1327

m.3 <- lm(bwt ~ age, data = RN)
summary(m.3)
# Multiple R-squared: 0.0007281, Adjusted R-squared: -0.0001245
### Not significant.
# F-statistic: 0.8539 on 1 and 1172 DF, p-value: 0.3556

m.4 <- lm(bwt ~ height, data = RN)
summary(m.4)
# Multiple R-squared: 0.0415, Adjusted R-squared: 0.04068
# F-statistic: 50.74 on 1 and 1172 DF, p-value: 1.838e-12

m.5 <- lm(bwt ~ weight, data = RN)
summary(m.5)
# Multiple R-squared: 0.02431, Adjusted R-squared: 0.02348
# F-statistic: 29.2 on 1 and 1172 DF, p-value: 7.887e-08

m.6 <- lm(bwt ~ smoke, data = RN)
summary(m.6)
# Multiple R-squared: 0.06091, Adjusted R-squared: 0.06011
# F-statistic: 76.02 on 1 and 1172 DF, p-value: < 2.2e-16

### NO INTERACTIONS, only the predictors that were significant on their own
modelo1 <- lm(bwt ~ gestation + smoke + height + weight, data = RN)
summary(modelo1)
# Coefficients:
#              Estimate Std. Error t value Pr(>|t|)
# (Intercept) -77.25871   14.05139  -5.498 4.71e-08 ***
# gestation     0.43718    0.02909  15.028  < 2e-16 ***
# smoke        -8.34833    0.95453  -8.746  < 2e-16 ***
# height        1.09733    0.20463   5.363 9.88e-08 ***
# weight        0.05981    0.02491   2.401   0.0165 *
# Residual standard error: 15.88 on 1169 degrees of freedom
# Multiple R-squared: 0.2519, Adjusted R-squared: 0.2493
# F-statistic: 98.39 on 4 and 1169 DF, p-value: < 2.2e-16

### NO INTERACTIONS, all predictors
modelo1b <- lm(
  bwt ~ gestation + smoke + height + weight + parity + age,
  data = RN
)
summary(modelo1b)
# Coefficients:
#              Estimate Std. Error t value Pr(>|t|)
# (Intercept) -80.41085   14.34657  -5.605 2.60e-08 ***
# gestation     0.44398    0.02910  15.258  < 2e-16 ***
# smoke        -8.40073    0.95382  -8.807  < 2e-16 ***
# height        1.15402    0.20502   5.629 2.27e-08 ***
# weight        0.05017    0.02524   1.987  0.04711 *
# parityTRUE   -3.32720    1.12895  -2.947  0.00327 **
# age          -0.00895    0.08582  -0.104  0.91696     ### age not significant
# Residual standard error: 15.83 on 1167 degrees of freedom
# Multiple R-squared: 0.258, Adjusted R-squared: 0.2541
# F-statistic: 67.61 on 6 and 1167 DF, p-value: < 2.2e-16

### NO INTERACTIONS, WITHOUT age
modelo1c <- lm(bwt ~ gestation + smoke + height + weight + parity, data = RN)
summary(modelo1c)
# Coefficients:
#              Estimate Std. Error t value Pr(>|t|)
# (Intercept) -80.71321   14.04465  -5.747 1.16e-08 ***
# gestation     0.44408    0.02907  15.276  < 2e-16 ***
# smoke        -8.39390    0.95117  -8.825  < 2e-16 ***
# height        1.15497    0.20473   5.641 2.11e-08 ***
# weight        0.04983    0.02503   1.991  0.04672 *
# parityTRUE   -3.28762    1.06281  -3.093  0.00203 **
# Residual standard error: 15.82 on 1168 degrees of freedom
# Multiple R-squared: 0.2579, Adjusted R-squared: 0.2548
# F-statistic: 81.2 on 5 and 1168 DF, p-value: < 2.2e-16

### NO INTERACTIONS, WITHOUT parity
modelo1d <- lm(bwt ~ gestation + smoke + height + weight + age, data = RN)
summary(modelo1d)
# Coefficients:
#              Estimate Std. Error t value Pr(>|t|)
# (Intercept) -80.18172   14.39348  -5.571 3.15e-08 ***
# gestation     0.43872    0.02914  15.056  < 2e-16 ***
# smoke        -8.29494    0.95628  -8.674  < 2e-16 ***
# height        1.11130    0.20518   5.416 7.38e-08 ***
# weight        0.05597    0.02525   2.217   0.0268 *
# age           0.07607    0.08109   0.938   0.3484     ##### age not significant
# Residual standard error: 15.88 on 1168 degrees of freedom
# Multiple R-squared: 0.2524, Adjusted R-squared: 0.2492
# F-statistic: 78.88 on 5 and 1168 DF, p-value: < 2.2e-16

###### COMPARISONS BETWEEN THE MODELS WITHOUT INTERACTIONS
# Nested sequence: base model -> + parity -> + parity + age.
anova(modelo1, modelo1c, modelo1b)
# Analysis of Variance Table
# Model 1: bwt ~ gestation + smoke + height + weight
# Model 2: bwt ~ gestation + smoke + height + weight + parity
# Model 3: bwt ~ gestation + smoke + height + weight + parity + age
#   Res.Df    RSS Df Sum of Sq      F   Pr(>F)
# 1   1169 294808
# 2   1168 292412  1   2395.54 9.5606 0.002035 **   ### Significant
# 3   1167 292409  1      2.73 0.0109 0.916956

### IGNORING INTERACTIONS, THE BEST MODEL IS:
# (same formula as the earlier modelo1c fit; restated here as the conclusion)
modelo1c <- lm(bwt ~ gestation + smoke + height + weight + parity, data = RN)

#### WITH INTERACTIONS ####