# page 1 of 4

# warm3.

# we are going to get out a doubledecker, and run a regression.


# for the doubledecker, we need the package 'vcd'.

library(vcd)

# load the music-taste data and put its columns on the search path.
# the table() calls further down refer to taste, ed and age by bare
# name, so they depend on this attach().

load(file = "N:/48-308/Arnold/tea2.Rdata")
attach(what = tea2)

# before building the doubledecker we inspect the three variables.
# each one is a dichotomy, so its raw frequencies already describe
# the whole distribution; a bar chart or a mean would do no harm,
# but would add no new information.
#
# coding of the three variables:
#   taste (in music)  1 = non-classical, 2 = classical
#   ed (education)    1 = low,           2 = high
#   age               1 = younger,       2 = older

table(taste)
table(age)
table(ed)

# next come the two-way tables, with the dependent variable given
# first in each table() call. only the output for taste by
# education is reproduced here.

table(taste, ed)
table(taste, age)
table(ed, age)

#      ed
# taste    1    2
#     1 1020  596
#     2  280  404

# to see whether the effect of education on taste depends on age we
# use Q as the measure of association. for a 2 x 2 table Q equals
# gamma, so the GKgamma function in another package would also work,
# but Q is easy to get by hand: either (ad - bc) / (ad + bc), or via
# the odds ratio as (OR - 1) / (OR + 1). the odds ratio can be of
# interest in its own right, so we take the second route. the cell
# counts are copied from the taste-by-education table.

OR <- (1020 * 404) / (596 * 280)
Q <- (OR - 1) / (OR + 1)

OR # 2.469319
Q # .423518 - clearly, high education is linked to classical music

# separate taste-by-education tables for each category of age: the
# same table() call as before, with both variables restricted to
# one value of age.

# younger respondents (age == 1)
table(taste[age == 1], ed[age == 1])

#       1   2
#   1 290 406
#   2 110 194

# older respondents (age == 2)
table(taste[age == 2], ed[age == 2])

#       1   2
#   1 730 190
#   2 170 210

# odds ratio and Q within each age group, before moving on to the
# doubledecker. the counts come from the two age-specific tables.

# younger group (age == 1)
OR1 <- (290 * 194) / (406 * 110)
Q1 <- (OR1 - 1) / (OR1 + 1)

OR1 # 1.25974
Q1 # .1149425 Q for the younger age group is down considerably
# from its value for the full sample

# older group (age == 2)
OR2 <- (730 * 210) / (190 * 170)
Q2 <- (OR2 - 1) / (OR2 + 1)

OR2 # 4.74613
Q2 # .6519397 Q for the older age group is much higher.

# doubledecker plot of taste (the dependent variable, left of ~) by
# age and education. data = tea2 makes the variable lookup explicit
# rather than depending on whatever is attach()ed at the moment.
doubledecker(taste ~ age + ed, data = tea2)

# the regression uses a different data file from the course disk.
# its columns are attach()ed as well, because the calls that follow
# refer to the variables by bare name.

load(file = "N:/48-308/Arnold/socy211.Rdata")
attach(what = socy211)

# two of the variables, prevdrop and mathcon2, are dichotomous,
# so the table command is enough to get a sense of their
# distributions. nothing very important shows up, so the output
# is omitted here.
# (the bare names resolve through the attach(socy211) call.)

table(prevdrop)
table(mathcon2)

# the others are all discrete-continuous, so we get histograms.
# breaks = requests a number of cells for each plot; hist() treats
# a single number as a suggestion, so the drawn count may differ.
# since we know what these are about, the plots are omitted here.

hist(mathphob, breaks = 14)


hist(thinkfee, breaks = 19)
hist(auditory, breaks = 12)
hist(feelings, breaks = 12)
hist(sensintu, breaks = 12)

# nothing in the distributions appears problematic, so we fit a
# linear model for math phobia and save the fit in 'mp', standing
# for MathPhobia.
# data = socy211 tells lm() where the variables live, so the fit
# does not depend on which data frames happen to be attach()ed.

mp <- lm(
  mathphob ~ prevdrop + mathcon2 + thinkfee + auditory + feelings + sensintu,
  data = socy211
)
summary(mp)

#summary(mp)
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 12.25871 12.28433 0.998 0.32094
# prevdrop 15.72806 3.24274 4.850 5.0e-06 ***
# mathcon2 18.12507 3.36790 5.382 5.6e-07 ***
# thinkfee 0.21157 0.09176 2.306 0.02338 *
# auditory 0.80218 0.25883 3.099 0.00257 **
# feelings -0.67540 0.30353 -2.225 0.02852 *
# sensintu -0.10953 0.19646 -0.558 0.57853
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Residual standard error: 14.01 on 92 degrees of freedom
#   (8 observations deleted due to missingness)
# Multiple R-squared: 0.5366, Adjusted R-squared: 0.5064
# F-statistic: 17.76 on 6 and 92 DF, p-value: 1.45e-13

# sensintu is the one non-significant predictor, so we run the
# equation again without it. the result is named mp2 so that the
# initial fit in mp is not overwritten.
# note: lm() drops incomplete cases separately for each model, so
# mp2 is fit on more cases than mp (102 vs 99, judging by the
# residual degrees of freedom in the two summaries). the two fits
# are therefore not based on exactly the same sample.
# data = socy211 makes the variable lookup explicit rather than
# relying on attach().
mp2 <- lm(
  mathphob ~ prevdrop + mathcon2 + thinkfee + auditory + feelings,
  data = socy211
)
summary(mp2)

# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 11.1743 11.1061 1.006 0.31688
# prevdrop 16.8046 3.1941 5.261 8.70e-07 ***
# mathcon2 16.7782 3.2862 5.106 1.67e-06 ***
# thinkfee 0.1927 0.0877 2.198 0.03038 *
# auditory 0.8100 0.2607 3.108 0.00248 **
# feelings -0.6828 0.3029 -2.254 0.02645 *
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Residual standard error: 14.18 on 96 degrees of freedom
#   (5 observations deleted due to missingness)
# Multiple R-squared: 0.5194, Adjusted R-squared: 0.4944
# F-statistic: 20.75 on 5 and 96 DF, p-value: 5.325e-14

# (end of page; the viewer's "you may also like" footer followed here)