Vous êtes sur la page 1 sur 82

Lista 1

Guilherme Fernandes
March 29, 2019

Exercício 1

library(faraway)

## Warning: package 'faraway' was built under R version 3.5.3

data(prostate)
head(prostate,n=10)

## lcavol lweight age lbph svi lcp gleason pgg45 lpsa


## 1 -0.5798185 2.7695 50 -1.386294 0 -1.38629 6 0 -0.43078
## 2 -0.9942523 3.3196 58 -1.386294 0 -1.38629 6 0 -0.16252
## 3 -0.5108256 2.6912 74 -1.386294 0 -1.38629 7 20 -0.16252
## 4 -1.2039728 3.2828 58 -1.386294 0 -1.38629 6 0 -0.16252
## 5 0.7514161 3.4324 62 -1.386294 0 -1.38629 6 0 0.37156
## 6 -1.0498221 3.2288 50 -1.386294 0 -1.38629 6 0 0.76547
## 7 0.7371641 3.4735 64 0.615186 0 -1.38629 6 0 0.76547
## 8 0.6931472 3.5395 58 1.536867 0 -1.38629 6 0 0.85442
## 9 -0.7765288 3.5395 47 -1.386294 0 -1.38629 6 0 1.04732
## 10 0.2231436 3.2445 63 -1.386294 0 -1.38629 6 0 1.04732

tail(prostate,n=10)

## lcavol lweight age lbph svi lcp gleason pgg45 lpsa


## 88 1.731656 3.3690 62 -1.386294 1 0.30010 7 30 3.71235
## 89 2.807594 4.7181 65 -1.386294 1 2.46385 7 60 3.98434
## 90 1.562346 3.6951 76 0.936093 1 0.81093 7 75 3.99360
## 91 3.246491 4.1018 68 -1.386294 0 -1.38629 6 0 4.02981
## 92 2.532903 3.6776 61 1.348073 1 -1.38629 7 15 4.12955
## 93 2.830268 3.8764 68 -1.386294 1 1.32176 7 60 4.38515
## 94 3.821004 3.8969 44 -1.386294 1 2.16905 7 40 4.68444
## 95 2.907447 3.3962 52 -1.386294 1 2.46385 7 10 5.14312
## 96 2.882564 3.7739 68 1.558145 1 1.55814 7 80 5.47751
## 97 3.471967 3.9750 68 0.438255 1 2.90417 7 20 5.58293

summary(prostate)

## lcavol lweight age lbph


## Min. :-1.3471 Min. :2.375 Min. :41.00 Min. :-1.3863
## 1st Qu.: 0.5128 1st Qu.:3.376 1st Qu.:60.00 1st Qu.:-1.3863
## Median : 1.4469 Median :3.623 Median :65.00 Median : 0.3001
## Mean : 1.3500 Mean :3.653 Mean :63.87 Mean : 0.1004
## 3rd Qu.: 2.1270 3rd Qu.:3.878 3rd Qu.:68.00 3rd Qu.: 1.5581

1
## Max. : 3.8210 Max. :6.108 Max. :79.00 Max. : 2.3263
## svi lcp gleason pgg45
## Min. :0.0000 Min. :-1.3863 Min. :6.000 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.:-1.3863 1st Qu.:6.000 1st Qu.: 0.00
## Median :0.0000 Median :-0.7985 Median :7.000 Median : 15.00
## Mean :0.2165 Mean :-0.1794 Mean :6.753 Mean : 24.38
## 3rd Qu.:0.0000 3rd Qu.: 1.1786 3rd Qu.:7.000 3rd Qu.: 40.00
## Max. :1.0000 Max. : 2.9042 Max. :9.000 Max. :100.00
## lpsa
## Min. :-0.4308
## 1st Qu.: 1.7317
## Median : 2.5915
## Mean : 2.4784
## 3rd Qu.: 3.0564
## Max. : 5.5829

##Conjunto de histogramas
hist(prostate$lcavol)

Histogram of prostate$lcavol
20
15
Frequency

10
5
0

−1 0 1 2 3 4

prostate$lcavol

hist(prostate$lweight)

2
Histogram of prostate$lweight
40
30
Frequency

20
10
0

2 3 4 5 6

prostate$lweight

hist(prostate$age)

3
Histogram of prostate$age
30
25
20
Frequency

15
10
5
0

40 50 60 70 80

prostate$age

hist(prostate$lbph)

4
Histogram of prostate$lbph
40
30
Frequency

20
10
0

−1 0 1 2

prostate$lbph

hist(prostate$svi)

5
Histogram of prostate$svi
60
Frequency

40
20
0

0.0 0.2 0.4 0.6 0.8 1.0

prostate$svi

hist(prostate$lcp)

6
Histogram of prostate$lcp
40
30
Frequency

20
10
0

−1 0 1 2 3

prostate$lcp

hist(prostate$gleason)

7
Histogram of prostate$gleason
50
40
Frequency

30
20
10
0

6.0 6.5 7.0 7.5 8.0 8.5 9.0

prostate$gleason

hist(prostate$pgg45)

8
Histogram of prostate$pgg45
40
30
Frequency

20
10
0

0 20 40 60 80 100

prostate$pgg45

hist(prostate$lpsa)

9
Histogram of prostate$lpsa
40
30
Frequency

20
10
0

−1 0 1 2 3 4 5 6

prostate$lpsa

#Conjunto de gráficos de dispersão


plot(prostate$lcavol)

10
4
3
prostate$lcavol

2
1
0
−1

0 20 40 60 80 100

Index

plot(prostate$lweight)

11
6
prostate$lweight

5
4
3

0 20 40 60 80 100

Index

plot(prostate$age)

12
80
70
prostate$age

60
50
40

0 20 40 60 80 100

Index

plot(prostate$lbph)

13
2
prostate$lbph

1
0
−1

0 20 40 60 80 100

Index

plot(prostate$svi)

14
1.0
0.8
prostate$svi

0.6
0.4
0.2
0.0

0 20 40 60 80 100

Index

plot(prostate$lcp)

15
3
2
prostate$lcp

1
0
−1

0 20 40 60 80 100

Index

plot(prostate$gleason)

16
9.0
8.5
prostate$gleason

8.0
7.5
7.0
6.5
6.0

0 20 40 60 80 100

Index

plot(prostate$pgg45)

17
100
80
prostate$pgg45

60
40
20
0

0 20 40 60 80 100

Index

plot(prostate$lpsa)

18
5
4
prostate$lpsa

3
2
1
0

0 20 40 60 80 100

Index

##Conjunto de densidades
plot(density(prostate$lcavol))

19
density.default(x = prostate$lcavol)
0.30
0.20
Density

0.10
0.00

−2 0 2 4

N = 97 Bandwidth = 0.4249

plot(density(prostate$lweight))

20
density.default(x = prostate$lweight)
0.8
0.6
Density

0.4
0.2
0.0

2 3 4 5 6

N = 97 Bandwidth = 0.1352

plot(density(prostate$age))

21
density.default(x = prostate$age)
0.06
0.04
Density

0.02
0.00

40 50 60 70 80

N = 97 Bandwidth = 2.152

plot(density(prostate$lbph))

22
density.default(x = prostate$lbph)
0.30
0.20
Density

0.10
0.00

−3 −2 −1 0 1 2 3 4

N = 97 Bandwidth = 0.523

plot(density(prostate$svi))

23
density.default(x = prostate$svi)
2.0
1.5
Density

1.0
0.5
0.0

−0.5 0.0 0.5 1.0 1.5

N = 97 Bandwidth = 0.1492

plot(density(prostate$lcp))

24
density.default(x = prostate$lcp)
0.4
0.3
Density

0.2
0.1
0.0

−2 0 2 4

N = 97 Bandwidth = 0.504

plot(density(prostate$gleason))

25
density.default(x = prostate$gleason)
0.8
0.6
Density

0.4
0.2
0.0

6 7 8 9

N = 97 Bandwidth = 0.2603

plot(density(prostate$pgg45))

26
density.default(x = prostate$pgg45)
0.020
0.015
Density

0.010
0.005
0.000

0 50 100

N = 97 Bandwidth = 10.17

plot(density(prostate$lpsa))

27
density.default(x = prostate$lpsa)
0.3
Density

0.2
0.1
0.0

0 2 4 6

N = 97 Bandwidth = 0.3564

#Conjunto de gráficos ordenados


barplot(sort(prostate$lcavol))

28
3
2
1
0
−1

barplot(sort(prostate$lweight))

29
6
5
4
3
2
1
0

barplot(sort(prostate$age))

30
60
40
20
0

barplot(sort(prostate$lbph))

31
2.0
1.0
0.0
−1.0

barplot(sort(prostate$svi))

32
1.0
0.8
0.6
0.4
0.2
0.0

barplot(sort(prostate$lcp))

33
2
1
0
−1

barplot(sort(prostate$gleason))

34
8
6
4
2
0

barplot(sort(prostate$pgg45))

35
100
80
60
40
20
0

barplot(sort(prostate$lpsa))

36
5
4
3
2
1
0

##Conjunto de retas de regressão da primeira variável


linear1<-lm(prostate$lcavol~prostate$lweight)
coef(linear1)

## (Intercept) prostate$lweight
## -0.3328410 0.4607156

plot(prostate$lcavol~prostate$lweight)
abline(linear1,col=2)

37
4
3
prostate$lcavol

2
1
0
−1

3 4 5 6

prostate$lweight

linear2<-lm(prostate$lcavol~prostate$age)
coef(linear2)

## (Intercept) prostate$age
## -0.92485702 0.03561938

plot(prostate$lcavol~prostate$age)
abline(linear2,col=3)

38
4
3
prostate$lcavol

2
1
0
−1

40 50 60 70 80

prostate$age

linear3<-lm(prostate$lcavol~prostate$lbph)
coef(linear3)

## (Intercept) prostate$lbph
## 1.34777980 0.02221871

plot(prostate$lcavol~prostate$lbph)
abline(linear3,col=4)

39
4
3
prostate$lcavol

2
1
0
−1

−1 0 1 2

prostate$lbph

linear4<-lm(prostate$lcavol~prostate$lcp)
coef(linear4)

## (Intercept) prostate$lcp
## 1.4521105 0.5692395

plot(prostate$lcavol~prostate$lcp)
abline(linear4,col=5)

40
4
3
prostate$lcavol

2
1
0
−1

−1 0 1 2 3

prostate$lcp

linear5<-lm(prostate$lcavol~prostate$pgg45)
coef(linear5)

## (Intercept) prostate$pgg45
## 0.90816923 0.01812199

plot(prostate$lcavol~prostate$pgg45)
abline(linear5,col=6)

41
4
3
prostate$lcavol

2
1
0
−1

0 20 40 60 80 100

prostate$pgg45

linear6<-lm(prostate$lcavol~prostate$lpsa)
coef(linear6)

## (Intercept) prostate$lpsa
## -0.5085802 0.7499191

plot(prostate$lcavol~prostate$lpsa)
abline(linear6,col=7)

42
4
3
prostate$lcavol

2
1
0
−1

0 1 2 3 4 5

prostate$lpsa

Exercício 2

library(faraway)
data(divusa)
head(divusa,n=10)

## year divorce unemployed femlab marriage birth military


## 1 1920 8.0 5.2 22.70 92.0 117.9 3.2247
## 2 1921 7.2 11.7 22.79 83.0 119.8 3.5614
## 3 1922 6.6 6.7 22.88 79.7 111.2 2.4553
## 4 1923 7.1 2.4 22.97 85.2 110.5 2.2065
## 5 1924 7.2 5.0 23.06 80.3 110.9 2.2889
## 6 1925 7.2 3.2 23.15 79.2 106.6 2.1735
## 7 1926 7.5 1.8 23.24 78.7 102.6 2.1073
## 8 1927 7.8 3.3 23.33 77.0 99.8 2.0913
## 9 1928 7.8 4.2 23.42 74.1 93.8 2.0821
## 10 1929 8.0 3.2 23.51 75.5 89.3 2.0944

tail(divusa,n=10)

## year divorce unemployed femlab marriage birth military


## 68 1987 20.8 6.2 56.0 55.7 65.7 8.9737
## 69 1988 20.7 5.5 56.6 54.6 67.2 8.7453

43
## 70 1989 20.4 5.3 57.4 54.2 69.2 8.6307
## 71 1990 20.9 5.6 57.5 54.5 70.9 8.1924
## 72 1991 20.9 6.8 57.4 54.2 69.6 7.8744
## 73 1992 21.2 7.5 57.8 53.3 68.9 7.0862
## 74 1993 20.5 6.9 57.9 52.3 67.6 6.6145
## 75 1994 20.5 6.1 58.8 51.5 66.7 6.1865
## 76 1995 19.8 5.6 58.9 50.8 65.6 5.7770
## 77 1996 19.5 5.4 59.3 49.7 65.3 5.5488

summary(divusa)

## year divorce unemployed femlab


## Min. :1920 Min. : 6.10 Min. : 1.200 Min. :22.70
## 1st Qu.:1939 1st Qu.: 8.70 1st Qu.: 4.200 1st Qu.:27.47
## Median :1958 Median :10.60 Median : 5.600 Median :37.10
## Mean :1958 Mean :13.27 Mean : 7.173 Mean :38.58
## 3rd Qu.:1977 3rd Qu.:20.30 3rd Qu.: 7.500 3rd Qu.:47.80
## Max. :1996 Max. :22.80 Max. :24.900 Max. :59.30
## marriage birth military
## Min. : 49.70 Min. : 65.30 Min. : 1.940
## 1st Qu.: 61.90 1st Qu.: 68.90 1st Qu.: 3.469
## Median : 74.10 Median : 85.90 Median : 9.102
## Mean : 72.97 Mean : 88.89 Mean :12.365
## 3rd Qu.: 80.00 3rd Qu.:107.30 3rd Qu.:14.266
## Max. :118.10 Max. :122.90 Max. :86.641

##Conjunto de histogramas
hist(divusa$year)

44
Histogram of divusa$year
10
8
Frequency

6
4
2
0

1920 1940 1960 1980 2000

divusa$year

hist(divusa$divorce)

45
Histogram of divusa$divorce
15
Frequency

10
5
0

10 15 20

divusa$divorce

hist(divusa$unemployed)

46
Histogram of divusa$unemployed
40
30
Frequency

20
10
0

0 5 10 15 20 25

divusa$unemployed

hist(divusa$femlab)

47
Histogram of divusa$femlab
10 12 14
Frequency

8
6
4
2
0

20 30 40 50 60

divusa$femlab

hist(divusa$marriage)

48
Histogram of divusa$marriage
30
25
20
Frequency

15
10
5
0

40 60 80 100 120

divusa$marriage

hist(divusa$birth)

49
Histogram of divusa$birth
20
15
Frequency

10
5
0

60 70 80 90 100 110 120 130

divusa$birth

hist(divusa$military)

50
Histogram of divusa$military
40
30
Frequency

20
10
0

0 20 40 60 80

divusa$military

#Conjunto de gráficos de dispersão


plot(divusa$year)

51
1980
divusa$year

1960
1940
1920

0 20 40 60 80

Index

plot(divusa$divorce)

52
20
divusa$divorce

15
10

0 20 40 60 80

Index

plot(divusa$unemployed)

53
25
20
divusa$unemployed

15
10
5

0 20 40 60 80

Index

plot(divusa$femlab)

54
60
50
divusa$femlab

40
30

0 20 40 60 80

Index

plot(divusa$marriage)

55
110
divusa$marriage

90
80
70
60
50

0 20 40 60 80

Index

plot(divusa$birth)

56
100 110 120
divusa$birth

90
80
70

0 20 40 60 80

Index

plot(divusa$military)

57
80
divusa$military

60
40
20
0

0 20 40 60 80

Index

##Conjunto de densidades
plot(density(divusa$year))

58
density.default(x = divusa$year)
0.012
0.008
Density

0.004
0.000

1900 1920 1940 1960 1980 2000 2020

N = 77 Bandwidth = 8.446

plot(density(divusa$divorce))

59
density.default(x = divusa$divorce)
0.08
0.06
Density

0.04
0.02
0.00

0 5 10 15 20 25 30

N = 77 Bandwidth = 2.14

plot(density(divusa$unemployed))

60
density.default(x = divusa$unemployed)
0.15
0.10
Density

0.05
0.00

0 5 10 15 20 25

N = 77 Bandwidth = 0.9297

plot(density(divusa$femlab))

61
density.default(x = divusa$femlab)
0.030
0.020
Density

0.010
0.000

10 20 30 40 50 60 70

N = 77 Bandwidth = 4.442

plot(density(divusa$marriage))

62
density.default(x = divusa$marriage)
0.030
0.020
Density

0.010
0.000

40 60 80 100 120

N = 77 Bandwidth = 4.955

plot(density(divusa$birth))

63
density.default(x = divusa$birth)
0.020
0.015
Density

0.010
0.005
0.000

40 60 80 100 120 140

N = 77 Bandwidth = 7.369

plot(density(divusa$military))

64
density.default(x = divusa$military)
0.04
Density

0.02
0.00

0 20 40 60 80

N = 77 Bandwidth = 3.042

#Conjunto de gráficos ordenados


barplot(sort(divusa$year))

65
1500
1000
500
0

barplot(sort(divusa$divorce))

66
20
15
10
5
0

barplot(sort(divusa$unemployed))

67
20
15
10
5
0

barplot(sort(divusa$femlab))

68
50
40
30
20
10
0

barplot(sort(divusa$marriage))

69
100
80
60
40
20
0

barplot(sort(divusa$birth))

70
100 120
80
60
40
20
0

barplot(sort(divusa$military))

71
80
60
40
20
0

##Conjunto de retas de regressão da primeira variável


linear1<-lm(divusa$divorce~divusa$unemployed)
coef(linear1)

## (Intercept) divusa$unemployed
## 14.954236 -0.234974

plot(divusa$divorce~divusa$unemployed)
abline(linear1,col=2)

72
20
divusa$divorce

15
10

5 10 15 20 25

divusa$unemployed

linear2<-lm(divusa$divorce~divusa$femlab)
coef(linear2)

## (Intercept) divusa$femlab
## -3.6552722 0.4386697

plot(divusa$divorce~divusa$femlab)
abline(linear2,col=3)

73
20
divusa$divorce

15
10

30 40 50 60

divusa$femlab

linear3<-lm(divusa$divorce~divusa$birth)
coef(linear3)

## (Intercept) divusa$birth
## 31.905774 -0.209667

plot(divusa$divorce~divusa$birth)
abline(linear3,col=4)

74
20
divusa$divorce

15
10

70 80 90 100 110 120

divusa$birth

# Simple regression of divorce on military for the divusa data.
# The fit is stored in `linear4`, so the coefficients and the fitted line must
# both come from `linear4`. The earlier `linear5` is the prostate model
# (lcavol ~ pgg45) and does not belong on this plot.
linear4 <- lm(divusa$divorce ~ divusa$military)
coef(linear4)

## NOTE: the output previously shown here listed the coefficients of the
## prostate model `linear5`; re-run this chunk to print the divusa estimates.

plot(divusa$divorce ~ divusa$military)
abline(linear4, col = 5)

75
20
divusa$divorce

15
10

0 20 40 60 80

divusa$military

Exercício 3

library(faraway)
data(prostate)
lreg <- lm(lpsa ~ lcavol, prostate)
somadelmreg <- summary(lreg)
print(paste0("R2: ", somadelmreg$r.squared))

## [1] "R2: 0.539431908779019"

print(paste0("Erro resíduo padrão: ", somadelmreg$sigma))

## [1] "Erro resíduo padrão: 0.787499423513711"

# Accumulators for the trend plots (R squared and residual standard error).
# They start empty here, after the lcavol-only fit above has been printed,
# so that first model is not part of the plotted trends.
r2conjunto <- NULL
erroresiduopadraoconjunto <- NULL
lreg <- lm(lpsa ~ lcavol+lweight, prostate)
somadelmreg <- summary(lreg)
print(paste0("R2: ", somadelmreg$r.squared))

## [1] "R2: 0.585934512070213"

76
print(paste0("Erro resíduo padrão: ", somadelmreg$sigma))

## [1] "Erro resíduo padrão: 0.750646932552003"

r2conjunto=c(r2conjunto,somadelmreg$r.squared)
erroresiduopadraoconjunto=c(erroresiduopadraoconjunto,somadelmreg$sigma)
lreg <- lm(lpsa ~ lcavol+lweight+svi, prostate)
somadelmreg <- summary(lreg)
print(paste0("R2: ", somadelmreg$r.squared))

## [1] "R2: 0.626440253553244"

print(paste0("Erro resíduo padrão: ", somadelmreg$sigma))

## [1] "Erro resíduo padrão: 0.71680938995835"

r2conjunto=c(r2conjunto,somadelmreg$r.squared)
erroresiduopadraoconjunto=c(erroresiduopadraoconjunto,somadelmreg$sigma)
lreg <- lm(lpsa ~ lcavol+lweight+svi+lbph, prostate)
somadelmreg <- summary(lreg)
print(paste0("R2: ", somadelmreg$r.squared))

## [1] "R2: 0.636603479801418"

print(paste0("Erro resíduo padrão: ", somadelmreg$sigma))

## [1] "Erro resíduo padrão: 0.710823197727069"

r2conjunto=c(r2conjunto,somadelmreg$r.squared)
erroresiduopadraoconjunto=c(erroresiduopadraoconjunto,somadelmreg$sigma)
lreg <- lm(lpsa ~ lcavol+lweight+svi+lbph+age, prostate)
somadelmreg <- summary(lreg)
print(paste0("R2: ", somadelmreg$r.squared))

## [1] "R2: 0.644102401261455"

print(paste0("Erro resíduo padrão: ", somadelmreg$sigma))

## [1] "Erro resíduo padrão: 0.707305372441944"

r2conjunto=c(r2conjunto,somadelmreg$r.squared)
erroresiduopadraoconjunto=c(erroresiduopadraoconjunto,somadelmreg$sigma)
lreg <- lm(lpsa ~ lcavol+lweight+svi+lbph+age+lcp, prostate)
somadelmreg <- summary(lreg)
print(paste0("R2: ", somadelmreg$r.squared))

## [1] "R2: 0.645112974108872"

77
print(paste0("Erro resíduo padrão: ", somadelmreg$sigma))

## [1] "Erro resíduo padrão: 0.710213512046953"

r2conjunto=c(r2conjunto,somadelmreg$r.squared)
erroresiduopadraoconjunto=c(erroresiduopadraoconjunto,somadelmreg$sigma)
lreg <- lm(lpsa ~ lcavol+lweight+svi+lbph+age+lcp+pgg45, prostate)
somadelmreg <- summary(lreg)
print(paste0("R2: ", somadelmreg$r.squared))

## [1] "R2: 0.65443165616093"

print(paste0("Erro resíduo padrão: ", somadelmreg$sigma))

## [1] "Erro resíduo padrão: 0.704753265042738"

r2conjunto=c(r2conjunto,somadelmreg$r.squared)
erroresiduopadraoconjunto=c(erroresiduopadraoconjunto,somadelmreg$sigma)
lreg <- lm(lpsa ~ lcavol+lweight+svi+lbph+age+lcp+pgg45+gleason, prostate)
somadelmreg <- summary(lreg)
print(paste0("R2: ", somadelmreg$r.squared))

## [1] "R2: 0.654754085299708"

print(paste0("Erro resíduo padrão: ", somadelmreg$sigma))

## [1] "Erro resíduo padrão: 0.708415511834863"

r2conjunto=c(r2conjunto,somadelmreg$r.squared)
erroresiduopadraoconjunto=c(erroresiduopadraoconjunto,somadelmreg$sigma)
plot(r2conjunto,type="b",main="Tendência de R ao quadrado",ylab = "R ao quadrado")

78
Tendência de R ao quadrado
0.65
R ao quadrado

0.63
0.61
0.59

1 2 3 4 5 6 7

Index

# Trend of the residual standard error across the successively larger models.
# The call is split over several lines so it cannot be cut off at the page
# margin again; the y-axis label matches the one shown on the rendered figure.
plot(erroresiduopadraoconjunto, type = "b",
     main = "Tendência do erro residual padrão",
     ylab = "Erro residual padrão")

79
Tendência do erro residual padrão
0.75
0.74
Erro residual padrão

0.73
0.72
0.71

1 2 3 4 5 6 7

Index

Exercício 4

# Elementwise transform (-1 + sqrt(1 + 8x)) / 2.
# For x >= 0 this is the non-negative solution y of y^2 + y = 2x.
# x: numeric vector. Returns a numeric vector of the same length.
f <- function(x) {
  discriminant <- 1 + 8 * x
  (sqrt(discriminant) - 1) / 2
}
# Draw 1000 uniform(0, 1) variates and push them through f in one vectorized
# call. runif(1000) consumes the random stream in the same order as 1000
# successive runif(1) calls, so the sample is unchanged for a given seed, and
# `vetor` is no longer re-copied on every append.
vetor <- f(runif(1000))
hist(vetor)

80
Histogram of vetor
150
100
Frequency

50
0

0.0 0.2 0.4 0.6 0.8 1.0

vetor

plot(density(vetor))

81
density.default(x = vetor)
0.0 0.2 0.4 0.6 0.8 1.0 1.2
Density

−0.2 0.0 0.2 0.4 0.6 0.8 1.0 1.2

N = 1000 Bandwidth = 0.06231

82

Vous aimerez peut-être aussi