# Page 1 of 11
#
# An introduction to R language
#
# Contents from a script file:
#
#
# AN INTRODUCTION TO R ENVIRONMENT
# Factors: a character vector is not a factor until factor() converts it.
data <- c("East", "West", "North", "East")
print(data)
print(is.factor(data))
factor_data <- factor(data)
print(factor_data)
print(is.factor(factor_data))

# Factors inside a data frame.
height <- c(132, 151, 162, 139, 166, 147, 122)
weight <- c(66, 71, 70, 56, 61, 54, 49)
gender <- c("male", "female", "male", "female", "female", "female", "male")
# stringsAsFactors is spelled out: since R 4.0 character columns are no longer
# converted to factors by default, which would make the two checks below print
# FALSE and a plain character vector instead of a factor.
input_data <- data.frame(height, weight, gender, stringsAsFactors = TRUE)
print(input_data) # you must select all code in order to run it
print(is.factor(input_data$gender))
print(input_data$gender)

# Region names; the last entry is "North" (a line wrap had split the string
# into "Nor" + newline + "th", which created a fourth, bogus level).
data <- c(
  "East", "West", "East", "North", "North", "East",
  "West", "West", "West", "East", "North"
)
# Create the factors
factor_data <- factor(data)
print(factor_data)

# Apply the factor function with required order of the level.
new_order_data <- factor(factor_data, levels = c("East", "West", "North"))
print(new_order_data)

# Generate factor levels with gl(): three levels, each repeated four times
# in a row.
v <- gl(n = 3, k = 4, labels = c(1, 2, 3))
print(v)

# Another example, this time with character labels: two levels, each
# repeated twice.
v <- gl(n = 2, k = 2, labels = c("North", "South"))
print(v)

# Arrays: two vectors (nine values in total) fill a 3 x 3 x 2 array.
vector1 <- c(3, 4, 5)
vector2 <- c(10, 9, 11, 12, 10, 6)
result <- array(c(vector1, vector2), dim = c(3, 3, 2))
print(result)

# Name rows, columns and the two matrices of the same array.
column.names <- c("Col1", "Col2", "Col3")
row.names <- c("Row1", "Row2", "Row3")
matrix.names <- c("Matrix1", "Matrix2")
result <- array(
  c(vector1, vector2),
  dim = c(3, 3, 2),
  dimnames = list(row.names, column.names, matrix.names)
)
print(result)

# The same eight values laid out as 4 x 2, then reshaped to 2 x 4.
res <- array(c(1, 2, 3, 4, 5, 6, 7, 1), dim = c(4, 2))
print(res)
dim(res) <- c(2, 4)
print(res)

# Access array elements
vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)
result <- array(c(vector1, vector2), dim = c(3, 3, 1))
print(result)
result <- array(c(vector1, vector2, vector1), dim = c(3, 2, 2))
print(result)
result <- array(c(vector1, vector1), dim = c(2, 3))
print(result)
# print() displays only its first argument; a second positional argument is
# taken as `digits`, so the column and the row each need their own call.
print(result[, 2]) # second column
print(result[2, ]) # second row

# Column and row names: build an unnamed 3 x 3 array, then attach the names.
vector <- array(c(c(1, 2, 3), c(2, 3, 4), c(3, 4, 5)), dim = c(3, 3))
print(vector)
col.names <- c("COL1", "COL2", "COL3")
row.names <- c("R1", "R2", "R3")
result <- vector
dimnames(result) <- list(row.names, col.names)
print(result)

# Apply a function across the rows or the columns of an array
vector1 <- c(5, 9, 3)
vector2 <- c(9, 11, 12, 13, 14, 15)
new.array <- array(c(vector1, vector2), dim = c(3, 3))
print(new.array)

# MARGIN = 1: sum() is applied to every row.
result <- apply(new.array, MARGIN = 1, FUN = sum)
print(result)

# MARGIN = 2: sum() is applied to every column.
result <- apply(new.array, MARGIN = 2, FUN = sum)
print(result)

# Write to xlsx file
# write.xlsx() and read.xlsx() are not base R; this section assumes the
# package providing them (presumably 'xlsx') is already attached.
# NOTE(review): the working directory below is specific to one machine.
setwd("C:/Users/theodor/Documents/test_r_files")
write.xlsx(vector1, file = "test.excelfile.xlsx", sheetName = "TestSheet")
# Same export again, this time without the row-name column.
write.xlsx(
  x = vector1, file = "test.excelfile.xlsx", sheetName = "TestSheet",
  row.names = FALSE
)

# Exercise 1
# a) Let's write a dataframe to an excel file
# b) Let's read the data back from the same excel file
# c) Compute the sum of the elements from a specific column
# d) Compute the average of a column from a worksheet
name <- c("Theodor", "Gabriel", "Eliza", "Margaret", "Paul")
age <- c(24, 32, 41, 29, 31)
salary <- c(1000, 2000, 1300, 1500, 1900)
emp.data <- data.frame(name, age, salary)
write.xlsx(
  emp.data, file = "test2.excelfile.xlsx", sheetName = "TestSheet",
  row.names = FALSE
)

# b) Reading part
# The workbook was just written with a single sheet, so that sheet (index 1)
# is the one to read; index 2 does not exist in it.
data <- read.xlsx("test2.excelfile.xlsx", sheetIndex = 1)
print(data)

# c) + d)
# Only the numeric columns can be summed and averaged; `name` is text.
x1 <- data[, "age"]
x2 <- data[, "salary"]
print(x1)
print(x2)
print(c(sum(x1), sum(x2))) # sums of each column
print(c(mean(x1), mean(x2))) # averages of each column
# print(c(sd(x1), sd(x2)))

# NA values: aggregates are NA unless the missing entries are removed
x <- c(12, 7, 3, 4.2, -21, NA, 12, 13)

# Without na.rm the single NA makes the whole mean NA.
result.mean <- mean(x)
print(result.mean)

# With na.rm = TRUE the NA is dropped before averaging.
result.mean <- mean(x, na.rm = TRUE)
print(result.mean)

# The same switch works for the median.
x <- c(NA, 2, 3, 4, NA, 7, NA, 8)
result.median <- median(x, na.rm = TRUE)
print(result.median)

# Mode (most frequent value) of a vector

# Returns the value that occurs most often in `v`. When several values are
# tied, the one that appears first in `v` is returned.
getmode <- function(v) {
  candidates <- unique(v)
  # Position of every element of v within the distinct values ...
  positions <- match(v, candidates)
  # ... counted per distinct value.
  counts <- tabulate(positions)
  candidates[which.max(counts)]
}

# Numeric example
v <- c(2, 1, 2, 3, 1, 2, 3, 4, 1, 5, 5, 3, 2, 3, 3)
result <- getmode(v)
print(result)

# Character example
charv <- c("o", "it", "the", "it", "it")
result <- getmode(charv)
print(result)

# For loops: print each of the first four capital letters
v <- LETTERS[1:4]
for (i in v) {
  print(i)
}

# Nested loops: print every pair of letters from v1 and v2
v1 <- LETTERS[1:5]
v2 <- LETTERS[1:5]
for (i in v1) {
  for (j in v2) {
    print(c(i, j))
  }
}

# Loops with break or next statement
# Every letter except "D" is printed.
v <- LETTERS[1:6]
for (i in v) {
  if (i != "D") {
    print(i)
  }
}

# repeat + break: print v until the counter exceeds 5
v <- c("Help", "please")
cnt <- 2 # counter
repeat {
  print(v)
  cnt <- cnt + 1
  if (cnt > 5) {
    break
  }
}

# Exercise:
# set the counter to 6, and repeat printing until the counter gets <= 2
cnt <- 6
repeat {
  print(v)
  cnt <- cnt - 1
  if (cnt <= 2) {
    break
  }
}

# while loop
# Given a vector (1,2,3), print each vector x+i until the second element is
# greater than 7
x <- c(1, 2, 3)
while (x[2] <= 7) {
  x <- x + 1
  print(x) # incrementing first prints one more vector than permitted
}

# Printing before incrementing stops at the right vector instead.
x <- c(1, 2, 3)
while (x[2] <= 7) {
  print(x)
  x <- x + 1
}
print(x + 1)

# Sequences and simple aggregates
print(seq(22, 33))
print(c(1:4))
print(c(seq(10, 12), c(1:4)))
print(mean(20:23))
print(sum(10:15))
# Prints the squares of 1, 2, ..., a, one value per line.
#
# a: how many squares to print; 0 prints nothing.
new.function <- function(a) {
  # seq_len() gives an empty sequence for a = 0, whereas 1:a would run the
  # loop for 1 and then 0.
  for (i in seq_len(a)) {
    b <- i^2
    print(b)
  }
}
new.function(10)
# Next part -- functions

# Prints a * b + c; the printed value is also the function's result.
new.function <- function(a, b, c) {
  result <- a * b + c
  print(result)
}
new.function(5, 3, 11)

# Prints a * b + a + b; the printed value is also the function's result.
function2 <- function(a, b) {
  result <- a * b + a + b
  print(result)
}
function2(2, 2)

# Missing arguments
# Prints a squared, then a, then b.
new.function2 <- function(a, b) {
  print(a^2)
  print(a)
  print(b) # this gives a missing-argument error when b is not supplied
}
# b is left out on purpose to show the error. try() reports it without
# aborting the script, so the sections that follow still run.
try(new.function2(6))

# String manipulations
a <- "Hi"
b <- ",my name is"
# Named first_name rather than `c`, so the variable does not mask base::c.
first_name <- "Theodor"
print(paste(a, b, first_name))

# Formatting
result <- format(23.123451234, digits = 5)
print(result)
result2 <- format(23.3212, digits = 3)
print(result2)
result <- format("Hello", digits = 4)
print(result)
# result<-format("Hello"23.1222,digits=4) # unexpected numeric constant in result<-...
print(result)
result <- format("hello,23.1222", digits = 4)
print(result) # prints the whole string because the number is between quotations
# to clear the screen you can either press CTRL+L or cat("\014") or cat("\f")

# Scientific notation
result <- format(c(6, 13.1415), scientific = TRUE)
print(result) # both numbers are shown in exponent form

# nsmall: the minimum number of digits to the right of the decimal point
result <- format(23.4711, nsmall = 3)
print(result) # already has more than 3 decimals, printed as it is
result <- format(23.47, nsmall = 3)
print(result) # padded with one zero

# format() always returns a character string, even for a bare number.
result <- format(6)
print(result)

# width: 13.7 needs 4 characters, so a width of 6 pads it with 2 spaces on
# the left.
result <- format(13.7, width = 6)
print(result)

# Justify strings inside a field of 8 characters
result <- format("Hello", width = 8, justify = "left")
print(result) # 3 blank spaces are left to the right
result <- format("Hello", width = 8, justify = "centre")
print(result) # printed centred
result <- format("Hello", width = 8, justify = "right")
print(result)

# Number of characters
print(nchar("Hello,my name is Paul"))

# Change case
x <- "Hello,how are you?"
print(toupper(x))
print(tolower(x))

# Extract characters 5 to 7
result <- substr("Extract", 5, 7)
print(result)

# Lists: elements may have different types
list_data <- list("RED", "GREEN", c(10, 11, 21), TRUE, 51.24)
print(list_data)

# List with names
list_data <- list(
  c("Jan", "Feb", "March"),
  matrix(c(3, 9, 5, 1, -2, -8), nrow = 2),
  list("green", 12.3)
)
names(list_data) <- c("1st Quarter", "matrix", "A_Inner_List")
print(list_data)

# Print the first component, then the first two components.
print(list_data[1])
print(list_data[1:2])

# Components can also be reached by name.
print(list_data$matrix)
print(list_data$`1st Quarter`)

list_data[4] <- "New element" # This element is added to the list_data
print(list_data)
list_data[4] <- NULL # assigning NULL removes the element again
print(list_data)

# Exercise 2
# a) Create a list containing two matrices, defined by ncol and nrow
#    respectively
list_data <- list(
  matrix(c(1, 2, 3, 4, 5, 6), ncol = 3),
  matrix(c(1, 2, 3, 4, 5, 6), nrow = 3)
)
print(list_data)

# b) Create two lists, transform them to vectors and add them after that
list1 <- list(1:5)
print(list1)
list2 <- list(10:14)
print(list2)
v1 <- unlist(list1)
v2 <- unlist(list2)
result <- v1 + v2
print(result)

# print(list1+list2) # non-numeric argument to binary operator

# Create pies
x <- c(10, 21, 31, 19)
labels <- c("Athens", "London", "Tokyo", "Moscow")
png(file = "cities.png")
pie(x, labels)
dev.off()

# A .jpg file name goes with the jpeg() device; png() would write PNG data
# into a file carrying the wrong extension.
jpeg(file = "city_title_colours.jpg")
pie(x, labels, main = "Cities", col = rainbow(length(x)))
dev.off()

# Pie chart labelled with percentages, plus a legend with the city names
x <- c(21, 62, 10, 53)
labels <- c("Athens", "New York", "Bucharest", "Tokyo")
piepercent <- round(100 * x / sum(x), 1)
jpeg(file = "city_pie_percent.jpg")
pie(x, labels = piepercent, main = "City pie chart", col = rainbow(length(x)))
legend("topright", labels, cex = 0.8, fill = rainbow(length(x)))
dev.off()

# pie3D() is not part of base R; it comes with the plotrix package.
# To install the package from a local archive:
# install.packages("C:\\Program Files\\R\\R-3.3.2\\plotrix_3.6-4.zip",repos=NULL,type="source")
# not like this: install.packages("plotrix")
if (requireNamespace("plotrix", quietly = TRUE)) {
  plotrix::pie3D(x, labels = piepercent)
} else {
  message("Package 'plotrix' is not installed; skipping the 3D pie chart.")
}

# BARCHARTS
H <- c(10, 21, 22, 13, 14)
png(file = "barchart.png")
barplot(H)
dev.off()
H <- c(10, 10, 11, 11, 12)
barplot(H)

# How to create a sequence of repeated elements
# The variable is called `repeated` rather than `c`, so it does not mask
# base::c.
repeated <- rep(2, 10) # repeats the number 2 ten times
print(repeated)
repeated <- rep(10, 2) # repeats the number 10 twice
print(repeated)
H <- c(rep(10, 2), rep(11, 4), rep(12, 6))
barplot(H) # displays each occurrence of 10, of 11 and of 12

# Bar chart labels, title and colour
H <- c(7, 12, 28, 3, 41)
M <- c("Mar", "Apr", "May", "June", "July")
png(file = "barchart_months_revenue.png")
barplot(
  H,
  names.arg = M, xlab = "Month", ylab = "Revenue", col = "blue",
  main = "Revenue chart", border = "red"
) # col sets the fill colour, border the outline colour
dev.off()

# Group bar chart and Stacked Bar Chart
# Four regions (rows) over four columns of revenue values.
colors <- c("Green", "orange", "brown", "red")
months <- c("Mar", "Apr", "May", "Jun", "July")
regions <- c("East", "West", "North", "South")
values <- matrix(
  c(2, 9, 11, 3, 9, 4, 8, 7, 3, 12, 5, 2, 8, 10, 11, 12),
  nrow = 4, ncol = 4, byrow = TRUE
)
png(file = "barchar_stacked.png")
# barplot() stops with "incorrect number of names" unless there is exactly
# one name per column, so only as many months as `values` has columns are
# passed.
barplot(
  values,
  main = "Total revenue", names.arg = months[seq_len(ncol(values))],
  xlab = "month", ylab = "revenue", col = colors
)
legend("topleft", regions, cex = 1.3, fill = colors)
dev.off()

# A similar group bar chart but with 3 colors
colors <- c("Green", "orange", "brown")
months <- c("Mar", "Apr", "May", "Jun", "July")
regions <- c("East", "West", "North")
# Exactly 3 x 5 = 15 values. The original data carried a 16th value that
# matrix() discarded with a warning, so the plotted numbers are unchanged.
values <- matrix(
  c(2, 9, 11, 3, 9, 4, 8, 7, 3, 12, 5, 2, 8, 10, 11),
  nrow = 3, ncol = 5, byrow = TRUE
)
png(file = "barchar_stacked2.png")
barplot(
  values,
  main = "Total revenue", names.arg = months,
  xlab = "month", ylab = "revenue", col = colors
)
legend("topleft", regions, cex = 1.3, fill = colors)
dev.off()

# We now move the legend to the top right
colors <- c("Green", "orange", "brown")
months <- c("Mar", "Apr", "May", "Jun", "July")
regions <- c("East", "West", "North")
# Exactly 3 x 5 = 15 values. The original data carried a 16th value that
# matrix() discarded with a warning, so the plotted numbers are unchanged.
values <- matrix(
  c(2, 9, 11, 3, 9, 4, 8, 7, 3, 12, 5, 2, 8, 10, 11),
  nrow = 3, ncol = 5, byrow = TRUE
)
png(file = "barchar_stacked3.png")
barplot(
  values,
  main = "Total revenue", names.arg = months,
  xlab = "month", ylab = "revenue", col = colors
)
legend("topright", regions, cex = 1.3, fill = colors)
dev.off()

# Connect to MYSQL
# The connection call is left commented out, as in the original script:
# mysqlconnection=dbConnect(MySQL(),user='root',password='your_password',
#                           dbname='people',host="localhost")
# NOTE(review): every query below needs `mysqlconnection` to exist; confirm
# where it is created before running this section.

# First five rows of the clients table
result <- dbSendQuery(mysqlconnection, "select * from clients")
clients <- fetch(result, n = 5)
print(clients)
# Release the result set; rows were left unfetched, and a pending result can
# block the next query on the same connection.
invisible(dbClearResult(result))

# All rows of the clients table
result <- dbSendQuery(mysqlconnection, "select * from clients")
clients <- fetch(result)
print(clients)
invisible(dbClearResult(result))

# QUERY WITH FILTER CLAUSE
# The fetched rows are stored in `clients` rather than in a variable named
# `data.frame`, which would mask the base function of that name.
result <- dbSendQuery(
  mysqlconnection,
  "select * from clients where city='London'"
)
clients <- fetch(result)
print(clients)
print(is.list(clients)) # gives TRUE --> so from a fetch(result) we get an R list
invisible(dbClearResult(result)) # release the result set before the next query

# Another filtering example
result <- dbSendQuery(mysqlconnection, "select * from clients where age>=30")
clients <- fetch(result)
print(clients)
invisible(dbClearResult(result))

# Filtering on a range
result <- dbSendQuery(
  mysqlconnection,
  "select * from clients where age>=23 AND age<=31"
)
clients <- fetch(result)
print(clients)
invisible(dbClearResult(result))

# Update rows in tables
# Each statement's result handle is cleared right away so that it does not
# stay open on the connection.
invisible(dbClearResult(dbSendQuery(
  mysqlconnection,
  "update clients set age=25 where Last_Name='Johnson'"
)))

# Inserting data into tables
invisible(dbClearResult(dbSendQuery(
  mysqlconnection,
  "insert into clients(Last_Name,First_name,age,city) values('Hanfstaengl','Helena',35,'Munich')"
)))

# Creating tables in MySQL
dbWriteTable(mysqlconnection, "mtcars", mtcars, overwrite = TRUE)

# Dropping tables in MySQL
invisible(dbClearResult(dbSendQuery(
  mysqlconnection,
  "drop table if exists mtcars"
)))
# Create histograms
v <- c(9, 13, 21, 8, 6, 2, 12, 13)
png(file = "histogram.png")
hist(
  v,
  xlab = "Weight",
  col = "yellow",
  border = "blue"
)
dev.off()

# Create histogram with limits
# xlim and ylim fix the visible axis ranges; breaks suggests the number of
# cells.
v <- c(9, 12, 21, 13, 8, 10, 41, 33, 36, 29, 42)
png(file = "histogram_lim_breaks.png")
hist(
  v,
  xlab = "weight",
  col = "green",
  border = "blue",
  xlim = c(0, 40),
  ylim = c(0, 5),
  breaks = 5
)
dev.off()

# Plots, line charts
v <- c(7, 13, 28, 3, 41)
png(file = "line_chart.png")
plot(v, type = "o")
dev.off()

# With color and label
# A .jpg file name goes with the jpeg() device; png() would write PNG data
# into a file carrying the wrong extension.
jpeg(file = "line_chart_with_color.jpg")
plot(
  v,
  type = "o", col = "red", xlab = "Month", ylab = "Rain fall",
  main = "Rain fall chart"
)
dev.off()

# Multiple lines chart
v <- c(7, 12, 28, 3, 41)
# Second series; not called `t`, so that base::t is not masked.
rain2 <- c(14, 7, 6, 19, 3)
jpeg(file = "line_chart_2_lines.jpg")
plot(
  v,
  type = "o", col = "red", xlab = "Month", ylab = "Rain Fall",
  main = "Rain fall chart"
)
# lines() adds further series to the plot that is already open.
lines(rain2, type = "o", col = "blue")
lines(rain2 + 1, type = "o", col = "blue")
lines(v - 1, type = "o", col = "yellow")
dev.off()

# Scatterplots
input <- mtcars[, c("wt", "mpg")]
print(head(input))
png(file = "scatterplot.png")
plot(
  x = input$wt, y = input$mpg,
  xlab = "Weight",
  ylab = "Mileage",
  xlim = c(2.5, 5), ylim = c(15, 30),
  main = "Weight vs Mileage"
)
dev.off()

# Scatterplot matrices
# Each matrix goes to its own file. A png() file name without a page-number
# format holds one page only, so drawing both matrices into the same file
# would keep just the last one.
png(file = "scatterplot_matrices_4vars.png")
pairs(~wt+mpg+disp+cyl, data = mtcars, main = "Scatterplot Matrix")
dev.off()
png(file = "scatterplot_matrices.png")
pairs(~wt+mpg+disp, data = mtcars, main = "Scatterplot Matrix")
dev.off()

# LINEAR REGRESSIONS
# Example 1
x <- c(151, 174, 138, 186, 128, 136, 179, 163, 152, 131)
y <- c(63, 81, 56, 91, 47, 57, 76, 72, 62, 48)
relation <- lm(y~x)
print(relation)

# Example 2
x <- c(1, 2, 3)
y <- c(3, 5, 8)
relation <- lm(y~x)
print(relation) # least-squares estimates are used for the two coefficients:
# the intercept b and the slope a of f(x) = a*x + b

# Summary of the relationship
x <- c(151, 174, 138, 186, 128, 136, 179, 163, 152, 131)
y <- c(63, 81, 56, 91, 47, 57, 76, 72, 62, 48)
relation <- lm(y~x)
print(summary(relation))

# predict function (uses x, y and relation from the summary step above)
# New data must carry a column named `x`, the predictor in the formula.
a <- data.frame(x = 179)
result <- predict(relation, a)
print(result)

# Predictions for all the original x values
b <- data.frame(x)
result2 <- predict(relation, b)
print(result2)

# Without the column name, predict() would not find `x` in the new data and
# would fall back to the 10-element x defined above.
d <- data.frame(x = c(128, 136))
result3 <- predict(relation, d)
print(result3)

# Multiple regression model: mpg explained by disp, hp and wt
input <- mtcars[, c("mpg", "disp", "hp", "wt")]
print(head(input))
model <- lm(mpg~disp+hp+wt, data = input)
print(model)

cat("# # # # The coefficient values # # #","\n")
# Pick each coefficient by its name instead of its position.
a <- coef(model)["(Intercept)"]
print(a)
Xdisp <- coef(model)["disp"]
Xhp <- coef(model)["hp"]
Xwt <- coef(model)["wt"]
print(Xdisp)
print(Xhp)
print(Xwt)

# The fitted model is stored as a list; its first three components are
# addressed by name here.
print(typeof(model))
print(model["coefficients"]) # first component: the coefficients
print(model["residuals"]) # second component: the residuals
print(model["effects"]) # third component: the effects
print(model)
print(coef(model))