# R_graphs_northyukon_days_zero_above_yukon

#/ R20100826
#/ Author: Daniel Gallant - 26 August 2010 - Moncton (New Brunswick, Canada)
#/ Executed on R version 2.8.1 (additional packages: none)
#/
#/ NOTE: This file is provided by the author to help, and is supposed to be free access.
#/ If you have to pay to access this file, it means somebody stole the file and is wrongfully
#/ profiteering from it. I used this script for my own work but provided it here with various
#/ notes for those in search of examples to learn R.
# ___________________________________________________________________
# | |
# | Example program for reorganising a dataset, plotting |
# | barcharts, and doing a linear regression (commented version) |
# |___________________________________________________________________|

# Import the station records; the first row of the file supplies the column names.
data <- read.table(
  file = "Rdata_northyukon_days_zero_above_yukon.txt",
  header = TRUE
)

#1/... The "read.table" function extracts the data from a tab-delimited TXT file and the
# "header=TRUE" indicates that the first row in the file contains the column titles to be
# imported. The original datafile is not included, but if needed, you can easily create
# a fictitious one by creating a database with fictitious values based on
# the description of the columns below (then save it as a tab-delimited TXT file).

#2/... Description of variables/columns in "Rdata_northyukon_days_zero_above_yukon.txt" :


# Station ............ Values: "Komakuk_beach" & "Shingle_point" (weather stations)
# Year ............... Ranges from 1959 to 2007
# Month .............. Values: November, December, January, February, March
# Max_temp ........... Number of days in that month where daily max. temp. reached zero+ degrees Celsius
# Max_missing_days ... Nb. days without data for daily max. temp. for that month
# Max_total_days ..... Nb. days with data for daily max. temp. for that month
# Min_temp ........... Nb. days in the month where daily min. temp. reached zero+ degrees
# Min_missing_days ... Nb. days without data for daily min. temp. for that month
# Min_total_days ..... Nb. days with data for daily min. temp. for that month
# Mean_temp .......... Nb. days in the month where daily mean temp. reached zero+ degrees
# Mean_missing_days .. Nb. days without data for daily mean temp. for that month
# Mean_total_days .... Nb. days with data for daily mean temp. for that month

#3/... ____________________________________________________________________________________________
# | FIRST GOAL: reorganise the dataset into two separate ones (one for each station) and |
# | sum the values of the 5 months together to have only one value per winter/year |
# | for each variable/column. |
# |____________________________________________________________________________________________|

unique.years <- unique(data$Year)     #4/... List of unique years
nb.years <- length(unique.years)      #5/... Number of unique years

nb.rows <- nb.years                   #6/... Number of unique years will define the length of the new datasets
nb.columns <- 10                      #7/... Columns/variables kept (elimination of "Station" & "Month")

# Months present in the data and their count (not referenced again in the
# part of the script shown here; kept so the objects remain available).
unique.months <- unique(data$Month)
nb.months <- length(unique.months)

#8/... Separates the data into 2 subsets (the 2 stations)
data.komakuk <- data[data$Station == "Komakuk_beach", ]
data.shingle <- data[data$Station == "Shingle_point", ]

#9/... Specifies dimensions for the Komakuk dataset: an NA-filled matrix with
#      one row per year and one column per variable kept
arranged.data.komakuk <- matrix(NA, nrow = nb.rows, ncol = nb.columns)
#10/... The same for the Shingle Point dataset
arranged.data.shingle <- matrix(NA, nrow = nb.rows, ncol = nb.columns)
#11/... Loop for creating the Komakuk Beach "arranged" dataset, with summation of values for the 5 months:

#16/... Labels for columns of the arranged datasets (defined before the loop
#       because the loop uses them to pick the columns to sum)
col.labels <- c("Year", "Max_temp", "Max_missing_days", "Max_total_days",
                "Min_temp", "Min_missing_days", "Min_total_days",
                "Mean_temp", "Mean_missing_days", "Mean_total_days")

# Every column except "Year" is summed over the months of a given year.
summed.columns <- col.labels[-1]

#12/... "i" = loop counter ranging from 1 to the total number of unique years.
#       "seq_along" is used instead of "1:length(...)" so that the loop is
#       simply skipped (rather than run with i = 1, 0) if there are no years.
for (i in seq_along(unique.years)) {
  #13/... Extracts part of dataset relating to year "i"
  year <- data.komakuk[data.komakuk$Year == unique.years[i], ]

  #14/... Sums the months' values for year "i" together, one column at a time
  #       ("vapply" applies "sum" to each selected column and returns a vector)
  sums <- vapply(year[summed.columns], sum, numeric(1))

  #15/... Reconstitutes row for year "i". The year comes from "unique.years"
  #       so that it is still filled in when this station has no rows for
  #       that year (the sums are then all zero).
  arranged.data.komakuk[i, ] <- c(unique.years[i], sums)
}

arranged.data.komakuk <- data.frame(arranged.data.komakuk) #17/... Sets the re-organised data as a dataframe object
names(arranged.data.komakuk) <- col.labels #18/... Adds the column labels to the dataframe

Max_temp_corrected <- arranged.data.komakuk$Max_temp * 151 / arranged.data.komakuk$Max_total_days
arranged.data.komakuk.11 <- cbind(arranged.data.komakuk, Max_temp_corrected) #19/... "cbind" binds objects column-wise
arranged.data.komakuk <- data.frame(arranged.data.komakuk.11)

#20/... The three lines above create corrected values for the variable "Max_temp" by correcting for the
# number of days with missing data over the 5 month period of interest (151 days total; this is the
# November-to-March day count of a non-leap year). This is necessary because the number of days with
# missing data for a particular variable changes from year to year. This correction assumes that the
# outcome of the variable (here, "Max_temp") is independent from the days with missing data. The
# corrected values in the separate object "Max_temp_corrected" are then incorporated into the dataset
# as the 11th column, which bears the same column title as its object name.
# Note: a year with zero days of data gives 0/0, i.e. NaN, for the corrected value.

rm("year", "sums", "summed.columns", "Max_temp_corrected") #21/... Removing objects

#22/... Second separate loop for creating the Shingle Point arranged dataset :

# Every column except "Year" is summed over the months of a given year.
summed.columns <- col.labels[-1]

# "seq_along" is used instead of "1:length(...)" so that the loop is simply
# skipped (rather than run with i = 1, 0) if there are no years.
for (i in seq_along(unique.years)) {
  # Extracts part of the Shingle Point dataset relating to year "i"
  year <- data.shingle[data.shingle$Year == unique.years[i], ]

  # Sums the months' values for year "i" together, one column at a time
  sums <- vapply(year[summed.columns], sum, numeric(1))

  # Reconstitutes the row for year "i". The year comes from "unique.years" so
  # that it is still filled in when this station has no rows for that year
  # (the sums are then all zero).
  arranged.data.shingle[i, ] <- c(unique.years[i], sums)
}

arranged.data.shingle <- data.frame(arranged.data.shingle) # Sets the re-organised data as a dataframe object
names(arranged.data.shingle) <- col.labels                 # Adds the column labels to the dataframe

# Corrects "Max_temp" for days with missing data by scaling it to the full
# 151-day period, then binds the corrected values on as the 11th column.
# Note: a year with zero days of data gives 0/0, i.e. NaN.
Max_temp_corrected <- arranged.data.shingle$Max_temp * 151 / arranged.data.shingle$Max_total_days
arranged.data.shingle.11 <- cbind(arranged.data.shingle, Max_temp_corrected)
arranged.data.shingle <- data.frame(arranged.data.shingle.11)

rm("year", "sums", "summed.columns", "Max_temp_corrected") # Removing temporary objects
#23/... ____________________________________________________________________________________
# | SECOND GOAL: Plot a barplot graph. The example below is for the number of days |
# | that maximum daily temperature reached zero degrees or more during |
# | the course of these 5 months |
# |____________________________________________________________________________________|
#24/... "las" controls the horizontal VS vertical orientation of the values shown along the graph's axes
par(las = 1)

# Bar chart of the corrected number of days for Komakuk Beach, one bar per year.
graph <- barplot(
  arranged.data.komakuk$Max_temp_corrected,
  names.arg = unique.years,
  ylim = c(0, 12),
  xlab = "Year",
  ylab = "Number of Days",
  cex.lab = 1.5,
  cex.axis = 1.0
)
#25/... "barplot" is the function that creates the bar graph, "ylim" manually sets the range limit
# for the Y-axis of the graph, "names.arg" is used to put the labels on the X-axis using the
# "unique.years" object, "ylab & xlab" set the titles of the axes, "cex.lab" sets the size
# of the axes titles, "cex.axis" sets the size of the values displayed along the axes.

#par(las=1)
#graph<-barplot(arranged.data.shingle$Max_temp_corrected,names.arg=c(unique.years),ylim=range(0:12),ylab="Number of Days",
# xlab="Year", cex.lab=1.5,cex.axis=1.0)

#26/... This last one above is used for graphing data from the Shingle Point weather station (remove "#").
#28/... __________________________________________________________________________________
# | THIRD GOAL: Fit a simple linear regression model to the data, of |
# | "Max_temp_corrected" against "Year" for both Komakuk Beach and Shingle Point |
# |__________________________________________________________________________________|

linear.regression.komakuk <- lm(Max_temp_corrected ~ Year, data = arranged.data.komakuk)
linear.regression.shingle <- lm(Max_temp_corrected ~ Year, data = arranged.data.shingle)

#29/... "lm( )" is the function to fit a linear model to the data; the independent variable is "Year" and
# the dependent variable is "Max_temp_corrected". "~" links the dependent variable (left side) to the
# independent variable (right side), and "data=" names the dataframe object in which both columns are
# looked up. Passing the dataframe through "data=" (rather than writing "dataframe$column" inside the
# formula) keeps the coefficient names short ("Year") and lets "predict( )" work with new data.

#30/... "summary( )" computes the results of the analysis; wrapping it in "print( )" displays them
# even when the script is run with "source( )", which does not auto-print top-level values by default.
print(summary(linear.regression.komakuk))
print(summary(linear.regression.shingle))

#31/... The final dataset objects are "arranged.data.komakuk" and "arranged.data.shingle"


# and the results of the analyses are stored in "linear.regression.komakuk" and
# "linear.regression.shingle".

# End of script.