
Decision tree (information gain, from scratch)

data = read.csv('Buys.csv')
# encode every column as integer codes (as.factor first so this also works
# when read.csv returns character columns)
for(i in seq(1, ncol(data)))
{
  data[,i] = as.numeric(as.factor(data[,i]))
}
# entropy of the class column (the last column)
cls = unique(data[, ncol(data)])
toten = 0
for(i in cls)
{
  v = length(which(data[, ncol(data)] == i)) / nrow(data)
  toten = toten + (v * log2(v))
}
toten = -toten
# information gain of each attribute = class entropy - weighted conditional entropy
ig = matrix(0, 1, ncol(data) - 1)
for(i in seq(ncol(data) - 1))
{
  p = unique(data[, i])
  for(j in p)
  {
    te = 0
    for(k in cls)
    {
      v = length(which(data[, i] == j & data[, ncol(data)] == k)) /
          length(which(data[, i] == j))
      if(v != 0)
      {
        te = te + (v * log2(v))
      }
    }
    te = -te
    ig[i] = ig[i] + (te * (length(which(data[, i] == j)) / nrow(data)))
  }
}
ig = toten - ig
yt = which(ig == max(ig))
cat("\nThe best factor is", names(data)[yt])

Decision tree (built-in: rpart)

library("rpart",lib.loc="C:/Program Files/R/R-3.1.3/library")
data=read.csv('Buys.csv',header=TRUE)
for(i in ncol(data))
{
data[,i]=as.numeric(data[,i])
}
fit=rpart(Buys~.,data,control=rpart.control(minsplit=2))
#printcp(fit) # display the results
#plotcp(fit) # visualize cross-validation results
#summary(fit) # detailed summary of splits
#plot(fit)
#text(dt)
# plot tree
plot(fit, uniform=TRUE,
main="Classification Tree for Kyphosis")
text(fit, use.n=TRUE, all=TRUE, cex=.8)

predict(fit,data,type="vector")
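
As a quick sanity check (a sketch, not in the original script; pred is an introduced name), the in-sample predictions can be tabulated against the actual Buys labels:

# Sketch: confusion table of in-sample predictions against the actual labels.
pred = predict(fit, data, type = "class")
table(predicted = pred, actual = data$Buys)
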
KNN (from scratch)

a = read.csv('buys.csv')
k = 3
for(i in seq(ncol(a)))
{
  a[,i] = as.numeric(as.factor(a[,i]))   # integer-code every column
}
test = a[3,]                             # use row 3 as the query point
k1 = 0
for(i in seq(nrow(a)))
{
  x1 = a[i,]
  e = sqrt(sum((x1 - test)^2))           # Euclidean distance to the query point
  e1 = c(e, i)                           # keep the distance together with the row index
  if(k1 == 0)
  {
    e2 = rbind(e1)
    k1 = k1 + 1
  }else
  {
    e2 = rbind(e2, e1)
  }
}
new = e2[order(e2[,1]),]                 # sort rows by distance
w1 = c()
for(i in seq(k))
{
  w = new[i,2]                           # row index of the i-th nearest neighbour
  w1[i] = a[w, ncol(a)]                  # its class label
}
t = table(w1)                            # vote among the k nearest labels
q = as.data.frame(t)
q = q[order(-q[,2]),]
print(q[1,1])                            # majority class
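
The same distance-and-vote logic can be wrapped in a reusable function; this is a sketch with an introduced name (knn_predict), and like the loop above it keeps the label column in the distance computation:

# Sketch: classify an arbitrary query row with the same k-NN vote as above.
knn_predict = function(train, query, k)
{
  d = apply(train, 1, function(r) sqrt(sum((r - query)^2)))   # distance to every row
  nn = order(d)[1:k]                                          # indices of the k nearest rows
  votes = table(train[nn, ncol(train)])                       # vote among their class labels
  names(which.max(votes))
}
# e.g. knn_predict(a, as.numeric(a[3,]), 3) should agree with the result above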

KNN (built-in: class::knn)

library(class)
data = read.csv('Buys.csv')
for(i in seq(ncol(data)))
{
  data[,i] = as.numeric(as.factor(data[,i]))
}
e = data[, ncol(data)]                   # class labels
# leave the label column out of the feature matrix passed to knn()
fit = knn(train = data[, -ncol(data)], test = data[, -ncol(data)], cl = e, k = 3)
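
A rough in-sample check (a sketch, not part of the original): the fraction of rows where the predicted label matches the true one.

# In-sample accuracy (optimistic, since every row also appears in the training
# set); knn() returns a factor, so compare as character strings.
mean(as.character(fit) == as.character(e))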

K-means (from scratch)

data=read.csv("Kmeans.csv",header=FALSE)
data[data=='?']=NA
data=na.omit(data)
for(i in seq(ncol(data)))
{
data[,i]=as.numeric(as.character(data[,i]))
}
a=data
k=2
b=c(1:nrow(a))
e=sample(b,k,replace=FALSE)
u=list()
for(i in seq(k))
{
u[[i]]=a[e[i],]
}
e1=0
e2=1
while(e2==1)
{
u1=list()
u1=u
iu=list()
ki=c()
for(i in seq(k))
{
ki[i]=0
}
for(i in seq(nrow(a)))
{
w=a[i,]
g=c()
for(j in seq(k))
{
w1=u[[j]]
g[j]=sqrt(sum((w - w1) ^ 2))
}
gt=which(g==min(g))
if(ki[gt]==0)
{
iu[[gt]]=rbind(w)
ki[gt]=ki[gt]+1
}else
{
iu[[gt]]=rbind(iu[[gt]],w)
}
}
for(i in seq(k))
{
u[[i]]=colMeans(iu[[i]])
}
gty=TRUE
for(i in seq(k))
{
if(all(u[[i]]==u1[[i]]))
{
gty=TRUE
}else
{
gty=FALSE
}
}
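
For comparison, the built-in kmeans() performs the same assignment/update iteration; km is an introduced name, and the result can differ from the loop above since both start from random centroids:

# Built-in K-means on the same data, for comparison with the hand-rolled loop.
km = kmeans(a, centers = k)
km$centers    # final centroids
km$cluster    # cluster assignment for every row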

MLE (exponential rate)

a = rexp(50, 0.2)            # 50 draws from an exponential distribution with true rate 0.2
est = c()
for(i in seq(20))
{
  b = sample(a, 5)           # subsample of size 5
  est[i] = 1 / mean(b)       # MLE of the exponential rate is 1 / sample mean
}
mean(est)                    # average of the 20 maximum-likelihood estimates
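
Because the 1/mean estimator overshoots in very small samples, mean(est) tends to land a little above the true rate 0.2; the full-sample estimate (a one-line sketch) is a useful comparison:

# MLE of the rate from the full sample of 50, for comparison with mean(est) above.
1 / mean(a)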
