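# Navigation text left over from the page this script was copied from
# (not part of the analysis):
#   Académique Documents
#   Professionnel Documents
#   Culture Documents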
## Load the data
# Read the semicolon-separated data set and keep it as `final`, the name every
# later step of the script uses.
# The URL must be one unbroken string: a line break inside the literal becomes
# part of the address.
# NOTE(review): the file name was rejoined from a wrapped line as "FINAL_MD" --
# confirm it against the repository.
# stringsAsFactors = TRUE keeps text columns as factors on R >= 4.0 (it was the
# default before 4.0); later steps call as.numeric() on those columns, which
# only yields usable codes for factors.
final <- read.csv(
  "https://raw.githubusercontent.com/VictorManuelGP/datamining/master/FINAL_MD",
  sep = ";",
  stringsAsFactors = TRUE
)

## Data exploration
head(final)     # first rows
dim(final)      # number of rows and columns
str(final)      # column types
summary(final)  # per-column summary
## Pie charts
# One pie chart per categorical column of `final.imputacion`, four per page.
# NOTE(review): `final.imputacion` is assigned further down in this file (the
# knnImputation step); this section can only run after that assignment.

# Draw a pie chart of the percentage distribution of the values of `x`.
#
# x:      vector whose distinct values are tabulated.
# title:  main title of the chart.
# digits: decimals kept in the percentage shown on each slice label.
#
# Each slice is labelled with the category name and, on a second line, its
# percentage. Returns (invisibly) the table of percentages that was plotted.
pie_pct <- function(x, title, digits = 1) {
  # deparse.level = 0 keeps the table free of a dimnames header, as a direct
  # table(data$column) call would produce.
  pct <- prop.table(table(x, deparse.level = 0)) * 100
  slice_labels <- paste0(names(pct), "\n", round(pct, digits), "%")
  pie(pct, labels = slice_labels, main = title)
  invisible(pct)
}

# Page 1
par(mfrow = c(2, 2))
gen <- pie_pct(final.imputacion$Genero, "Genero")
soc <- pie_pct(final.imputacion$Socio, "Socio")
dep <- pie_pct(final.imputacion$Dependientes, "Dependientes")
mult <- pie_pct(final.imputacion$MultipleLineas, "Multilineas")

# Page 2
par(mfrow = c(2, 2))
int <- pie_pct(final.imputacion$Internet, "Internet")
seg <- pie_pct(final.imputacion$SeguridadOnline, "Seguridad Online")
bac <- pie_pct(final.imputacion$Backup, "Backup")
pro <- pie_pct(final.imputacion$ProteccionDispositivo, "Proteccion Dispositivo")

# Page 3
par(mfrow = c(2, 2))
no <- pie_pct(final.imputacion$No.servicio.Internet, "No Servicio internet")
tv <- pie_pct(final.imputacion$TVPago, "Tv Pago")
pel <- pie_pct(final.imputacion$PeliculasStreaming, "Peliculas Streaming")
ter <- pie_pct(final.imputacion$TerminoContrato, "Termino Contrato")

# Page 4
par(mfrow = c(2, 2))
fac <- pie_pct(final.imputacion$FactElectronica, "Facturacion Electronica")
met <- pie_pct(final.imputacion$MetodoPago, "Metodo pago")
fono <- pie_pct(final.imputacion$FonoServicio, "Fono Servicio")
churn <- pie_pct(final.imputacion$Churn, "Churn")
## Drop the id.cliente variable
library(VIM)
# Every column except the first one is kept.
final.Vna <- final[, -1]

## Plot showing the missing values
aggr(final.Vna, numbers = TRUE)

## Data imputation
library(DMwR)
library(kknn)
# Fill the missing values with DMwR::knnImputation, using 20 neighbours,
# scaling switched on and the "weighAvg" method.
final.imputacion <- knnImputation(
  final.Vna,
  k = 20,
  scale = TRUE,
  meth = "weighAvg",
  distData = NULL
)
## Column type conversion
# Columns passed through as.numeric(), in the order the script converts them.
# NOTE(review): for a factor column as.numeric() returns the integer level
# codes, not the printed values -- confirm that this is what is wanted for
# ImporteTotal and CargoMensual if they were read as factors.
numeric_cols <- c(
  "ImporteTotal", "CargoMensual", "Genero", "Socio", "Dependientes",
  "MultipleLineas", "Internet", "SeguridadOnline", "FactElectronica",
  "MetodoPago", "FonoServicio", "TerminoContrato", "PeliculasStreaming",
  "TVPago", "No.servicio.Internet", "Backup", "ProteccionDispositivo"
)
final.imputacion[numeric_cols] <- lapply(
  final.imputacion[numeric_cols],
  as.numeric
)

# The target stays categorical.
final.imputacion[["Churn"]] <- as.factor(final.imputacion[["Churn"]])

summary(final.imputacion)
## Class balancing set-up
library(unbalanced)

# Separate the target from the predictors: `output` is the Churn column and
# `input` is every column except the last one.
# NOTE(review): this assumes Churn is the last column -- confirm.
n <- ncol(final.imputacion)
output <- final.imputacion[["Churn"]]
input <- final.imputacion[, -n]

# NOTE(review): `newData` is not assigned anywhere in the visible part of this
# script. Presumably the step that builds it from `input` and `output` with
# the `unbalanced` package was lost; it has to be restored before this runs.
str(newData)

# Keep only the columns of `newData` whose positions are not listed below.
finalChurn <- newData[, -c(1:6, 8:9, 11:12, 16:17, 19)]
str(finalChurn)
dim(finalChurn)
## Train/test split
# Seed the generator before sampling so the partition is reproducible (the
# script otherwise sets the seed only after the split has been drawn).
set.seed(123)

# Derive the sizes from the data instead of hard-coding the row count.
# The hold-out share is the 10866 / 25352 used originally, so a 25352-row
# `finalChurn` still gives exactly 10866 test rows.
n_obs <- nrow(finalChurn)
n_test <- round(n_obs * 10866 / 25352)

# An empty index would make finalChurn[-muestra, ] return zero rows, and a
# complete one would leave nothing to train on.
stopifnot(n_test > 0, n_test < n_obs)

muestra <- sample(n_obs, n_test)  # row indices of the test set
train <- finalChurn[-muestra, ]
test <- finalChurn[muestra, ]
##-------------------------------------------------------------------##
## Classification tree (rpart)
library(rpart)
library(caret)
set.seed(123)

# Fit a classification tree for the column `finalB$Y` using every other column
# of `train` as predictor, with complexity parameter 1e-04.
modeloA <- rpart(`finalB$Y` ~ ., data = train, method = "class", cp = 1e-04)

# Predicted classes on the test set and the resulting confusion matrix.
predA <- predict(modeloA, newdata = test, type = "class")
resulA <- confusionMatrix(data = predA, reference = test[["finalB$Y"]])
resulA

## ROC curve
library(ROCR)
# Scores are the second column of the class-probability matrix.
predAA <- predict(modeloA, newdata = test, type = "prob")[, 2]
predA1 <- prediction(predictions = predAA, labels = test[["finalB$Y"]])
predA2 <- ROCR::performance(predA1, measure = "tpr", x.measure = "fpr")
plot(predA2, colorize = TRUE)
# Reference lines: the diagonal and the anti-diagonal of the unit square.
lines(x = c(0, 1), y = c(0, 1), col = " blue", lwd = 1, lty = 3)
lines(x = c(1, 0), y = c(0, 1), col = "red", lwd = 1, lty = 4)

## Gini
# AUC expressed as a percentage rounded to two decimals; Gini = 2 * AUC - 100.
AUROC <- round(
  slot(ROCR::performance(predA1, measure = "auc"), "y.values")[[1]] * 100,
  2
)
giniA <- 2 * AUROC - 100
giniA
##-------------------------------------------------------------------##
## Naive Bayes
library(e1071)
library(caret)
set.seed(123)

# Fit a naive Bayes classifier for the column `finalB$Y` using every other
# column of `train` as predictor.
# NOTE(review): `method` is not a documented naiveBayes() argument and is
# presumably ignored -- confirm before removing it.
modeloB <- naiveBayes(`finalB$Y` ~ ., data = train, method = "class")

# Predicted classes on the test set and the resulting confusion matrix.
predB <- predict(modeloB, newdata = test, type = "class")
resulB <- confusionMatrix(data = predB, reference = test[["finalB$Y"]])
resulB

## ROC curve
library(ROCR)
# Scores are the second column of the "raw" prediction matrix.
predBB <- predict(modeloB, newdata = test, type = "raw")[, 2]
predB1 <- prediction(predictions = predBB, labels = test[["finalB$Y"]])
predB2 <- ROCR::performance(predB1, measure = "tpr", x.measure = "fpr")
plot(predB2, colorize = TRUE)
# Reference lines: the diagonal and the anti-diagonal of the unit square.
lines(x = c(0, 1), y = c(0, 1), col = " blue", lwd = 1, lty = 3)
lines(x = c(1, 0), y = c(0, 1), col = "red", lwd = 1, lty = 4)

## Gini
# AUC expressed as a percentage rounded to two decimals; Gini = 2 * AUC - 100.
ROCB <- round(
  slot(ROCR::performance(predB1, measure = "auc"), "y.values")[[1]] * 100,
  2
)
giniB <- 2 * ROCB - 100
giniB
##-------------------------------------------------------------------##
## Neural network (nnet)
library(nnet)
library(caret)
set.seed(123)

# Fit a single-hidden-layer network (10 hidden units, at most 1000 iterations,
# no training trace) for the column `finalB$Y` using every other column of
# `train` as predictor.
modeloR <- nnet(`finalB$Y` ~ ., data = train, size = 10, trace = FALSE,
                maxit = 1000)

# predict(type = "class") returns character labels. Build the factor with the
# reference levels so both factors always share the same level set and order,
# even when the network predicts a single class for every test row.
predR <- predict(modeloR, test, type = "class")
predR <- factor(predR, levels = levels(test$`finalB$Y`))
resulR <- confusionMatrix(predR, test$`finalB$Y`)
resulR

## ROC curve
library(ROCR)
# Raw network outputs are used as scores.
predRN <- predict(modeloR, test, type = "raw")
predR1 <- prediction(predRN, test$`finalB$Y`)
predR2 <- ROCR::performance(predR1, "tpr", "fpr")
plot(predR2, colorize = TRUE)
# Reference lines: the diagonal and the anti-diagonal of the unit square.
lines(x = c(0, 1), y = c(0, 1), col = " blue", lwd = 1, lty = 3)
lines(x = c(1, 0), y = c(0, 1), col = "red", lwd = 1, lty = 4)

## Gini
# AUC expressed as a percentage rounded to two decimals; Gini = 2 * AUC - 100.
ROCRN <- round(ROCR::performance(predR1, measure = "auc")@y.values[[1]] * 100, 2)
giniRN <- (2 * ROCRN - 100)
giniRN