Project

In [92]:  import warnings

          import numpy as np
          import pandas as pd
          import matplotlib.pyplot as plt

          from sklearn import metrics
          from sklearn.linear_model import LogisticRegression
          from sklearn.model_selection import cross_val_score, train_test_split
          from sklearn.svm import SVC
          from sklearn.tree import DecisionTreeClassifier

          %matplotlib inline
          plt.rcParams['figure.figsize'] = 10, 8

In [93]:  data = pd.read_csv('dataset.csv')

          # Drop identifier columns that carry no predictive signal.
          data = data.drop(["RowNumber", "CustomerId", "Surname"], axis=1)

In [94]:  # One-hot encode the categorical fields: create one 0/1 column per
          # category value, then drop the original column.
          for field in ["Geography", "Gender"]:
              for value in data[field].unique():
                  data[value] = pd.Series(data[field] == value, dtype=int)
              data = data.drop([field], axis=1)
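
The same encoding can be done with pandas' built-in helper. The sketch below is an assumption, not part of the original run (it replaces the loop above, so run one or the other, and the dtype argument assumes pandas >= 0.23):

In [ ]:   # Sketch: equivalent one-hot encoding in a single call. The empty prefix
          # keeps the bare category values as column names, matching the loop above.
          data = pd.get_dummies(data, columns=["Geography", "Gender"],
                                prefix="", prefix_sep="", dtype=int)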

In [95]:  data.head()

Out[95]:
   CreditScore  Age  Tenure    Balance  NumOfProducts  HasCrCard  IsActiveMember  ...
0          619   42       2       0.00              1          1               1  ...
1          608   41       1   83807.86              1          0               1  ...
2          502   42       8  159660.80              3          1               0  ...
3          699   39       1       0.00              2          0               0  ...
4          850   43       2  125510.82              1          1               1  ...

In [96]:  # Separate the features from the churn label and hold out 25% for testing.
          x = data.drop(['Exited'], axis=1)
          y = data['Exited']

          x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=.75)
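
The split above is unseeded, so results change from run to run. A reproducible, class-balanced variant is sketched below (random_state and stratify are assumptions, not settings used in the original run), together with a cross-validated AUC check that puts the otherwise unused cross_val_score import to work:

In [ ]:   # Sketch: seeded, stratified split so the churn rate is preserved
          # in both the training and test halves.
          x_train, x_test, y_train, y_test = train_test_split(
              x, y, train_size=.75, stratify=y, random_state=42)

          # Sketch: 5-fold cross-validated AUC as a sanity check on the
          # single holdout estimate computed later.
          cv_auc = cross_val_score(LogisticRegression(), x, y,
                                   cv=5, scoring="roc_auc")
          print("CV AUC: %.3f +/- %.3f" % (cv_auc.mean(), cv_auc.std()))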


In [98]:  warnings.filterwarnings("ignore")

          model1 = LogisticRegression()
          model2 = DecisionTreeClassifier(max_depth=10)

          tprs = []
          fprs = []
          roc_labels = []
          aucs = []

          # Fit logistic regression and record its ROC curve and AUC on the test set.
          model1.fit(x_train, y_train)
          y_test_probability_1 = model1.predict_proba(x_test)[:, 1]
          fpr, tpr, thresholds = metrics.roc_curve(y_test, y_test_probability_1)
          auc = metrics.roc_auc_score(y_test, y_test_probability_1)
          tprs.append(tpr)
          fprs.append(fpr)
          aucs.append(auc)
          roc_labels.append("Logistic Regression")

          # Same for the depth-limited decision tree.
          model2.fit(x_train, y_train)
          y_test_probability_2 = model2.predict_proba(x_test)[:, 1]
          fpr, tpr, thresholds = metrics.roc_curve(y_test, y_test_probability_2)
          auc = metrics.roc_auc_score(y_test, y_test_probability_2)
          tprs.append(tpr)
          fprs.append(fpr)
          aucs.append(auc)
          roc_labels.append("Decision Tree")

          # Plot both ROC curves on one set of axes; %.2f already rounds the AUC.
          for fpr, tpr, roc_label, auc in zip(fprs, tprs, roc_labels, aucs):
              plt.plot(fpr, tpr, label=roc_label + " AUC = %.2f" % auc)
          plt.xlabel("False positive rate (fpr)")
          plt.ylabel("True positive rate (tpr)")
          plt.legend()

Out[98]: <matplotlib.legend.Legend at 0x7fe4c841c748>
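
SVC is imported in the first cell but never used. The sketch below shows how a third curve could join the comparison; it is an assumption, not part of the original run (probability=True is required for predict_proba and slows training, and SVC is noticeably slow on unscaled features like these):

In [ ]:   # Sketch: add an SVM curve and a chance diagonal to the same comparison.
          model3 = SVC(probability=True)  # probability=True enables predict_proba
          model3.fit(x_train, y_train)
          y_test_probability_3 = model3.predict_proba(x_test)[:, 1]
          fpr, tpr, thresholds = metrics.roc_curve(y_test, y_test_probability_3)
          auc = metrics.roc_auc_score(y_test, y_test_probability_3)
          plt.plot(fpr, tpr, label="SVM AUC = %.2f" % auc)
          plt.plot([0, 1], [0, 1], "k--", label="random baseline")  # chance line
          plt.xlabel("False positive rate (fpr)")
          plt.ylabel("True positive rate (tpr)")
          plt.legend()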

