Basri_14002164_tugas2-binary

In [1]:

# Student ID (NIS) : 14002164
# Name : BASRI

# Import packages to visualize the classifier
import matplotlib.pyplot as plt

# Other imports
import warnings
import time
import datetime as dt
import pandas as pd
import numpy as np

In [2]:

from sklearn import datasets, metrics
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [3]:

dataset = pd.read_csv("pima-indians-diabetes-header.csv")


#print(dataset.describe())
print(dataset.head())

   pregnancies  glucose  blood_pressure  skin_thickness  insulin   bmi  \
0            6      148              72              35        0  33.6
1            1       85              66              29        0  26.6
2            8      183              64               0        0  23.3
3            1       89              66              23       94  28.1
4            0      137              40              35      168  43.1

   diabetes_pedigree_func  age  outcome
0                   0.627   50        1
1                   0.351   31        0
2                   0.672   32        1
3                   0.167   21        0
4                   2.288   33        1

In [4]:

data = dataset.values[:, :8]    # Feature columns (first 8 columns)
labels = dataset.values[:, -1]  # Label column (outcome)
print(data.shape, labels.shape)

(768, 8) (768,)
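
The test-set supports in the classification report further down (101 vs 53) suggest the two outcome classes are noticeably imbalanced, which matters when reading the accuracy score. A minimal check of the class balance (an optional addition, not part of the original notebook, reusing the labels array defined above):

# Count how many samples carry each outcome value (0 = no diabetes, 1 = diabetes)
unique, counts = np.unique(labels, return_counts=True)
print(dict(zip(unique, counts)))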

In [5]:

x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=50)

NAIVE BAYES CLASSIFIER


In [6]:

from sklearn.naive_bayes import GaussianNB

# Train classifier
clf = GaussianNB()
clf.fit(x_train, y_train)

Out[6]:

GaussianNB(priors=None, var_smoothing=1e-09)
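
A single 80/20 split gives only one accuracy estimate, which can vary with the chosen random_state. As an optional sketch (not part of the original notebook), k-fold cross-validation over the full data gives a more stable estimate; it reuses clf, data and labels from the cells above:

from sklearn.model_selection import cross_val_score

# 5-fold cross-validated accuracy of the Gaussian Naive Bayes classifier
scores = cross_val_score(clf, data, labels, cv=5, scoring='accuracy')
print('CV accuracy: %.3f +/- %.3f' % (scores.mean(), scores.std()))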

In [7]:

# Measure and display the training time

start_time = dt.datetime.now()
clf.fit(x_train, y_train)
elapsed_time = dt.datetime.now() - start_time
print('Elapsed time, training {}'.format(str(elapsed_time)))

Elapsed time, training 0:00:00.004001

In [8]:

# Predict labels for the test set and compute accuracy


y_pred = clf.predict(x_test)
accuracy = accuracy_score(y_test, y_pred, normalize=True)
accuracy

Out[8]:

0.7272727272727273

In [9]:

# Confusion matrix
from sklearn.metrics import confusion_matrix

print('Confusion Matrix :')
results = confusion_matrix(y_test, y_pred)
print(results)
#print('Accuracy Score :', accuracy_score(y_test, y_pred))

Confusion Matrix :
[[84 17]
[25 28]]
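
With scikit-learn's convention (rows are true labels, columns are predicted labels) the matrix reads [[TN, FP], [FN, TP]], i.e. TN = 84, FP = 17, FN = 25, TP = 28. The per-class precision and recall shown in the next cell can be reproduced directly from these counts; a small sketch (not part of the original notebook), reusing the results array:

# Unpack the 2x2 confusion matrix in row-major order: TN, FP, FN, TP
tn, fp, fn, tp = results.ravel()

# Precision and recall for the positive class (outcome = 1)
precision_1 = tp / (tp + fp)   # 28 / (28 + 17) ≈ 0.62
recall_1 = tp / (tp + fn)      # 28 / (28 + 25) ≈ 0.53
print('precision=%.2f, recall=%.2f' % (precision_1, recall_1))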


In [10]:

from sklearn.metrics import classification_report

print('Report : ')
print(classification_report(y_test, y_pred))

Report : 
              precision    recall  f1-score   support

         0.0       0.77      0.83      0.80       101
         1.0       0.62      0.53      0.57        53

   micro avg       0.73      0.73      0.73       154
   macro avg       0.70      0.68      0.69       154
weighted avg       0.72      0.73      0.72       154

In [11]:

#Area under ROC curve

from sklearn.metrics import roc_auc_score

y_pred_proba = clf.predict_proba(x_test)[:,1]
auc = roc_auc_score(y_test, y_pred_proba)
print('AUC: %.2f' % auc)

#fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)

#from sklearn.metrics import roc_auc_score


#roc_auc_score(y_test, y_pred)

AUC: 0.77

In [13]:

from sklearn.metrics import roc_curve

#y_pred_proba
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
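
The AUC printed above came from roc_auc_score on the predicted probabilities. As a quick consistency check (an optional addition, not in the original notebook), the same value can be recovered by integrating the curve returned by roc_curve:

# Trapezoidal area under the fpr/tpr curve; should match roc_auc_score above.
# Imported under an alias so it does not overwrite the auc value computed earlier.
from sklearn.metrics import auc as auc_from_curve

print('AUC from curve: %.2f' % auc_from_curve(fpr, tpr))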

In [14]:

def plot_roc_curve(fpr, tpr):
    plt.plot(fpr, tpr, color='orange', label='ROC')
    plt.plot([0, 1], [0, 1], color='darkblue', linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.legend()
    plt.show()


In [15]:

plot_roc_curve(fpr, tpr)
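
The call above draws the ROC curve defined in plot_roc_curve (the rendered figure is not included in this text export). As a small optional variant (an assumption, not part of the original notebook), the AUC value computed earlier could be shown directly in the legend by reusing fpr, tpr and auc from the cells above:

plt.plot(fpr, tpr, color='orange', label='ROC (AUC = %.2f)' % auc)   # annotate legend with AUC
plt.plot([0, 1], [0, 1], color='darkblue', linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend()
plt.show()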
