import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, precision_score, recall_score, f1_score
import warnings
warnings.filterwarnings('ignore')
# Load the diabetes dataset from a CSV file in the working directory.
csv_path = 'diabetes.csv'
df = pd.read_csv(csv_path)
# Preview the first rows of the frame (shown as the cell output in a notebook).
df.head()
| | Pregnancies | Glucose | BloodPressure | SkinThickness | Insulin | BMI | DiabetesPedigreeFunction | Age | Outcome |
---|---|---|---|---|---|---|---|---|---|
0 | 6 | 148 | 72 | 35 | 0 | 33.6 | 0.627 | 50 | 1 |
1 | 1 | 85 | 66 | 29 | 0 | 26.6 | 0.351 | 31 | 0 |
2 | 8 | 183 | 64 | 0 | 0 | 23.3 | 0.672 | 32 | 1 |
3 | 1 | 89 | 66 | 23 | 94 | 28.1 | 0.167 | 21 | 0 |
4 | 0 | 137 | 40 | 35 | 168 | 43.1 | 2.288 | 33 | 1 |
# Set the default font size for the figures drawn from here on.
plt.rcParams['font.size'] = 12
# Histogram of every column in the frame, laid out on one 12x8 figure.
df.hist(figsize=(12, 8))
plt.show()
# The first eight columns are used as predictors and the ninth as the label.
feature_names = df.columns[:8]
target_name = df.columns[8]
X = df[feature_names]
y = df[target_name]

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Baseline model: a decision tree with no depth limit.
classifier = tree.DecisionTreeClassifier(random_state=0)
classifier.fit(X_train, y_train)
DecisionTreeClassifier(random_state=0)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
DecisionTreeClassifier(random_state=0)
# Draw the currently fitted tree on a 24x16 figure.
fig, ax = plt.subplots(figsize=(24, 16))
tree.plot_tree(classifier, ax=ax)
plt.show()
# Sweep the tree's depth limit from 1 to 7 and record the accuracy each
# model reaches on the held-out split.
# NOTE(review): scoring on X_test here means the test split also drives the
# choice of depth; a separate validation split or cross-validation would keep
# the final test estimate unbiased.
k_list = range(1, 8)
accuracies = []
for k in k_list:
    # A fresh tree per depth; the fixed seed keeps the runs comparable.
    classifier = tree.DecisionTreeClassifier(random_state=0, max_depth=k)
    classifier.fit(X_train, y_train)
    accuracies.append(classifier.score(X_test, y_test))

plt.plot(k_list, accuracies)
plt.xlabel('Max Depth of Tree')
plt.ylabel('Accuracy Score')
# The model is a classifier and the swept parameter is depth, so the title
# says so (it previously read "Regression ... Widths").
plt.title('Classification Accuracy using Different Decision Tree Depths')
plt.show()
# Refit the tree with the depth limit used for the rest of the analysis.
# NOTE(review): 7 is presumably taken from the depth sweep plot — confirm.
final_max_depth = 7
classifier = tree.DecisionTreeClassifier(max_depth=final_max_depth, random_state=0)
classifier.fit(X_train, y_train)
DecisionTreeClassifier(max_depth=7, random_state=0)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
DecisionTreeClassifier(max_depth=7, random_state=0)
# Draw the depth-limited tree on a 24x16 figure.
fig, ax = plt.subplots(figsize=(24, 16))
tree.plot_tree(classifier, ax=ax)
plt.show()
# Row/column labels shared by both confusion-matrix tables below.
matrix_index = ['Actual 0:', 'Actual 1:']
matrix_columns = ['Predicted 0:', 'Predicted 1:']

# Confusion matrix over the full dataset. These rows include the ones the
# tree was fitted on, so the counts are optimistic; kept for reference only.
y_pred = classifier.predict(X)
print('Confusion matrix: ')
full_conf_matrix = pd.DataFrame(
    confusion_matrix(y, y_pred),
    index=matrix_index,
    columns=matrix_columns,
)
print(full_conf_matrix)

# Held-out evaluation: only the rows that were not used for fitting.
y_test_pred = classifier.predict(X_test)
print('Confusion matrix (held-out test set): ')
test_conf_matrix = pd.DataFrame(
    # Pin the label order so the table stays 2x2 even if a class is absent
    # from the test split.
    confusion_matrix(y_test, y_test_pred, labels=[0, 1]),
    index=matrix_index,
    columns=matrix_columns,
)
print(test_conf_matrix)
print(f'Precision: {precision_score(y_test, y_test_pred):.3f}')
print(f'Recall: {recall_score(y_test, y_test_pred):.3f}')
print(f'F1 score: {f1_score(y_test, y_test_pred):.3f}')
Confusion matrix: 
           Predicted 0:  Predicted 1:
Actual 0:           471            29
Actual 1:            73           195
# Show the column labels of the loaded frame.
df.columns
Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'], dtype='object')
# Feature values for one hypothetical patient, given in the same order as the
# eight predictor columns. The row is wrapped in a DataFrame carrying those
# column labels so the input matches the labelled frame the tree was fitted on
# (a bare list triggers scikit-learn's "feature names" warning).
NewPatient = pd.DataFrame(
    [[2, 135, 70, 20, 0, 30.0, 0.48, 35]],
    columns=[
        'Pregnancies',
        'Glucose',
        'BloodPressure',
        'SkinThickness',
        'Insulin',
        'BMI',
        'DiabetesPedigreeFunction',
        'Age',
    ],
)
predict = classifier.predict(NewPatient)
# predict holds one label per input row; read the single entry explicitly
# rather than relying on the truthiness of a one-element array.
if predict[0] == 0:
    print("The model predicts you may not have diabetes.")
else:
    print("The model predicts you may have diabetes.")
The model predicts you may not have diabetes.