import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Location of the Parkinson's CSV that the rest of the script works on.
PARKINSONS_CSV_URL = "https://raw.githubusercontent.com/chaitanyabaranwal/ParkinsonAnalysis/master/parkinsons.csv"

# Download and parse the CSV into a DataFrame.
data = pd.read_csv(PARKINSONS_CSV_URL)

# (rows, columns) of the loaded table, as a quick sanity check.
data.shape
(195, 24)
# Preview the first five rows of the table (five is head()'s default count).
data.head(n=5)
| | name | MDVP:Fo(Hz) | MDVP:Fhi(Hz) | MDVP:Flo(Hz) | MDVP:Jitter(%) | MDVP:Jitter(Abs) | MDVP:RAP | MDVP:PPQ | Jitter:DDP | MDVP:Shimmer | MDVP:Shimmer(dB) | Shimmer:APQ3 | Shimmer:APQ5 | MDVP:APQ | Shimmer:DDA | NHR | HNR | RPDE | DFA | spread1 | spread2 | D2 | PPE | status |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | phon_R01_S01_1 | 119.992 | 157.302 | 74.997 | 0.00784 | 0.00007 | 0.00370 | 0.00554 | 0.01109 | 0.04374 | 0.426 | 0.02182 | 0.03130 | 0.02971 | 0.06545 | 0.02211 | 21.033 | 0.414783 | 0.815285 | -4.813031 | 0.266482 | 2.301442 | 0.284654 | 1 |
1 | phon_R01_S01_2 | 122.400 | 148.650 | 113.819 | 0.00968 | 0.00008 | 0.00465 | 0.00696 | 0.01394 | 0.06134 | 0.626 | 0.03134 | 0.04518 | 0.04368 | 0.09403 | 0.01929 | 19.085 | 0.458359 | 0.819521 | -4.075192 | 0.335590 | 2.486855 | 0.368674 | 1 |
2 | phon_R01_S01_3 | 116.682 | 131.111 | 111.555 | 0.01050 | 0.00009 | 0.00544 | 0.00781 | 0.01633 | 0.05233 | 0.482 | 0.02757 | 0.03858 | 0.03590 | 0.08270 | 0.01309 | 20.651 | 0.429895 | 0.825288 | -4.443179 | 0.311173 | 2.342259 | 0.332634 | 1 |
3 | phon_R01_S01_4 | 116.676 | 137.871 | 111.366 | 0.00997 | 0.00009 | 0.00502 | 0.00698 | 0.01505 | 0.05492 | 0.517 | 0.02924 | 0.04005 | 0.03772 | 0.08771 | 0.01353 | 20.644 | 0.434969 | 0.819235 | -4.117501 | 0.334147 | 2.405554 | 0.368975 | 1 |
4 | phon_R01_S01_5 | 116.014 | 141.781 | 110.655 | 0.01284 | 0.00011 | 0.00655 | 0.00908 | 0.01966 | 0.06425 | 0.584 | 0.03490 | 0.04825 | 0.04465 | 0.10470 | 0.01767 | 19.649 | 0.417356 | 0.823484 | -3.747787 | 0.234513 | 2.332180 | 0.410335 | 1 |
# Feature matrix: every column except the row identifier ('name') and the
# target ('status').
#
# The string column is dropped by label *before* converting to an array.
# Converting first and slicing the name column off afterwards leaves the
# result as dtype=object, because the array was built while it still held
# strings. Requesting float explicitly yields a plain numeric ndarray and
# fails loudly if a non-numeric column ever sneaks in.
features = data.drop(columns=["name", "status"]).to_numpy(dtype=float)
features
array([[119.992, 157.30200000000002, 74.997, ..., 0.266482, 2.3014419999999998, 0.284654], [122.4, 148.65, 113.819, ..., 0.33559, 2.486855, 0.368674], [116.682, 131.111, 111.555, ..., 0.311173, 2.342259, 0.33263400000000004], ..., [174.688, 240.005, 74.28699999999999, ..., 0.158453, 2.6797720000000003, 0.13172799999999998], [198.764, 396.961, 74.904, ..., 0.207454, 2.138608, 0.123306], [214.28900000000002, 260.277, 77.973, ..., 0.190667, 2.555477, 0.148569]], dtype=object)
# Target vector: the 'status' column pulled out as a NumPy array.
labels = data["status"].to_numpy()
labels
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
# How many samples carry the label 1.
len(labels[labels == 1])
147
# How many samples carry the label 0.
len(labels[labels == 0])
48
# Linearly rescale each feature column into the range [-1, 1].
# The per-column min/max are learned from every row of `features`, so if `x`
# is split into train/test afterwards, the held-out rows have already
# influenced the scaling parameters.
scalar = MinMaxScaler(feature_range=(-1, 1))
scalar.fit(features)
x = scalar.transform(features)

# Targets are used as-is.
y = labels

x
array([[-0.63138346, -0.77481654, -0.89037042, ..., 0.17153026, -0.21867743, -0.0053808 ], [-0.6033463 , -0.81013911, -0.4433544 , ..., 0.48267409, -0.05370956, 0.34265204], [-0.66992292, -0.88174367, -0.46942324, ..., 0.37274182, -0.18236124, 0.19336492], ..., [ 0.00546073, -0.43717403, -0.89854572, ..., -0.31484696, 0.11793486, -0.63884033], [ 0.28578581, 0.20361309, -0.89144127, ..., -0.09423055, -0.36355605, -0.67372646], [ 0.46654868, -0.35441175, -0.85610326, ..., -0.16981039, 0.00734563, -0.5690805 ]])
# Display the target vector (the same array object as `labels`).
y
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
from xgboost import XGBClassifier

# Hold out 32 rows for testing. The split is done on the *unscaled* feature
# matrix so that the scaler below can be fitted on the training rows only;
# scaling the full dataset first lets the held-out rows shape the min/max
# used for preprocessing (data leakage). With the same test_size and
# random_state the same rows are selected as when splitting the scaled array.
#
# NOTE(review): the classes are imbalanced (147 vs 48 in `labels`); consider
# `stratify=y` so the test set keeps that ratio -- not changed here because it
# alters which rows are held out.
# NOTE(review): the 'name' values look like several recordings per subject
# (e.g. phon_R01_S01_1 ... _5) -- confirm; if so, a row-level random split
# puts the same subject in both train and test, and a group-aware split
# would give a more honest accuracy estimate.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    features, y, test_size=32, random_state=42
)

# Learn the [-1, 1] scaling from the training rows and apply the same
# transform to the test rows (test values may fall slightly outside [-1, 1]).
train_scaler = MinMaxScaler(feature_range=(-1, 1))
x_train = train_scaler.fit_transform(x_train_raw)
x_test = train_scaler.transform(x_test_raw)

# Fit a gradient-boosted tree classifier with library defaults.
xgb = XGBClassifier()
xgb.fit(x_train, y_train)

# Fraction of held-out rows classified correctly.
y_pred = xgb.predict(x_test)
accuracy_score(y_test, y_pred)
0.96875