import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Read the remote CSV into a DataFrame.
data_url = "http://bit.ly/w-data"
s_data = pd.read_csv(filepath_or_buffer=data_url)
print("Data imported successfully")

# Preview the first rows (rendered when this is the last expression of a notebook cell).
s_data.head()

Data imported successfully


# Draw the 'Scores' column against the 'Hours' column as unconnected point markers.
s_data.plot(x="Hours", y="Scores", style="o")
plt.xlabel("Hours Studied")
plt.ylabel("Percentage Score")
plt.title("Hours Vs Percentage")
plt.show()


# Split the frame into the feature matrix and the target vector by column
# name, matching the 'Hours'/'Scores' labels the plotting code already uses.
# Positional slices (":-1" for X, "1" for Y) only agree with each other while
# the CSV has exactly these two columns in this order.
X = s_data[['Hours']].to_numpy()  # double brackets keep a 2-D (n_samples, 1) array
Y = s_data['Scores'].to_numpy()   # single brackets give a 1-D (n_samples,) array


from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)


from sklearn.linear_model import LinearRegression

# Fit a linear model on the training split; fit() returns the estimator itself,
# so construction and training can be chained.
regressor = LinearRegression().fit(x_train, y_train)
print("Training Complete.")

Training Complete.


# Evaluate the fitted line (intercept + coefficient * x) at every value in X.
line = regressor.intercept_ + regressor.coef_ * X

# Overlay the fitted line on a scatter of the full dataset (X, Y), not only the test split.
plt.scatter(X, Y)
plt.plot(X, line)
plt.title("Comparison")
plt.show()


# Run the fitted model on the held-out inputs, then echo those inputs.
y_pred = regressor.predict(x_test)
print(x_test)

[[1.5]
 [3.2]
 [7.4]
 [2.5]
 [5.9]]


# Put the held-out targets next to the model's predictions, one row per test sample.
comparison = {'Actual': y_test, 'Predicted': y_pred}
df = pd.DataFrame(data=comparison)
df


# Predict the score for a single number of hours. The value is defined once so
# the model input and the printed label cannot drift apart.
hours = 7
score_pred = np.array([[hours]])  # one row, one feature: the same (1, 1) shape as reshape(-1, 1)
predict = regressor.predict(score_pred)
print("No of hours = {}".format(hours))
print("Predicted Score = {}".format(predict[0]))

No of hours = 7
Predicted Score = 71.39275540593033


from sklearn import metrics

# Mean absolute error between the held-out targets and the predictions.
mae = metrics.mean_absolute_error(y_test, y_pred)
print('Mean Absolute Error: ', mae)

Mean Absolute Error:  4.183859899002982

	Actual	Predicted
0	20	16.884145
1	27	33.732261
2	69	75.357018
3	30	26.794801
4	62	60.491033

	Hours	Scores
0	2.5	21
1	5.1	47
2	3.2	27
3	8.5	75
4	3.5	30