from google.colab import drive
# Mount Google Drive inside the Colab runtime at /content/gdrive; the dataset
# path passed to pd.read_csv further down lives under this mount point.
drive.mount('/content/gdrive')
Mounted at /content/gdrive
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
# Read the news dataset from the mounted Drive folder into a DataFrame.
data = pd.read_csv(
    filepath_or_buffer="/content/gdrive/MyDrive/1stop.ai/Fake News Detection/data/news.csv",
)
# Notebook display: (row count, column count) of the loaded frame.
data.shape
(6335, 4)
# Notebook display: preview the first five rows to check the column layout.
data.head(n=5)
| | Unnamed: 0 | title | text | label |
---|---|---|---|---|
0 | 8476 | You Can Smell Hillary’s Fear | Daniel Greenfield, a Shillman Journalism Fello... | FAKE |
1 | 10294 | Watch The Exact Moment Paul Ryan Committed Pol... | Google Pinterest Digg Linkedin Reddit Stumbleu... | FAKE |
2 | 3608 | Kerry to go to Paris in gesture of sympathy | U.S. Secretary of State John F. Kerry said Mon... | REAL |
3 | 10142 | Bernie supporters on Twitter erupt in anger ag... | — Kaydee King (@KaydeeKing) November 9, 2016 T... | FAKE |
4 | 875 | The Battle of New York: Why This Primary Matters | It's primary day in New York and front-runners... | REAL |
# Target vector: the per-article FAKE / REAL tag.
labels = data["label"]

# Hold out 33% of the articles for evaluation; the fixed seed keeps the
# split itself reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    data["text"],
    labels,
    test_size=0.33,
    random_state=42,
)

# TF-IDF features over the article body: English stop words are removed and
# terms occurring in more than 70% of the training documents are ignored.
tfidf = TfidfVectorizer(stop_words="english", max_df=0.7)

# Fit the vocabulary and IDF weights on the training split only, then apply
# that same fitted transform to the held-out split.
tfidf_train = tfidf.fit_transform(x_train)
tfidf_test = tfidf.transform(x_test)
# Linear online classifier with library-default hyperparameters.
# NOTE(review): no random_state is passed while shuffling is on by default,
# so the fitted model (and the scores below) can differ slightly between runs.
pac = PassiveAggressiveClassifier()
# Left as a bare expression so the notebook echoes the fitted estimator.
pac.fit(X=tfidf_train, y=y_train)
PassiveAggressiveClassifier(C=1.0, average=False, class_weight=None, early_stopping=False, fit_intercept=True, loss='hinge', max_iter=1000, n_iter_no_change=5, n_jobs=None, random_state=None, shuffle=True, tol=0.001, validation_fraction=0.1, verbose=0, warm_start=False)
# Predict labels for the held-out articles.
y_pred = pac.predict(X=tfidf_test)
# Notebook display: fraction of held-out articles classified correctly.
accuracy_score(y_true=y_test, y_pred=y_pred)
0.9411764705882353
# Notebook display: rows are the true labels, columns the predicted labels,
# both in sorted label order.
confusion_matrix(y_true=y_test, y_pred=y_pred)
array([[1016, 55], [ 68, 952]])