In [1]:
from google.colab import drive
drive.mount('/content/gdrive')
Mounted at /content/gdrive
In [2]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
In [3]:
data = pd.read_csv("/content/gdrive/MyDrive/1stop.ai/Fake News Detection/data/news.csv")
In [4]:
data.shape
Out[4]:
(6335, 4)
In [5]:
data.head()
Out[5]:
Unnamed: 0 title text label
0 8476 You Can Smell Hillary’s Fear Daniel Greenfield, a Shillman Journalism Fello... FAKE
1 10294 Watch The Exact Moment Paul Ryan Committed Pol... Google Pinterest Digg Linkedin Reddit Stumbleu... FAKE
2 3608 Kerry to go to Paris in gesture of sympathy U.S. Secretary of State John F. Kerry said Mon... REAL
3 10142 Bernie supporters on Twitter erupt in anger ag... — Kaydee King (@KaydeeKing) November 9, 2016 T... FAKE
4 875 The Battle of New York: Why This Primary Matters It's primary day in New York and front-runners... REAL
In [6]:
labels = data.label
In [7]:
x_train, x_test, y_train, y_test = train_test_split(data['text'],labels,test_size=0.33,random_state=42)
In [8]:
tfidf = TfidfVectorizer(stop_words = 'english', max_df = 0.7)
In [9]:
tfidf_train = tfidf.fit_transform(x_train)
In [10]:
tfidf_test = tfidf.transform(x_test)
In [11]:
pac = PassiveAggressiveClassifier()
In [12]:
pac.fit(tfidf_train,y_train)
Out[12]:
PassiveAggressiveClassifier(C=1.0, average=False, class_weight=None,
                            early_stopping=False, fit_intercept=True,
                            loss='hinge', max_iter=1000, n_iter_no_change=5,
                            n_jobs=None, random_state=None, shuffle=True,
                            tol=0.001, validation_fraction=0.1, verbose=0,
                            warm_start=False)
In [13]:
y_pred = pac.predict(tfidf_test)
In [14]:
accuracy_score(y_test,y_pred)
Out[14]:
0.9411764705882353
In [15]:
confusion_matrix(y_test,y_pred)
Out[15]:
array([[1016,   55],
       [  68,  952]])