In [82]:
import pandas as pd
df = pd.read_csv("fake_news_dataset.csv") # read the dataset
df = df[['id', 'state', 'date_published', 'source',
         'category', 'sentiment_score', 'word_count', 'has_images',
         'has_videos', 'readability_score', 'num_shares', 'num_comments',
         'political_bias', 'fact_check_rating', 'is_satirical', 'trust_score',
         'source_reputation', 'clickbait_score', 'plagiarism_score', 'label']] # we don't need author, title,
# character count, or text, because that information is irrelevant here, so we drop those columns
df = df.dropna() # drop all rows with NA values (dropna must be called and the result reassigned to take effect)
df['True'] = df['label'].map({'Fake': 0, 'Real': 1}) # new column "True": map the string "Fake" to 0 and "Real" to 1
# (map avoids the FutureWarning that replace raises about silent downcasting)
df = df.drop(columns=['label']) # 'label' is now redundant, so we remove it
df.head(4) # print out the first four samples
Out[82]:
| | id | state | date_published | source | category | sentiment_score | word_count | has_images | has_videos | readability_score | num_shares | num_comments | political_bias | fact_check_rating | is_satirical | trust_score | source_reputation | clickbait_score | plagiarism_score | True |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Tennessee | 30-11-2021 | The Onion | Entertainment | -0.22 | 1302 | 0 | 0 | 66.18 | 47305 | 450 | Center | FALSE | 1 | 76 | 6 | 0.84 | 53.35 | 0 |
| 1 | 2 | Wisconsin | 02-09-2021 | The Guardian | Technology | 0.92 | 322 | 1 | 0 | 41.10 | 39804 | 530 | Left | Mixed | 1 | 1 | 5 | 0.85 | 28.28 | 0 |
| 2 | 3 | Missouri | 13-04-2021 | New York Times | Sports | 0.25 | 228 | 0 | 1 | 30.04 | 45860 | 763 | Center | Mixed | 0 | 57 | 1 | 0.72 | 0.38 | 0 |
| 3 | 4 | North Carolina | 08-03-2020 | CNN | Sports | 0.94 | 155 | 1 | 0 | 75.16 | 34222 | 945 | Center | TRUE | 1 | 18 | 10 | 0.92 | 32.20 | 0 |
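Before moving on, a quick sanity check can confirm the class balance and that the dropna call actually removed all missing values. This is a minimal sketch; the column names follow the frame built above:

In [ ]:
print(df['True'].value_counts()) # class balance: count of real (1) vs. fake (0) articles
print(df.isna().sum().sum())     # total remaining missing values; should be 0 after dropna()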
Questions we want to answer using this data
- Which state is associated with the most fake news?
- What distinguishes real news from fake news, and how can a model detect the difference?
- Are certain topics more commonly fake or real in the data?
- Does fake news increase when a political event is occurring?
Variables we will be using
If you look at the dataset above, we will be using all of these variables. Here they are in a list:
'id' : unique identifier for the article
'state' : where the article was published
'date_published' : date when the article was published
'source' : outlet that published the article
'category' : topic of the article
'sentiment_score' : sentiment of the article's text (negative to positive)
'word_count' : word count of the article
'has_images' : whether the article has images
'has_videos' : whether the article has videos
'readability_score' : how easy the text is to read
'num_shares' : how many times the article was shared
'num_comments' : how many comments were left
'political_bias' : the article's political leaning (e.g., Left, Center)
'fact_check_rating' : the fact-check verdict for the article (e.g., TRUE, Mixed, FALSE)
'is_satirical' : whether the article is satire
'trust_score' : trustworthiness of the article
'source_reputation' : reputability of the source
'clickbait_score' : how clickbait-like the article is (on a 0-1 scale)
'plagiarism_score' : percentage of the text that matches sources in a plagiarism database
'True' : 1 if the article is real, 0 if it is fake
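Several of these variables (state, source, category, political_bias, fact_check_rating) are strings, so they need a numeric encoding before the models below can use them. A minimal sketch using one-hot encoding with pd.get_dummies; the choice of encoding here is an assumption, not part of the original pipeline:

In [ ]:
# One-hot encode the string-valued columns so every feature is numeric
categorical_cols = ['state', 'source', 'category', 'political_bias', 'fact_check_rating']
df_encoded = pd.get_dummies(df, columns=categorical_cols)
# date_published is still a string; it could be parsed with pd.to_datetime if used as a feature
df_encoded.head(4)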
Methods we will use to answer our questions
We will try several modeling methods to see which gives the most precise answers, such as Logistic Regression, Decision Trees, SVMs, and kNN. We will also use train_test_split so each model is trained on one portion of the data and evaluated on held-out "new" data, ensuring the performance estimate is unbiased and the model generalizes well.
We will also record each model's accuracy, precision, and recall, and inspect the learned w coefficients and w intercepts to judge accuracy and how well the model fits.
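As a concrete sketch of that workflow, using logistic regression as one of the candidate models and the one-hot-encoded frame from above (the feature selection and split parameters here are illustrative assumptions):

In [ ]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Features and target; 'id' and the unparsed 'date_published' string are dropped
X = df_encoded.drop(columns=['True', 'id', 'date_published'])
y = df_encoded['True']

# Hold out 20% of the data so the model is evaluated on samples it never saw during training
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("accuracy :", accuracy_score(y_test, y_pred))
print("precision:", precision_score(y_test, y_pred))
print("recall   :", recall_score(y_test, y_pred))
print("w coefficients:", model.coef_)   # learned weight per feature
print("w intercept   :", model.intercept_)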