from IPython.core.interactiveshell import InteractiveShell
# Display the value of every expression statement in a cell rather than only
# the last one, so several bare expressions in one cell each produce output.
InteractiveShell.ast_node_interactivity = "all"

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from string import punctuation
from wordcloud import WordCloud


# Load the training and testing splits from their CSV files.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Preview the first rows of each split.
train.head()
test.head()

# Report the dimensions of both splits; `shape` is a (rows, columns) pair,
# so it can be unpacked straight into the two placeholders.
print('The training data set contains {} rows and {} columns'.format(*train.shape))
print('The testing data set contains {} rows and {} columns'.format(*test.shape))

The training data set contains 7613 rows and 5 columns
The testing data set contains 3263 rows and 4 columns

	id	keyword	location	text	target
0	1	NaN	NaN	Our Deeds are the Reason of this #earthquake M...	1
1	4	NaN	NaN	Forest fire near La Ronge Sask. Canada	1
2	5	NaN	NaN	All residents asked to 'shelter in place' are ...	1
3	6	NaN	NaN	13,000 people receive #wildfires evacuation or...	1
4	7	NaN	NaN	Just got sent this photo from Ruby #Alaska as ...	1

	id	keyword	location	text
0	0	NaN	NaN	Just happened a terrible car crash
1	2	NaN	NaN	Heard about #earthquake is different cities, s...
2	3	NaN	NaN	there is a forest fire at spot pond, geese are...
3	9	NaN	NaN	Apocalypse lighting. #Spokane #wildfires
4	11	NaN	NaN	Typhoon Soudelor kills 28 in China and Taiwan

I. Reading Data

Import Necessary Packages

Reading the Text-Based Data Set