IV. Data Preprocessing

In the data preprocessing stage, we transform the raw data set into a cleaner, more usable form. Most of the work in this part is done with the NLTK package.

First of all, we lowercase all the text because string matching is case sensitive; lowercasing keeps the tokens consistent throughout the NLP pipeline. Since a tweet may contain more than one sentence, we first tokenize each tweet into sentences. Then we tokenize the sentences into words, the most common unit of tokens. The tokenization step is helpful for understanding the texts and for building NLP models.
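
A minimal sketch of these two steps with NLTK might look like the following; the tweet string is a made-up example:

```python
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize

# One-time model download; newer NLTK releases may also need 'punkt_tab'.
nltk.download('punkt')

tweet = "NLP is fun! We love tokenizing tweets."  # made-up example tweet

# Step 1: lowercase the raw text so later lookups are case-insensitive.
text = tweet.lower()

# Step 2: split the tweet into sentences, then each sentence into words.
sentences = sent_tokenize(text)
tokens = [word_tokenize(s) for s in sentences]
print(tokens)
# [['nlp', 'is', 'fun', '!'], ['we', 'love', 'tokenizing', 'tweets', '.']]
```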

After tokenization, we perform stemming, lemmatization, and stopword removal. This step is very important for removing noise from the data set. Stemming is the process of reducing a word to its stem by stripping its affixes (suffixes and prefixes). Lemmatization serves a similar purpose, and the two operations are closely related. However, stemming may return a string that is not an actual word, while lemmatization always returns an actual word with the same meaning. For example, the word "better" has "good" as its lemma, a mapping that stemming misses. Finally, stopword removal discards words that occur frequently in the texts but carry little useful meaning.
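
The contrast between the three operations can be seen in a short NLTK sketch; the token list is a hypothetical example. Note that the WordNet lemmatizer maps "better" to "good" only when it is told the word is an adjective, since its default part of speech is noun:

```python
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer

nltk.download('stopwords')
nltk.download('wordnet')

stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

tokens = ['the', 'movies', 'were', 'better', 'than', 'expected']  # hypothetical tokens

# Stemming just strips affixes and can produce non-words: 'movies' -> 'movi'.
stems = [stemmer.stem(t) for t in tokens]

# Lemmatization returns dictionary words; 'better' -> 'good' only when it is
# tagged as an adjective (pos='a'), because the default POS is noun.
print(lemmatizer.lemmatize('better', pos='a'))  # good
lemmas = [lemmatizer.lemmatize(t) for t in tokens]

# Stopword removal drops frequent words that carry little meaning.
filtered = [t for t in lemmas if t not in stop_words]
print(filtered)  # ['movie', 'better', 'expected']
```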

In summary, the preprocessing pipeline consists of the following steps (a combined sketch of the whole pipeline follows this list):

1. Import necessary packages & read the data sets
2. Lowercasing
3. Sentence tokenization
4. Word tokenization
5. Stemming, lemmatization, and stopword removal
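
Putting the steps above together, a minimal end-to-end preprocessing function might look like the sketch below; the function name and example tweet are hypothetical, and we use lemmatization (rather than stemming) as the normalization step:

```python
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize, word_tokenize

for resource in ('punkt', 'stopwords', 'wordnet'):
    nltk.download(resource)

LEMMATIZER = WordNetLemmatizer()
STOP_WORDS = set(stopwords.words('english'))

def preprocess_tweet(tweet):
    """Lowercase, tokenize, lemmatize, and remove stopwords from one tweet."""
    clean_tokens = []
    for sentence in sent_tokenize(tweet.lower()):
        for token in word_tokenize(sentence):
            # Keep alphabetic, non-stopword tokens in lemmatized form.
            if token.isalpha() and token not in STOP_WORDS:
                clean_tokens.append(LEMMATIZER.lemmatize(token))
    return clean_tokens

print(preprocess_tweet("The movies were better than expected!"))
# ['movie', 'better', 'expected']
```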

After all the data preprocessing steps above, we obtain a list of comparatively clean word tokens.