Text Preprocessing¶

A short tutorial walking through common text-preprocessing steps with NLTK: tokenization, case normalization, stop-word removal, stemming, lemmatization, and part-of-speech tagging.

Example Text¶

The example text is the opening of Rudyard Kipling’s poem “If—”.

In [1]:
poem = "If you can keep your head when all about you,\nare losing theirs and blaming it on you.\nIf you can trust yourself when all men doubt you,\nbut make allowance for their doubting too.\nIf you can wait and not be tired by waiting,\nor being lied about, don’t deal in lies.\nOr being hated, don’t give way to hating,\nand yet don’t look too good, nor talk too wise:"
In [2]:
print(poem)
If you can keep your head when all about you,
are losing theirs and blaming it on you.
If you can trust yourself when all men doubt you,
but make allowance for their doubting too.
If you can wait and not be tired by waiting,
or being lied about, don’t deal in lies.
Or being hated, don’t give way to hating,
and yet don’t look too good, nor talk too wise:

Remove Newlines and Replace with Spaces¶

In [3]:
import re

poem_without_newlines = re.sub(r'\n', ' ', poem)
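`re.sub` is handy for pattern-based cleanup; for a literal newline, the built-in `str.replace` does the same job without a regex. A small sketch (using stand-in text rather than the poem):

```python
import re

poem = "first line\nsecond line"  # stand-in for the poem text

# Pattern-based replacement, as in the cell above ...
via_regex = re.sub(r"\n", " ", poem)
# ... and a plain string method, which is equivalent for a literal newline.
via_replace = poem.replace("\n", " ")

print(via_regex == via_replace)  # → True
```

`re.sub` becomes the better choice once the pattern is more than a fixed string, e.g. collapsing runs of whitespace with `r"\s+"`.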

Word Tokenization¶

In [4]:
from nltk.tokenize import word_tokenize

tokenized1 = word_tokenize(poem_without_newlines)
print(tokenized1)
['If', 'you', 'can', 'keep', 'your', 'head', 'when', 'all', 'about', 'you', ',', 'are', 'losing', 'theirs', 'and', 'blaming', 'it', 'on', 'you', '.', 'If', 'you', 'can', 'trust', 'yourself', 'when', 'all', 'men', 'doubt', 'you', ',', 'but', 'make', 'allowance', 'for', 'their', 'doubting', 'too', '.', 'If', 'you', 'can', 'wait', 'and', 'not', 'be', 'tired', 'by', 'waiting', ',', 'or', 'being', 'lied', 'about', ',', 'don', '’', 't', 'deal', 'in', 'lies', '.', 'Or', 'being', 'hated', ',', 'don', '’', 't', 'give', 'way', 'to', 'hating', ',', 'and', 'yet', 'don', '’', 't', 'look', 'too', 'good', ',', 'nor', 'talk', 'too', 'wise', ':']
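If `word_tokenize` (or the later stop-word, lemmatizer, and tagger cells) raises a `LookupError` on a fresh install, the NLTK data packages need a one-time download. A setup sketch covering the resources this notebook uses (package names per the NLTK downloader; newer NLTK versions may also ask for `punkt_tab`):

```python
import nltk

# One-time downloads for the resources used in this notebook.
nltk.download("punkt")                       # word/sentence tokenizer models
nltk.download("stopwords")                   # stop-word lists
nltk.download("wordnet")                     # lexicon for WordNetLemmatizer
nltk.download("averaged_perceptron_tagger")  # POS tagger model
```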

Sentence Tokenization¶

In [5]:
from nltk.tokenize import sent_tokenize

tokenized2 = sent_tokenize(poem_without_newlines)
print(tokenized2)
['If you can keep your head when all about you, are losing theirs and blaming it on you.', 'If you can trust yourself when all men doubt you, but make allowance for their doubting too.', 'If you can wait and not be tired by waiting, or being lied about, don’t deal in lies.', 'Or being hated, don’t give way to hating, and yet don’t look too good, nor talk too wise:']

All Upper or Lower Case¶

In [6]:
poem_without_newlines_lowercase = poem_without_newlines.lower()
poem_without_newlines_uppercase = poem_without_newlines.upper()
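Case normalization matters for matching: `"If"` and `"if"` are different strings, so counts and lookups disagree until the tokens are normalized. A small sketch with stand-in tokens:

```python
tokens = ["If", "you", "can", "keep", "your", "head", "if", "If"]

# Before normalization, only the lowercase spelling matches.
raw_count = tokens.count("if")

# After lowercasing, all three spellings of "if" match.
normalized = [t.lower() for t in tokens]
normalized_count = normalized.count("if")

print(raw_count, normalized_count)  # → 1 3
```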

Stop Word Removal¶

Stop words are words removed during preprocessing when sentence structure isn’t needed. They are usually the most common words in a language and carry little information about the meaning or tone of a statement; they include words such as “a”, “an”, and “the”.

In [7]:
from nltk.corpus import stopwords 
stop_words = set(stopwords.words('english'))

poem_no_stop = [word for word in tokenized1 if word not in stop_words]

print(poem_no_stop)
['If', 'keep', 'head', ',', 'losing', 'blaming', '.', 'If', 'trust', 'men', 'doubt', ',', 'make', 'allowance', 'doubting', '.', 'If', 'wait', 'tired', 'waiting', ',', 'lied', ',', '’', 'deal', 'lies', '.', 'Or', 'hated', ',', '’', 'give', 'way', 'hating', ',', 'yet', '’', 'look', 'good', ',', 'talk', 'wise', ':']
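Notice that “If” and “Or” survive the filter above: NLTK’s stop-word list is all lowercase, and the membership test is case-sensitive. Lowercasing each token before the test fixes this. A sketch with a toy stop-word set standing in for `stopwords.words('english')`:

```python
# Toy stand-in for NLTK's English stop-word list (which is all lowercase).
stop_words = {"if", "you", "can", "your", "or", "and"}

tokens = ["If", "you", "can", "keep", "your", "head", ",", "Or", "wait"]

# Case-sensitive test: capitalized stop words slip through.
kept_naive = [t for t in tokens if t not in stop_words]

# Lowercase each token before testing membership.
kept = [t for t in tokens if t.lower() not in stop_words]

print(kept_naive)  # → ['If', 'keep', 'head', ',', 'Or', 'wait']
print(kept)        # → ['keep', 'head', ',', 'wait']
```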

Stemming¶

Stemming bluntly removes word affixes (prefixes and suffixes). It is a common technique in search engines to improve matching between queries and documents. For example, stemming casts the word “going” to “go”.

In [8]:
from nltk.stem import PorterStemmer
stemmer = PorterStemmer()
In [9]:
stemmed = [stemmer.stem(token) for token in tokenized1]

print(stemmed)
['if', 'you', 'can', 'keep', 'your', 'head', 'when', 'all', 'about', 'you', ',', 'are', 'lose', 'their', 'and', 'blame', 'it', 'on', 'you', '.', 'if', 'you', 'can', 'trust', 'yourself', 'when', 'all', 'men', 'doubt', 'you', ',', 'but', 'make', 'allow', 'for', 'their', 'doubt', 'too', '.', 'if', 'you', 'can', 'wait', 'and', 'not', 'be', 'tire', 'by', 'wait', ',', 'or', 'be', 'lie', 'about', ',', 'don', '’', 't', 'deal', 'in', 'lie', '.', 'or', 'be', 'hate', ',', 'don', '’', 't', 'give', 'way', 'to', 'hate', ',', 'and', 'yet', 'don', '’', 't', 'look', 'too', 'good', ',', 'nor', 'talk', 'too', 'wise', ':']
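The Porter stemmer applies an ordered cascade of suffix rules. As a rough illustration of the idea (not the Porter algorithm itself), a toy stripper that bluntly removes a few common suffixes looks like this:

```python
def toy_stem(word):
    """Bluntly strip a few common suffixes (illustrative only)."""
    for suffix in ("ing", "ed", "es", "s"):
        # Keep at least a 3-letter stem so short words survive.
        if word.endswith(suffix) and len(word) - len(suffix) >= 3:
            return word[: -len(suffix)]
    return word

print([toy_stem(w) for w in ["waiting", "hated", "lies", "trust"]])
# → ['wait', 'hat', 'lie', 'trust']
```

The blunt “hat” for “hated” shows why real stemmers need more careful rules, such as Porter’s step that restores a trailing “e”.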

Lemmatization¶

Lemmatization is a method for casting words to their dictionary root form (the lemma) using a vocabulary lookup rather than blunt suffix stripping. Note that NLTK’s WordNetLemmatizer treats every token as a noun unless a part of speech is supplied, which is why verbs such as “losing” pass through unchanged in the output below.

In [10]:
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
In [11]:
lemmatized = [lemmatizer.lemmatize(token) for token in tokenized1]

print(lemmatized)
['If', 'you', 'can', 'keep', 'your', 'head', 'when', 'all', 'about', 'you', ',', 'are', 'losing', 'theirs', 'and', 'blaming', 'it', 'on', 'you', '.', 'If', 'you', 'can', 'trust', 'yourself', 'when', 'all', 'men', 'doubt', 'you', ',', 'but', 'make', 'allowance', 'for', 'their', 'doubting', 'too', '.', 'If', 'you', 'can', 'wait', 'and', 'not', 'be', 'tired', 'by', 'waiting', ',', 'or', 'being', 'lied', 'about', ',', 'don', '’', 't', 'deal', 'in', 'lie', '.', 'Or', 'being', 'hated', ',', 'don', '’', 't', 'give', 'way', 'to', 'hating', ',', 'and', 'yet', 'don', '’', 't', 'look', 'too', 'good', ',', 'nor', 'talk', 'too', 'wise', ':']
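Lemmatization, in contrast to stemming, looks words up rather than chopping them. A toy dictionary-backed sketch (real lemmatizers consult a full lexicon such as WordNet):

```python
# Toy lemma dictionary, standing in for a real lexicon like WordNet.
LEMMAS = {"losing": "lose", "blaming": "blame", "men": "man", "lied": "lie"}

def toy_lemmatize(word):
    """Return the dictionary root if known, else the word unchanged."""
    return LEMMAS.get(word.lower(), word)

print([toy_lemmatize(w) for w in ["losing", "men", "keep"]])  # → ['lose', 'man', 'keep']
```

This is why lemmatization can handle irregular forms like “men” → “man” that no suffix rule would catch.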

Part of Speech Tagging¶

The process of identifying and labeling each word’s part of speech (noun, verb, adjective, and so on) is known as part-of-speech (POS) tagging.

In [12]:
from nltk import pos_tag

poem_pos_tag = pos_tag(tokenized1)

print(poem_pos_tag)
[('If', 'IN'), ('you', 'PRP'), ('can', 'MD'), ('keep', 'VB'), ('your', 'PRP$'), ('head', 'NN'), ('when', 'WRB'), ('all', 'DT'), ('about', 'IN'), ('you', 'PRP'), (',', ','), ('are', 'VBP'), ('losing', 'VBG'), ('theirs', 'NNS'), ('and', 'CC'), ('blaming', 'VBG'), ('it', 'PRP'), ('on', 'IN'), ('you', 'PRP'), ('.', '.'), ('If', 'IN'), ('you', 'PRP'), ('can', 'MD'), ('trust', 'VB'), ('yourself', 'PRP'), ('when', 'WRB'), ('all', 'DT'), ('men', 'NNS'), ('doubt', 'VBP'), ('you', 'PRP'), (',', ','), ('but', 'CC'), ('make', 'VBP'), ('allowance', 'NN'), ('for', 'IN'), ('their', 'PRP$'), ('doubting', 'NN'), ('too', 'RB'), ('.', '.'), ('If', 'IN'), ('you', 'PRP'), ('can', 'MD'), ('wait', 'VB'), ('and', 'CC'), ('not', 'RB'), ('be', 'VB'), ('tired', 'VBN'), ('by', 'IN'), ('waiting', 'VBG'), (',', ','), ('or', 'CC'), ('being', 'VBG'), ('lied', 'VBN'), ('about', 'IN'), (',', ','), ('don', 'VB'), ('’', 'NNP'), ('t', 'JJ'), ('deal', 'NN'), ('in', 'IN'), ('lies', 'NNS'), ('.', '.'), ('Or', 'CC'), ('being', 'VBG'), ('hated', 'VBN'), (',', ','), ('don', 'VB'), ('’', 'JJ'), ('t', 'NNS'), ('give', 'VBP'), ('way', 'NN'), ('to', 'TO'), ('hating', 'NN'), (',', ','), ('and', 'CC'), ('yet', 'RB'), ('don', 'VB'), ('’', 'JJ'), ('t', 'JJ'), ('look', 'NN'), ('too', 'RB'), ('good', 'JJ'), (',', ','), ('nor', 'CC'), ('talk', 'VB'), ('too', 'RB'), ('wise', 'NN'), (':', ':')]
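The labels come from the Penn Treebank tagset; `nltk.help.upenn_tagset()` prints the full list with examples. A partial glossary of the tags seen above, as a quick lookup table:

```python
# A handful of Penn Treebank tags that appear in the output above.
PENN_TAGS = {
    "PRP": "personal pronoun",
    "PRP$": "possessive pronoun",
    "MD": "modal verb",
    "VB": "verb, base form",
    "VBG": "verb, gerund/present participle",
    "VBN": "verb, past participle",
    "NN": "noun, singular",
    "NNS": "noun, plural",
    "JJ": "adjective",
    "RB": "adverb",
    "IN": "preposition/subordinating conjunction",
    "CC": "coordinating conjunction",
    "DT": "determiner",
}

print(PENN_TAGS["VBG"])  # → verb, gerund/present participle
```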