Python NLP Ngrams - Geekscoders

Python NLP

About Lesson

In this Python NLP lesson we are going to learn about n-grams. An n-gram is a sequence of n consecutive words, so when you create n-grams you need to specify how many words each one should contain, for example 3 or 4.

Now let’s create our example. In this example we use ngrams and specify 6 as the size, which means every n-gram is made of 6 consecutive words taken from the filtered word list.

from nltk.corpus import webtext, stopwords
from nltk import ngrams
from nltk.probability import FreqDist

# Load the tokens of the 'singles.txt' web-text corpus and the English
# stop-word list (as a set, so membership tests are cheap).
text_data = webtext.words('singles.txt')
stop_words = set(stopwords.words('english'))

# Keep only tokens that are longer than three characters and are not
# stop words.
filtered_words = [
    token
    for token in text_data
    if len(token) > 3 and token not in stop_words
]

# Build 6-grams: every run of six consecutive tokens in the filtered list.
ngram = ngrams(filtered_words, 6)

# Count how often each 6-gram occurs and show the ten most frequent ones.
freq_dist = FreqDist(ngram)
print(freq_dist.most_common(10))

from nltk.corpus import webtext, stopwords

from nltk import ngrams

from nltk.probability import FreqDist

# Load the tokens of the 'singles.txt' web-text corpus.
text_data = webtext.words('singles.txt')

# English stop words, stored in a set for fast membership tests.
stop_words = set(stopwords.words('english'))

# Collect the tokens that are not stop words and are longer than three
# characters.
filtered_words = []

for word in text_data:
    if word not in stop_words and len(word) > 3:
        filtered_words.append(word)

# Now we are going to use ngrams for this; 6 is the number of consecutive
# words in each n-gram.
ngram = ngrams(filtered_words, 6)

# Count the occurrences of each 6-gram and print the ten most common.
freq_dist = FreqDist(ngram)

print(freq_dist.most_common(10))

If you run the code this will be the result.

[(('SEXY', 'MALE', 'seeks', 'attrac', 'older', 'single'), 1), 
(('MALE', 'seeks', 'attrac', 'older', 'single', 'lady'), 1),
 (('seeks', 'attrac', 'older', 'single', 'lady', 'discreet'), 1), 
(('attrac', 'older', 'single', 'lady', 'discreet', 'encounters'), 1), 
(('older', 'single', 'lady', 'discreet', 'encounters', '35YO'), 1), 
(('single', 'lady', 'discreet', 'encounters', '35YO', 'Security'), 1),
 (('lady', 'discreet', 'encounters', '35YO', 'Security', 'Guard'), 1), 
(('discreet', 'encounters', '35YO', 'Security', 'Guard', 'seeking'), 1),
 (('encounters', '35YO', 'Security', 'Guard', 'seeking', 'lady'), 1),
 (('35YO', 'Security', 'Guard', 'seeking', 'lady', 'uniform'), 1)]

[(('SEXY', 'MALE', 'seeks', 'attrac', 'older', 'single'), 1),

(('MALE', 'seeks', 'attrac', 'older', 'single', 'lady'), 1),

(('seeks', 'attrac', 'older', 'single', 'lady', 'discreet'), 1),

(('attrac', 'older', 'single', 'lady', 'discreet', 'encounters'), 1),

(('older', 'single', 'lady', 'discreet', 'encounters', '35YO'), 1),

(('single', 'lady', 'discreet', 'encounters', '35YO', 'Security'), 1),

(('lady', 'discreet', 'encounters', '35YO', 'Security', 'Guard'), 1),

(('discreet', 'encounters', '35YO', 'Security', 'Guard', 'seeking'), 1),

(('encounters', '35YO', 'Security', 'Guard', 'seeking', 'lady'), 1),

(('35YO', 'Security', 'Guard', 'seeking', 'lady', 'uniform'), 1)]

Also you can plot the frequency distribution.

# Plot the frequency distribution built above; the argument 5 presumably
# limits the plot to five samples (confirm against the NLTK FreqDist.plot docs).
freq_dist.plot(5)

# Plot the frequency distribution built above; the argument 5 presumably
# limits the plot to five samples (confirm against the NLTK FreqDist.plot docs).
freq_dist.plot(5)

This is the result.