-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTask_Extraction
More file actions
72 lines (66 loc) · 2.09 KB
/
Task_Extraction
File metadata and controls
72 lines (66 loc) · 2.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Developed in a Jupyter notebook shipped with the Anaconda distribution, so many libraries are already installed.
# Import the necessary libraries.
import nltk
from nltk import pos_tag, word_tokenize
from nltk.corpus import stopwords
from nltk.tag import RegexpTagger
import re
# Fetch the NLTK data packages requested by this script (tokenizer model and
# POS-tagger model). The calls run on every execution of the script.
# NOTE(review): newer NLTK releases may look these resources up under
# different names — confirm against the installed NLTK version.
for _resource in ('punkt', 'averaged_perceptron_tagger'):
    nltk.download(_resource)
# Custom regex-based tagger built on NLTK's RegexpTagger.
# Any token that ends with one of the suffixes below is tagged 'CD', so that
# day names and time-of-day words can be matched by the chunk grammar's <CD>
# slot. The patterns are case-sensitive, which is why the day names are listed
# both capitalised and in lowercase. The order mirrors the original listing.
_TIME_SUFFIXES = (
    'today', 'tonight', 'tomorrow',
    'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday',
    'morning', 'afternoon', 'evening', 'night',
    'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
)
# The final rule tags every token ending in "ll" as a modal ('MD').
# NOTE(review): this also matches ordinary words such as "call" or "all" —
# confirm that is intended.
tag1 = RegexpTagger(
    [(r'.*' + suffix + r'$', 'CD') for suffix in _TIME_SUFFIXES]
    + [(r'.*ll$', 'MD')]
)
def tg(sent):
    """Tokenize *sent* and return a list of ``(word, tag)`` tuples.

    Each token is first offered to the custom regex tagger ``tag1``; when that
    tagger yields no tag (``None``) the token is tagged with ``nltk.pos_tag``
    instead.

    Args:
        sent: The text fragment to tokenize and tag.

    Returns:
        A list of 2-tuples ``(word, tag)``, one per token of *sent*.
    """
    tagged = []
    for token in nltk.word_tokenize(sent):
        # Tokenize the single token once and reuse the result for both
        # taggers (the previous code repeated this call up to five times per
        # token). Only the first piece is used, exactly as before.
        pieces = nltk.word_tokenize(token)
        word, tag = tag1.tag(pieces)[0]
        if tag is None:
            # No custom pattern matched: fall back to NLTK's POS tagger.
            # NOTE(review): the tagger sees one token at a time, so it has no
            # sentence context — kept as-is because the chunk grammar may
            # have been tuned against these tags.
            word, tag = nltk.pos_tag(pieces)[0]
        tagged.append((word, tag))
    return tagged
# Chunk out the parts of a sentence that satisfy the pattern defined below.
def extract_NN(sent):
    """Return (and print) the phrases in *sent* that match the chunk grammar.

    The fragment is tagged with ``tg`` and parsed with a two-rule ``NP``
    grammar:

    * rule 1: a token tagged MD, PDT, VB, VBG, NN* or PRP*, later followed by
      an ``IN`` token and then a ``CD`` token;
    * rule 2: an ``IN`` token, then a ``CD`` token, later followed by a token
      tagged MD, PDT, VB, VBG, NN* or PRP*.

    Any tokens are allowed before, between and after those anchors.

    Args:
        sent: The text fragment to analyse.

    Returns:
        A set of strings, each the space-joined words of one ``NP`` chunk.
        The set is also printed when it is not empty.
    """
    grammar = (
        "\n"
        "NP:{<.*>*<MD|PDT|VB|VBG|NN.*|PRP.*><.*>*<IN><.*>*<CD><.*>*}\n"
        "{<IN><.*>*<CD><.*>*<MD|PDT|VB|VBG|NN.*|PRP.*><.*>*}\n"
    )
    parser = nltk.RegexpParser(grammar)
    parsed = parser.parse(tg(sent))
    # Leaves are (word, tag) pairs; keep only the words of each NP chunk.
    phrases = {
        ' '.join([leaf[0] for leaf in subtree.leaves()])
        for subtree in parsed.subtrees()
        if subtree.label() == 'NP'
    }
    if phrases:
        print(phrases)
    return phrases
# Input text to analyse.
text = """
Hey driver, please be early tomorrow, I have my flight at 6 in the morning.
"""
# Split the text into fragments at '.', '?', '!' or ',' (optionally followed
# by closing quotes/brackets), swallowing the surrounding spaces.
sentences = re.split(r' *[\.\?!,][\'"\)\]]* *', text)
# Print the tasks found in each fragment.
for stuff in sentences:
    # The split leaves blank fragments behind (e.g. the trailing newline of
    # the text); they contain no tokens, so skip them instead of sending an
    # empty token list to the chunk parser.
    if not stuff.strip():
        continue
    extract_NN(stuff)