-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtreetagger_wordnet.py
More file actions
124 lines (118 loc) · 4.86 KB
/
treetagger_wordnet.py
File metadata and controls
124 lines (118 loc) · 4.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# -*- coding: utf-8 -*-
class TreetaggerToWordnet():
    """
    Treetagger POS tags to wordnet morphological category mapper.

    Each supported language has a table translating a TreeTagger POS tag
    into one of the coarse categories "adj", "adv", "noun" or "verb".
    ``short_mapping`` then turns that category into a single-letter code
    ("a", "r", "n", "v").

    Attributes:
        fr_mapping, es_mapping, en_mapping, pt_mapping, it_mapping,
        ca_mapping: per-language dicts ``{treetagger_tag: category}``.
            ``ca_mapping`` is empty, so every Catalan lookup yields None.
        mapping: dict ``{language_code: per-language dict}``. Spanish is
            reachable both as "sp" (historical key) and "es".
        short_mapping: dict ``{category: single-letter code}``.
    """

    def __init__(self):
        """Build the per-language tag tables and the language index."""
        self.fr_mapping = {
            # Adjectives map to "adj" and adverbs to "adv"; these two
            # entries were previously swapped.
            "ADJ": "adj",
            "ADV": "adv",
            "NAM": "noun",
            "NOM": "noun",
            "NUM": "noun",
            "VER:cond": "verb",
            "VER:futu": "verb",
            "VER:impe": "verb",
            "VER:impf": "verb",
            "VER:infi": "verb",
            "VER:pper": "verb",
            "VER:ppre": "verb",
            "VER:pres": "verb",
            "VER:simp": "verb",
            "VER:subi": "verb",
            "VER:subp": "verb",
        }
        self.es_mapping = {
            "ADJ": "adj",
            "ADV": "adv",
            "NC": "noun",
            "NMEA": "noun",
            "NP": "noun",
            "VCLIger": "verb",
            "VCLIinf": "verb",
            "VCLIfin": "verb",
            "VEadj": "verb",
            "VEfin": "verb",
            "VEger": "verb",
            "VEinf": "verb",
            "VHadj": "verb",
            "VHfin": "verb",
            "VHger": "verb",
            "VHinf": "verb",
            "VLadj": "verb",
            "VLfin": "verb",
            "VLger": "verb",
            "VLinf": "verb",
            "VMadj": "verb",
            "VMfin": "verb",
            "VMger": "verb",
            "VMinf": "verb",
            "VSadj": "verb",
            # Full tag names, matching the VE*/VH*/VL*/VM* families above.
            "VSfin": "verb",
            "VSger": "verb",
            "VSinf": "verb",
            # Truncated spellings kept only so existing lookups that relied
            # on them keep working.
            "VSfi": "verb",
            "VSge": "verb",
        }
        self.en_mapping = {
            "ADJ": "adj",
            "JJ": "adj",
            "JJR": "adj",
            "JJS": "adj",
            "RB": "adv",
            "RBR": "adv",
            "RBS": "adv",
            "NN": "noun",
            "NC": "noun",
            "NNS": "noun",
            "NNP": "noun",
            "NNPS": "noun",
            "VB": "verb",
            "VBD": "verb",
            "VBG": "verb",
            "VBN": "verb",
            "VBP": "verb",
            "VBZ": "verb",
            "VLfin": "verb",
            "VCLIinf": "verb",
        }
        self.pt_mapping = {
            "ADJ": "adj",
            "ADV": "adv",
            "N": "noun",
            "V": "verb",
        }
        self.it_mapping = {
            "ADJ": "adj",
            "ADV": "adv",
            "NOM": "noun",
            "NPR": "noun",
            "NUM": "noun",
            "VER:cimp": "verb",
            "VER:cond": "verb",
            "VER:cpre": "verb",
            "VER:futu": "verb",
            "VER:geru": "verb",
            "VER:impe": "verb",
            "VER:impf": "verb",
            "VER:infi": "verb",
            "VER:pper": "verb",
            "VER:ppre": "verb",
            "VER:pres": "verb",
            "VER:refl:infi": "verb",
            "VER:remo": "verb",
        }
        self.ca_mapping = {}
        self.mapping = {
            "sp": self.es_mapping,
            # "es" is an alias for "sp" so Spanish can be addressed the
            # same way as the other two-letter language codes.
            "es": self.es_mapping,
            "en": self.en_mapping,
            "pt": self.pt_mapping,
            "it": self.it_mapping,
            "ca": self.ca_mapping,
            "fr": self.fr_mapping,
        }
        self.short_mapping = {
            "adj": "a",
            "adv": "r",
            "noun": "n",
            "verb": "v",
        }

    def wordnet_morph_category(self, lang, postag):
        """
        Returns the wordnet morphological category corresponding to the
        POS tag of the given language.

        Args:
            lang: language code; must be a key of ``self.mapping``
                ("sp", "es", "en", "pt", "it", "ca" or "fr").
            postag: TreeTagger POS tag to translate.

        Returns:
            "a", "r", "n" or "v" when the tag is known for that language,
            otherwise None.

        Raises:
            KeyError: if ``lang`` is not a key of ``self.mapping``.
        """
        category = self.mapping[lang].get(postag)
        if category is None:
            # Tag not covered by this language's table.
            return None
        return self.short_mapping[category]