-
Notifications
You must be signed in to change notification settings - Fork 0
/
train.py
48 lines (39 loc) · 1.42 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
from analyzer.services import Analyzer
# Location handed to Analyzer; None when ANALYZER_DATA is not set.
# NOTE(review): how Analyzer treats a None data dir is not visible here — confirm.
data_dir = os.getenv('ANALYZER_DATA')


def _collect_labelled_documents(directory, doctype):
    """Read the files directly inside *directory* and label each one.

    Args:
        directory: Path of the folder to scan (not recursive).
        doctype: Value stored under the ``'doctype'`` key of every label dict.

    Returns:
        A list of ``(content, labels)`` tuples in ``os.listdir`` order, where
        ``content`` is the whole text of one file and ``labels`` is
        ``{'doctype': doctype}``.
    """
    samples = []
    for entry in os.listdir(directory):
        path = os.path.join(directory, entry)
        if not os.path.isfile(path):
            # Entries that are not files (e.g. sub-directories) are skipped.
            continue
        # NOTE(review): files are decoded with the platform default encoding,
        # exactly as before; confirm whether an explicit encoding is wanted.
        with open(path, 'r') as fp:
            print(path)
            content = fp.read()
        labels = {
            'doctype': doctype,
        }
        print(labels)
        samples.append((content, labels))
    return samples


def main():
    """Train an ``Analyzer`` on the labelled example folders and build it.

    Files under ``data/trainset/`` are labelled 'Article of Incorporation'
    and files under ``data/trashset/`` are labelled 'Unknown'. Both paths are
    relative to the current working directory.
    """
    an = Analyzer(data_dir)
    # Register "doctype" as the feature set to be analysed. The only labels
    # used below are 'Article of Incorporation' and 'Unknown'.
    an.add_feature_set("doctype")

    train_set = []
    # Positive examples first, then the "trash" (negative) examples, so the
    # training order matches the original two consecutive loops.
    train_set.extend(
        _collect_labelled_documents("data/trainset/", 'Article of Incorporation')
    )
    train_set.extend(_collect_labelled_documents("data/trashset/", 'Unknown'))

    for sentence, feature_labels in train_set:
        an.train(sentence, feature_labels)
    an.build()
    print("Done!")


if __name__ == '__main__':
    main()