-
Notifications
You must be signed in to change notification settings - Fork 4
/
WordSegmenterUtilities.py
120 lines (83 loc) · 1.94 KB
/
WordSegmenterUtilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# coding: utf-8
# In[1]:
import scipy.io
import numpy as np
import matplotlib.pyplot as pyplot
from PIL import Image
import matplotlib.cm as cm
from pprint import pprint
import scipy.misc
import PIL
import KMeansUtilities as km
# In[2]:
def is_background(mat, colno):
    """Report whether column ``colno`` of ``mat`` holds only background pixels.

    A pixel is treated as ink when its value is below 0.9; a column is
    background exactly when it contains no such pixel.

    Args:
        mat: 2-D NumPy array of pixel intensities (``main`` passes values
            scaled to the range 0..1).
        colno: Index of the column to inspect.

    Returns:
        bool: ``True`` if no entry of the column is below 0.9 (this includes
        an image with zero rows), ``False`` otherwise.
    """
    # One vectorised comparison instead of a Python-level loop over every row.
    column = np.asarray(mat)[:, colno]
    return not bool((column < 0.9).any())
# In[3]:
def get_next_nonbackground(mat, colno):
    """Find the first column to the right of ``colno`` that contains ink.

    Args:
        mat: 2-D image array; only its ``shape`` is read here, the pixel test
            is delegated to ``is_background``.
        colno: Column index after which the search starts (``colno`` itself
            is not examined).

    Returns:
        int: Index of the first column ``> colno`` for which
        ``is_background`` is false. When no such column exists the index of
        the last column (``col - 1``) is returned instead, so callers cannot
        distinguish "ink in the last column" from "no ink found".
    """
    _, n_cols = mat.shape
    ink_columns = (
        candidate
        for candidate in range(colno + 1, n_cols)
        if not is_background(mat, candidate)
    )
    # Fall back to the last column index when the rest of the line is blank.
    return next(ink_columns, n_cols - 1)
# In[17]:
def get_threshold(mat):
    """Measure the width of every horizontal run of background columns.

    Despite its name, this function does not return a threshold: it returns
    the raw gap widths, which ``main`` hands to ``km.get_clusters``.

    Args:
        mat: 2-D image array, scanned column by column from left to right.

    Returns:
        list[list[int]]: One single-element list ``[width]`` per background
        run, in left-to-right order. ``width`` is the distance from the first
        background column of the run to the column returned by
        ``get_next_nonbackground``. For a run that reaches the right edge
        that helper returns the last column index, so the reported width is
        one less than the run length and may be 0.
    """
    _, n_cols = mat.shape
    gaps = []
    column = 0
    while column < n_cols:
        if not is_background(mat, column):
            column += 1
            continue
        gap_end = get_next_nonbackground(mat, column)
        gaps.append([gap_end - column])
        # Resume one past the column that ended the gap.
        column = gap_end + 1
    return gaps
# In[30]:
def get_words(mat, threshold):
    """Compute the column boundaries that separate words on a text line.

    For every ink column the distance to the next ink column is measured;
    when that distance exceeds ``threshold`` the pair is recorded as the end
    of one word and the start of the next.

    Args:
        mat: 2-D image array of the text line.
        threshold: Distances strictly greater than this are word breaks.

    Returns:
        list[int]: Flat list ``[0, end_1, start_2, end_2, ..., col - 1]``.
        Consecutive pairs ``(list[2k], list[2k + 1])`` are the left and right
        boundary columns of one word. When the last ink column is followed by
        a wide blank tail, the final pair is ``(col - 1, col - 1)``.
    """
    _, n_cols = mat.shape
    boundaries = [0]
    for column in range(n_cols):
        if is_background(mat, column):
            continue
        following_ink = get_next_nonbackground(mat, column)
        if following_ink - column > threshold:
            boundaries.extend((column, following_ink))
    boundaries.append(n_cols - 1)
    return boundaries
# In[35]:
def crop_line(mat):
    """Trim the blank left margin of a text-line image.

    The result starts three columns to the left of the first column that
    contains ink, so a small margin is kept in front of the text.

    Args:
        mat: 2-D image array of the text line.

    Returns:
        A slice of ``mat`` containing all rows and the columns from
        ``max(first_ink - 3, 0)`` to the right edge. If no column contains
        ink, the whole image is returned.
    """
    _, n_cols = mat.shape
    first_ink = next(
        (column for column in range(n_cols) if not is_background(mat, column)),
        None,
    )
    if first_ink is None:
        # Blank line: nothing to crop.
        return mat[:, 0:]
    # Clamp at 0: a negative slice start would be counted from the right edge
    # and silently discard almost the entire image when the ink begins within
    # the first three columns.
    start = max(first_ink - 3, 0)
    return mat[:, start:]
def main(filename):
    """Split the image of a single text line into per-word sub-images.

    Steps: load the image as grayscale scaled to 0..1, crop the blank left
    margin, collect the widths of all blank gaps, cluster those widths into
    two groups with ``km.get_clusters``, use the smallest gap of the
    wider-gap cluster as the word-break threshold, and slice the image at the
    resulting boundaries.

    Args:
        filename: Path of the image file to read.

    Returns:
        list: Array slices of the cropped image, one per word, in
        left-to-right order. Each slice spans all rows and the columns
        ``left`` to ``right + 2`` (exclusive) of a boundary pair from
        ``get_words``; pairs with zero width are skipped.
    """
    # scipy.misc.imread was removed in SciPy 1.2. Read through Pillow (which
    # imread wrapped) and force 8-bit grayscale so the array is always 2-D.
    with Image.open(filename) as picture:
        img = np.asarray(picture.convert("L"), dtype=float) / 255.0
    img = crop_line(img)

    gap_widths = get_threshold(img)
    # NOTE(review): assumes km.get_clusters returns two non-empty, indexable
    # clusters of [width] points — confirm against KMeansUtilities.
    clusters = km.get_clusters(gap_widths, 2)

    # The cluster whose first member is larger is taken as the one holding
    # the between-word gaps; the other holds the between-letter gaps.
    if clusters[1][0][0] > clusters[0][0][0]:
        word_gaps = clusters[1]
    else:
        word_gaps = clusters[0]
    threshold = min(point[0] for point in word_gaps)

    bounds = get_words(img, threshold)
    # bounds is a flat list of (left, right) pairs; the +2 keeps two extra
    # columns to the right of each recorded word end.
    return [
        img[:, left:right + 2]
        for left, right in zip(bounds[::2], bounds[1::2])
        if right - left > 0
    ]