-
Notifications
You must be signed in to change notification settings - Fork 0
/
checker.no_import.js
79 lines (62 loc) · 3.87 KB
/
checker.no_import.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
//Thai-Spellcheck-JS : github.com/wishawa/thai-spellcheck-js
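//If you don't want to use import, comment out the import line below and load thbrk.js some other way (for example with a <script> tag) so that thaiSpellcheckerBackend is defined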
//import thaiSpellcheckerBackend from './thbrk.js';//thbrk.js is generated by emscripten (the WebAssembly SDK)
//File-level state shared by loadThaiSpellchecker (which assigns it) and checkThaiSpelling (which reads it)
var Module;//The WebAssembly Module object used to interact with the backend; assigned when the backend has loaded
var breakerObject;//Pointer (a number) returned by the backend's th_brk_new, i.e. the loaded breaker, which is mostly the dictionary
var findBreaksFunctionWrapper;//cwrap wrapper around the C function th_brk_wc_find_breaks, used to find word breaks and incorrect words
/**
 * Loads the WebAssembly backend and prepares the spell checker.
 *
 * On success the file-level variables Module, findBreaksFunctionWrapper and
 * breakerObject are assigned, which is what checkThaiSpelling needs to run.
 *
 * @returns {Promise<Function>} resolves to checkThaiSpelling, the function that
 *   finds misspellings; rejects if the backend fails to load or if wrapping
 *   the C functions / creating the breaker throws.
 */
function loadThaiSpellchecker() {
  return new Promise(function(resolve, reject) {
    //Only `.then` is assumed of the backend's return value, so the explicit Promise wrapper is kept.
    //Passing `reject` as the second argument forwards a backend load failure to our caller
    //instead of leaving the returned promise pending forever.
    thaiSpellcheckerBackend().then(function(r) {//r: the WebAssembly Module object
      try {
        Module = r;//Save the Module object to a file-level variable
        findBreaksFunctionWrapper = Module.cwrap('th_brk_wc_find_breaks', 'number', ['number', 'number', 'number', 'number', 'number', 'number']);
        //[ThBrk *brk, const thchar_t *s, int pos[], size_t pos_sz, int inc[], int *inc_ret]
        breakerObject = Module.ccall('th_brk_new', 'number', ['string'], [null]);
        //[const char *dictpath], left empty (null) to use default
        resolve(checkThaiSpelling);
      } catch (err) {
        //An exception thrown in this callback would otherwise be lost to the caller
        reject(err);
      }
    }, reject);
  });
}
/**
 * Finds the misspelled words in a string using the loaded WebAssembly backend.
 *
 * Reads the file-level Module, findBreaksFunctionWrapper and breakerObject,
 * so it must only be called after loadThaiSpellchecker has resolved.
 *
 * @param {string} text - a javascript string. ex. "ไข่ใก่ฟองนี้มีขะหนาดไหญ่"
 * @returns {number[][]} a 2D integer array of n rows and 2 columns, where n is
 *   the number of incorrectly spelled words. The first number in a row is the
 *   index of the first character of a misspelled word; the second number is the
 *   index of the first letter of the word after the misspelled word (or the
 *   text length when no word follows).
 *   ex. [[ 3,  6],
 *        [14, 24]]
 */
function checkThaiSpelling(text) {
  const BYTES_PER_UTF16_UNIT = 2;
  const BYTES_PER_INT32 = 4;
  const l = text.length;

  //Every buffer handed out by the backend allocator is recorded here so that
  //the finally block can release all of them, even if a backend call throws.
  const allocations = [];
  const allocate = (byteCount) => {
    const pointer = Module._malloc(byteCount);
    allocations.push(pointer);
    return pointer;
  };

  try {
    const utfTextBytes = (l + 1) * BYTES_PER_UTF16_UNIT;//+1 leaves room for the terminator
    const utfText = allocate(utfTextBytes);//space for text for backend
    Module.stringToUTF16(text, utfText, utfTextBytes);//copy text to allocated space as UTF16
    const pos = allocate((l + 1) * BYTES_PER_INT32);//space for word break positions
    const inc = allocate((l + 1) * BYTES_PER_INT32);//space for incorrect word positions
    const incRet = allocate(BYTES_PER_INT32);//a single int for number of incorrect words

    //call the C (WASM) function
    //pos: indexes of the first character of every word
    //breakCount: number of words == length of pos
    //inc: indexes of the first character of every incorrect word
    const breakCount = findBreaksFunctionWrapper(breakerObject, utfText, pos, l + 1, inc, incRet);
    //incCount: number of incorrect words == length of inc
    const incCount = Module.getValue(incRet, 'i32');

    const result = [];
    //cursor into the pos array; it is never reset between incorrect words,
    //so the pos array is walked at most once in total
    let posIndex = 0;
    for (let i = 0; i < incCount; i++) {
      const start = Module.getValue(inc + i * BYTES_PER_INT32, 'i32');
      //find the end of the incorrect word: the lowest word start that is higher than `start`
      //TODO: see if binary search can help here
      let end = l;
      while (posIndex < breakCount && (end = Module.getValue(pos + posIndex * BYTES_PER_INT32, 'i32')) <= start) {
        posIndex++;
      }
      if (end <= start) end = l;//if there is no word after the incorrect word, use the text length
      result.push([start, end]);
    }
    return result;
  } finally {
    for (const pointer of allocations) {
      Module._free(pointer);
    }
  }
}
//export {loadThaiSpellchecker};//export for loading with import / dynamic import