forked from NikolaiT/scrapeulous
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_runner.js
117 lines (95 loc) · 2.46 KB
/
test_runner.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env node
const fs = require('fs');
const clipboardy = require('clipboardy');
const UserAgent = require('user-agents');
/**
 * Common base for every crawler worker.
 *
 * Gives each instance a console-backed `logger` and stores the options
 * object supplied by the caller.
 */
class Worker {
  /**
   * @param {object} [options] - Worker settings; any falsy value is replaced by `{}`.
   */
  constructor(options) {
    const logger = {};
    logger.info = console.log;
    logger.error = console.error;

    this.logger = logger;
    this.options = options ? options : {};
  }

  /**
   * Pause for `ms` milliseconds (scheduled with `setTimeout`).
   *
   * @param {number} ms - Delay passed to `setTimeout`.
   * @returns {Promise<undefined>} Resolves once the timer fires.
   */
  async sleep(ms) {
    return new Promise((wake) => {
      setTimeout(wake, ms);
    });
  }
}
/**
 * Worker flavour for crawlers that do not need a browser.
 *
 * After `setup()` the instance exposes the `user-agents` export as
 * `UserAgent`, the `got` module as `Got` and the `cheerio` module as
 * `Cheerio`.
 */
class HttpWorker extends Worker {
  /**
   * @param {object} [options] - Forwarded unchanged to `Worker`.
   */
  constructor(options) {
    super(options);
  }

  /**
   * Attach the HTTP toolkit to this instance.
   *
   * `got` and `cheerio` are required here, at setup time, rather than at
   * the top of the file. Entries are loaded and assigned one after another,
   * in the order listed.
   *
   * @returns {Promise<void>}
   */
  async setup() {
    const toolkit = [
      ['UserAgent', () => UserAgent],
      ['Got', () => require('got')],
      ['Cheerio', () => require('cheerio')],
    ];
    for (const [property, load] of toolkit) {
      this[property] = load();
    }
  }

  /**
   * Counterpart of `setup()`; this worker has nothing to release.
   *
   * @returns {Promise<void>}
   */
  async turnDown() {}
}
/**
 * Worker flavour for crawlers that drive a browser through Puppeteer.
 *
 * After `setup()` the instance exposes `UserAgent`, `clipboardy`, the
 * launched `browser` and one open `page`.
 */
class BrowserWorker extends Worker {
  /**
   * @param {object} [options] - Forwarded unchanged to `Worker`.
   */
  constructor(options) {
    super(options);
  }

  /**
   * Launch the browser and open the page the crawler will use.
   *
   * Puppeteer is required here, at setup time, rather than at the top of
   * the file. The browser is started with `headless: false`,
   * `defaultViewport: null` and the `--start-maximized` argument.
   *
   * @returns {Promise<void>}
   */
  async setup() {
    this.UserAgent = UserAgent;
    this.clipboardy = clipboardy;
    const puppeteer = require('puppeteer');
    this.browser = await puppeteer.launch({
      headless: false,
      defaultViewport: null,
      args: ['--start-maximized'],
    });
    this.page = await this.browser.newPage();
  }

  /**
   * Close the page and then the browser.
   *
   * The browser is closed in a `finally` block so that a failure while
   * closing the page (or a page that was never created because `setup()`
   * did not complete) cannot leave the browser process running.
   *
   * @returns {Promise<void>}
   */
  async turnDown() {
    try {
      if (this.page) {
        await this.page.close();
      }
    } finally {
      if (this.browser) {
        await this.browser.close();
      }
    }
  }
}
/**
 * Loads a crawler source file, instantiates the worker class it defines and
 * runs it over a list of items.
 */
class TestRunner {
  /**
   * Reads the crawler source synchronously. If the file cannot be read the
   * error is printed and the process exits with code -1.
   *
   * @param {string} crawler - Path of the crawler source file.
   * @param {Array} items - Items handed to the crawler's `crawl()` one by one.
   * @param {object} options - Options passed to the crawler's constructor.
   */
  constructor(crawler, items, options) {
    this.items = items;
    this.options = options;
    try {
      // Read as text: the source is only searched and interpolated as a string.
      this.crawler_code = fs.readFileSync(crawler, 'utf8');
    } catch (err) {
      console.error(`Could not read crawler: ${err}`);
      process.exit(-1);
    }
  }

  /**
   * Evaluate the crawler, set it up, crawl every item sequentially and tear
   * the worker down again.
   *
   * The source must contain `extends HttpWorker` or `extends BrowserWorker`;
   * otherwise `Invalid worker` is printed and the process exits with -1.
   *
   * @returns {Promise<Array>} The value returned by `crawl()` for each item,
   *   in item order.
   */
  async run() {
    const extendsKnownWorker =
      this.crawler_code.includes('extends HttpWorker') ||
      this.crawler_code.includes('extends BrowserWorker');
    if (!extendsKnownWorker) {
      console.error('Invalid worker');
      process.exit(-1);
    }

    // SECURITY: the crawler file is executed as code with this process's
    // privileges — only run crawlers you trust. A direct `eval` is kept on
    // purpose: the crawler's `extends HttpWorker` / `extends BrowserWorker`
    // clause has to resolve against the names visible from this scope.
    // The result is not stored in a local called `Worker`, so the base class
    // of that name is not shadowed for the evaluated code.
    const CrawlerClass = eval(`(${this.crawler_code})`);
    const instance = new CrawlerClass(this.options);
    await instance.setup();

    const results = [];
    try {
      for (const item of this.items) {
        results.push(await instance.crawl(item));
      }
    } finally {
      // Always release the worker's resources, even when a crawl throws.
      await instance.turnDown();
    }
    return results;
  }
}
/**
 * Command-line entry point.
 *
 * Usage: test_runner.js [crawler] [items]
 *   crawler - path of the crawler file (defaults to 'http.js')
 *   items   - a JavaScript array expression (defaults to one ipinfo.io URL)
 *
 * Prints the crawl results, or prints the error and exits with -1 on failure.
 */
(async () => {
  // SECURITY: argv[3] is evaluated as JavaScript, so whatever is passed on
  // the command line runs with this process's privileges. Kept because
  // callers may pass non-JSON array literals (e.g. single-quoted strings);
  // never feed this untrusted input.
  const items = eval(process.argv[3]) || ['https://ipinfo.io/json'];
  if (!Array.isArray(items)) {
    console.error(`items must be an array`);
    process.exit(-1);
  }

  const crawler = process.argv[2] || 'http.js';

  // Per-crawler defaults, selected by the exact crawler argument.
  const options = {};
  if (crawler === 'leads.js') {
    options.advanced = false;
  }
  if (crawler === 'social.js') {
    options.link_depth = 1;
  }

  console.log(`Running crawler ${crawler} with options ${JSON.stringify(options)}`);
  const tester = new TestRunner(crawler, items, options);
  const results = await tester.run();
  console.dir(results, { depth: null });
})().catch((err) => {
  // Without this handler a failing crawl (or an invalid items expression)
  // would surface as an unhandled promise rejection.
  console.error(err);
  process.exit(-1);
});