-
Notifications
You must be signed in to change notification settings - Fork 0
/
FileProcessor.cs
44 lines (37 loc) · 1.78 KB
/
FileProcessor.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
using Newtonsoft.Json;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Text.RegularExpressions;
namespace ConcurrentEx
{
/// <summary>
/// Consumes sentences from a shared queue, splits each one into words, and tallies every
/// word that is not on the stop-word list into a shared word-count dictionary.
/// </summary>
public class FileProcessor : Processor
{
    /// <summary>
    /// Stop words that are excluded from the count. The empty string is listed because
    /// splitting on non-word characters yields empty tokens at the edges of a sentence.
    /// Matching is case-insensitive so that a capitalized stop word (for example "The"
    /// at the start of a sentence) is filtered as well.
    /// </summary>
    private static readonly HashSet<string> IgnoreWords = new(StringComparer.OrdinalIgnoreCase)
    {
        "", "the", "and", "she", "you", "have", "has", "does", "are", "for", "this", "was",
        "were", "on", "in", "had", "that", "they", "his", "with", "their", "not", "been",
        "them", "all", "which", "from", "out", "there", "but", "him", "other", "did", "into",
        "than", "every", "any", "what", "her", "never", "after", "very", "about", "even",
        "our", "no", "of", "is", "we", "do", "to", "it", "he",
    };

    /// <summary>Matches one or more consecutive non-word characters; used as the token separator.</summary>
    private static readonly Regex WordSeparator = new(@"\W+", RegexOptions.Compiled);

    /// <summary>Queue the sentences are consumed from.</summary>
    public BlockingCollection<string> _sentenceQueue;

    /// <summary>Dictionary that receives the per-word occurrence counts.</summary>
    public ConcurrentDictionary<string, int> _wordCountDic;

    /// <summary>Set to <c>true</c> by the constructor; not read anywhere in this class.</summary>
    public bool _stillReadingFile;

    /// <summary>
    /// Creates a processor bound to the given sentence queue and word-count dictionary.
    /// </summary>
    /// <param name="sentenceQueue">Queue to consume sentences from.</param>
    /// <param name="wordCountDic">Dictionary to accumulate word counts into.</param>
    public FileProcessor(BlockingCollection<string> sentenceQueue, ConcurrentDictionary<string, int> wordCountDic)
    {
        _sentenceQueue = sentenceQueue;
        _wordCountDic = wordCountDic;
        _stillReadingFile = true;
    }

    /// <summary>
    /// Drains the sentence queue, incrementing the count of every non-ignored word.
    /// </summary>
    /// <remarks>
    /// The body contains no <c>await</c>, so the loop runs synchronously on the calling
    /// thread and the returned task is already finished when the method returns. The
    /// <c>async</c> modifier is kept so that any exception is still captured in the
    /// returned task instead of being thrown directly at the caller.
    /// Words are counted with their original casing; whether differently cased spellings
    /// share an entry depends on the comparer of the dictionary supplied by the caller.
    /// </remarks>
    /// <returns>A task that represents the processing run.</returns>
    protected override async Task StartProcessing()
    {
        // GetConsumingEnumerable blocks while the queue is empty and ends once the
        // producer has marked the collection as complete for adding and it is drained.
        foreach (var sentence in _sentenceQueue.GetConsumingEnumerable())
        {
            foreach (var word in SplitSentence(sentence))
            {
                if (IgnoreWords.Contains(word))
                {
                    continue;
                }

                _wordCountDic.AddOrUpdate(word, 1, (_, count) => count + 1);
            }
        }
    }

    /// <summary>Splits a sentence into tokens on runs of non-word characters.</summary>
    /// <param name="sentence">Sentence to split.</param>
    /// <returns>The tokens in order of appearance; may include empty strings.</returns>
    private static string[] SplitSentence(string sentence) => WordSeparator.Split(sentence);
}
}