/
2_Trie_Tree.c
120 lines (110 loc) · 3.17 KB
/
2_Trie_Tree.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/*************************************************************************
> File Name: 2cha.c
> Author: snowflake
> Mail: ︿( ̄︶ ̄)︿
> Created Time: 2018年08月08日 星期三 21时00分59秒
************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BASE 2
#define BL '0'
unsigned char buff[256][10] = {0};
typedef struct Node {
char *str;
int flag;
struct Node *next[BASE];
struct Node *fail;
} Node;
int node_cnt = 0;
Node *get_new_node() {
Node *p = (Node *)calloc(sizeof(Node), 1);
node_cnt += 1;
return p;
}
void clear(Node *node) {
if (node == NULL) return ;
for (int i = 0; i < BASE; i++) {
if (node->next[i] == NULL) continue;
clear(node->next[i]);
}
if (node->flag) free(node->str);
free(node);
return ;
}
Node *insert(Node *root, const unsigned char *pattern) {
if (root == NULL) root = get_new_node();
Node *p = root;
int len = strlen((char *)pattern);
for (int i = 0; i < len - 2; i++) {
char temp[10] = {0};
strncpy(temp, (char *)buff[pattern[i]], strlen((char *)buff[pattern[i]]));
//printf("%s\n", temp);
for (int j = 0; temp[j]; j++) {
int ind = temp[j] - BL;
if (p->next[ind] == NULL) p->next[ind] = get_new_node();
p = p->next[ind];
}
memset(temp, 0, 10);
}
p->flag = 1;
p->str = strdup((char *)pattern);
printf("%s", p->str);
return root;
}
void search(Node *root, const unsigned char *text) {
Node *p = root;
int len = strlen((char *)text);
printf("%d\n", len * 10);
unsigned char *text_temp = (unsigned char *)calloc(sizeof(unsigned char), len * 10);
for (int i = 0; text[i]; i++) {
strncat((char *)text_temp, (char *)buff[text[i]], strlen((char *)buff[text[i]]));
}
//printf("%s\n", text_temp);
for (int i = 0; text_temp[i]; i++) {
int j = i;
p = root;
while (text_temp[j] && p && p->next[text_temp[j] - BL]) {
//search_times += 1;
p = p->next[text_temp[j] - BL];
if (p->flag == 1) {
printf("find word : %s", p->str);
break;
}
j++;
}
}
free(text_temp);
return ;
}
char *bit(int n) {
char *bit = (char *)calloc(sizeof(char), 10);
int i = 0;
while (n) {
bit[i++] = n % 2 + '0';
n /= 2;
}
return bit;
}
int main() {
for (int i = 0; i < 256; i++) {
strcpy((char *)buff[i], bit(i));
}
int total_count = 0;
FILE *fin = fopen("./pattern", "r");
if (fin == NULL) perror("fopen\n");
unsigned char pattern[10000] = {0};
Node *root = NULL;
while (fgets((char *)pattern, 10000, fin) != NULL) {
int len = strlen((char *)pattern);
total_count += len;
root = insert(root, pattern);
}
unsigned char text[1000000] = {0};
FILE *fp = fopen("text", "r");
if (fp == NULL) perror("fopen\n");
fscanf(fp, "%[^\n]s", text);
search(root, text);
printf("storage rate : %lf\n", 1.0 * total_count / (1.0 * node_cnt * sizeof(Node)));
return 0;
}