-
Notifications
You must be signed in to change notification settings - Fork 4.3k
/
SuffixArrayMed.java
120 lines (94 loc) · 3.55 KB
/
SuffixArrayMed.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/**
* Medium speed suffix array implementation. Time Complexity: O(nlog^2(n))
*
* @author William Fiset, william.alexandre.fiset@gmail.com
*/
package com.williamfiset.algorithms.datastructures.suffixarray;
public class SuffixArrayMed extends SuffixArray {
// Wrapper class to help sort suffix ranks
static class SuffixRankTuple implements Comparable<SuffixRankTuple> {
int firstHalf, secondHalf, originalIndex;
// Sort Suffix ranks first on the first half then the second half
@Override
public int compareTo(SuffixRankTuple other) {
int cmp = Integer.compare(firstHalf, other.firstHalf);
if (cmp == 0) return Integer.compare(secondHalf, other.secondHalf);
return cmp;
}
@Override
public String toString() {
return originalIndex + " -> (" + firstHalf + ", " + secondHalf + ")";
}
}
public SuffixArrayMed(String text) {
super(toIntArray(text));
}
public SuffixArrayMed(int[] text) {
super(text);
}
// Construct a suffix array in O(nlog^2(n))
@Override
protected void construct() {
sa = new int[N];
// Maintain suffix ranks in both a matrix with two rows containing the
// current and last rank information as well as some sortable rank objects
int[][] suffixRanks = new int[2][N];
SuffixRankTuple[] ranks = new SuffixRankTuple[N];
// Assign a numerical value to each character in the text
for (int i = 0; i < N; i++) {
suffixRanks[0][i] = T[i];
ranks[i] = new SuffixRankTuple();
}
// O(log(n))
for (int pos = 1; pos < N; pos *= 2) {
for (int i = 0; i < N; i++) {
SuffixRankTuple suffixRank = ranks[i];
suffixRank.firstHalf = suffixRanks[0][i];
suffixRank.secondHalf = i + pos < N ? suffixRanks[0][i + pos] : -1;
suffixRank.originalIndex = i;
}
// O(nlog(n))
java.util.Arrays.sort(ranks);
int newRank = 0;
suffixRanks[1][ranks[0].originalIndex] = 0;
for (int i = 1; i < N; i++) {
SuffixRankTuple lastSuffixRank = ranks[i - 1];
SuffixRankTuple currSuffixRank = ranks[i];
// If the first half differs from the second half
if (currSuffixRank.firstHalf != lastSuffixRank.firstHalf
|| currSuffixRank.secondHalf != lastSuffixRank.secondHalf) newRank++;
suffixRanks[1][currSuffixRank.originalIndex] = newRank;
}
// Place top row (current row) to be the last row
suffixRanks[0] = suffixRanks[1];
// Optimization to stop early
if (newRank == N - 1) break;
}
// Fill suffix array
for (int i = 0; i < N; i++) {
sa[i] = ranks[i].originalIndex;
ranks[i] = null;
}
// Cleanup
suffixRanks[0] = suffixRanks[1] = null;
suffixRanks = null;
ranks = null;
}
public static void main(String[] args) {
// String[] strs = { "AAGAAGC", "AGAAGT", "CGAAGC" };
// String[] strs = { "abca", "bcad", "daca" };
// String[] strs = { "abca", "bcad", "daca" };
// String[] strs = { "AABC", "BCDC", "BCDE", "CDED" };
// String[] strs = { "abcdefg", "bcdefgh", "cdefghi" };
// String[] strs = { "xxx", "yyy", "zzz" };
// TreeSet <String> lcss = SuffixArrayMed.lcs(strs, 2);
// System.out.println(lcss);
// SuffixArrayMed sa = new SuffixArrayMed("abracadabra");
// System.out.println(sa);
// System.out.println(java.util.Arrays.toString(sa.sa));
// System.out.println(java.util.Arrays.toString(sa.lcp));
SuffixArrayMed sa = new SuffixArrayMed("ABBABAABAA");
// SuffixArrayMed sa = new SuffixArrayMed("GAGAGAGAGAGAG");
System.out.println(sa);
}
}