diff options
author | George Fraser <george@fivetran.com> | 2018-12-29 01:15:04 -0800 |
---|---|---|
committer | George Fraser <george@fivetran.com> | 2018-12-29 01:15:04 -0800 |
commit | 42db035b1099267db8ad4c7f39e3991ff4aad697 (patch) | |
tree | d10256ce649d53ed1c4d25f0cc13f14f6825d72e /src | |
parent | f342aa14b13149cf061942584edd447f37edd3cd (diff) | |
download | java-language-server-42db035b1099267db8ad4c7f39e3991ff4aad697.zip |
Boyer–Moore string search is faster but not the same semantics
Diffstat (limited to 'src')
-rw-r--r-- | src/main/java/org/javacs/JavaCompilerService.java | 4 | ||||
-rw-r--r-- | src/main/java/org/javacs/Parser.java | 13 | ||||
-rw-r--r-- | src/main/java/org/javacs/StringSearch.java | 148 | ||||
-rw-r--r-- | src/test/java/org/javacs/StringSearchTest.java | 32 |
4 files changed, 196 insertions, 1 deletions
diff --git a/src/main/java/org/javacs/JavaCompilerService.java b/src/main/java/org/javacs/JavaCompilerService.java index fd5abb5..06ac779 100644 --- a/src/main/java/org/javacs/JavaCompilerService.java +++ b/src/main/java/org/javacs/JavaCompilerService.java @@ -339,14 +339,16 @@ public class JavaCompilerService { } public List<TreePath> findSymbols(String query, int limit) { - LOG.info(String.format("Searching for `%s`", query)); + LOG.info(String.format("Searching for `%s`...", query)); var result = new ArrayList<TreePath>(); var files = allJavaSources(); for (var file : files) { if (!Parser.containsWordMatching(file, query)) continue; + LOG.info(String.format("...%s contains text matches", file.getFileName())); var parse = Parser.parse(file); var symbols = Parser.findSymbolsMatching(parse, query); + if (symbols.size() > 0) LOG.info(String.format("...found %d occurrences", symbols.size())); result.addAll(symbols); if (result.size() >= limit) break; } diff --git a/src/main/java/org/javacs/Parser.java b/src/main/java/org/javacs/Parser.java index 6418c74..27aa40e 100644 --- a/src/main/java/org/javacs/Parser.java +++ b/src/main/java/org/javacs/Parser.java @@ -111,6 +111,19 @@ class Parser { } } + static boolean containsText(Path java, String query) { + try { + var search = new StringSearch(query); + var reader = Files.newBufferedReader(java); + for (var line = reader.readLine(); line != null; line = reader.readLine()) { + if (search.next(line) != -1) return true; + } + return false; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + static List<TreePath> findSymbolsMatching(CompilationUnitTree parse, String query) { class Find extends TreePathScanner<Void, Void> { List<TreePath> found = new ArrayList<>(); diff --git a/src/main/java/org/javacs/StringSearch.java b/src/main/java/org/javacs/StringSearch.java new file mode 100644 index 0000000..12d6f02 --- /dev/null +++ b/src/main/java/org/javacs/StringSearch.java @@ -0,0 +1,148 @@ +package org.javacs; + +// Translated from https://golang.org/src/strings/search.go + +// Search efficiently finds strings in a source text. It's implemented +// using the Boyer-Moore string search algorithm: +// https://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm +// https://www.cs.utexas.edu/~moore/publications/fstrpos.pdf (note: this aged +// document uses 1-based indexing) +class StringSearch { + // pattern is the string that we are searching for in the text. + private final byte[] pattern; + + // badCharSkip[b] contains the distance between the last byte of pattern + // and the rightmost occurrence of b in pattern. If b is not in pattern, + // badCharSkip[b] is len(pattern). + // + // Whenever a mismatch is found with byte b in the text, we can safely + // shift the matching frame at least badCharSkip[b] until the next time + // the matching char could be in alignment. + // TODO 256 is not coloring + private final int[] badCharSkip = new int[256]; + + // goodSuffixSkip[i] defines how far we can shift the matching frame given + // that the suffix pattern[i+1:] matches, but the byte pattern[i] does + // not. There are two cases to consider: + // + // 1. The matched suffix occurs elsewhere in pattern (with a different + // byte preceding it that we might possibly match). In this case, we can + // shift the matching frame to align with the next suffix chunk. For + // example, the pattern "mississi" has the suffix "issi" next occurring + // (in right-to-left order) at index 1, so goodSuffixSkip[3] == + // shift+len(suffix) == 3+4 == 7. + // + // 2. If the matched suffix does not occur elsewhere in pattern, then the + // matching frame may share part of its prefix with the end of the + // matching suffix. In this case, goodSuffixSkip[i] will contain how far + // to shift the frame to align this portion of the prefix to the + // suffix. For example, in the pattern "abcxxxabc", when the first + // mismatch from the back is found to be in position 3, the matching + // suffix "xxabc" is not found elsewhere in the pattern. However, its + // rightmost "abc" (at position 6) is a prefix of the whole pattern, so + // goodSuffixSkip[3] == shift+len(suffix) == 6+5 == 11. + private final int[] goodSuffixSkip; + + StringSearch(String pattern) { + this(pattern.getBytes()); + } + + StringSearch(byte[] pattern) { + this.pattern = pattern; + this.goodSuffixSkip = new int[pattern.length]; + + // last is the index of the last character in the pattern. + var last = pattern.length - 1; + + // Build bad character table. + // Bytes not in the pattern can skip one pattern's length. + for (var i = 0; i < badCharSkip.length; i++) { + badCharSkip[i] = pattern.length; + } + // The loop condition is < instead of <= so that the last byte does not + // have a zero distance to itself. Finding this byte out of place implies + // that it is not in the last position. + for (var i = 0; i < last; i++) { + badCharSkip[pattern[i] + 128] = last - i; + } + + // Build good suffix table. + // First pass: set each value to the next index which starts a prefix of + // pattern. + var lastPrefix = last; + for (var i = last; i >= 0; i--) { + if (hasPrefix(pattern, new Slice(pattern, i + 1))) lastPrefix = i + 1; + // lastPrefix is the shift, and (last-i) is len(suffix). + goodSuffixSkip[i] = lastPrefix + last - i; + } + // Second pass: find repeats of pattern's suffix starting from the front. + for (var i = 0; i < last; i++) { + var lenSuffix = longestCommonSuffix(pattern, new Slice(pattern, 1, i + 1)); + if (pattern[i - lenSuffix] != pattern[last - lenSuffix]) { + // (last-i) is the shift, and lenSuffix is len(suffix). + goodSuffixSkip[last - lenSuffix] = lenSuffix + last - i; + } + } + } + + int next(String text) { + return next(text.getBytes()); + } + + int next(byte[] text) { + var i = pattern.length - 1; + while (i < text.length) { + // Compare backwards from the end until the first unmatching character. + var j = pattern.length - 1; + while (j >= 0 && text[i] == pattern[j]) { + i--; + j--; + } + if (j < 0) { + return i + 1; // match + } + i += Math.max(badCharSkip[text[i] + 128], goodSuffixSkip[j]); + } + return -1; + } + + private boolean hasPrefix(byte[] s, Slice prefix) { + for (int i = 0; i < prefix.length(); i++) { + if (s[i] != prefix.get(i)) return false; + } + return true; + } + + private int longestCommonSuffix(byte[] a, Slice b) { + int i = 0; + for (; i < a.length && i < b.length(); i++) { + if (a[a.length - 1 - i] != b.get(b.length() - 1 - i)) { + break; + } + } + return i; + } + + private static class Slice { + private final byte[] target; + private int from, until; + + int length() { + return until - from; + } + + byte get(int i) { + return target[from + i]; + } + + public Slice(byte[] target, int from) { + this(target, from, target.length); + } + + public Slice(byte[] target, int from, int until) { + this.target = target; + this.from = from; + this.until = until; + } + } +} diff --git a/src/test/java/org/javacs/StringSearchTest.java b/src/test/java/org/javacs/StringSearchTest.java new file mode 100644 index 0000000..7827fcd --- /dev/null +++ b/src/test/java/org/javacs/StringSearchTest.java @@ -0,0 +1,32 @@ +package org.javacs; + +import static org.hamcrest.Matchers.*; +import static org.junit.Assert.*; + +import org.junit.Test; + +public class StringSearchTest { + private void testNext(String pat, String text, int index) { + var got = new StringSearch(pat).next(text); + assertThat(got, equalTo(index)); + } + + @Test + public void testFinderNext() { + testNext("", "", 0); + testNext("", "abc", 0); + testNext("abc", "", -1); + testNext("abc", "abc", 0); + testNext("d", "abcdefg", 3); + testNext("nan", "banana", 2); + testNext("pan", "anpanman", 2); + testNext("nnaaman", "anpanmanam", -1); + testNext("abcd", "abc", -1); + testNext("abcd", "bcd", -1); + testNext("bcd", "abcd", 1); + testNext("abc", "acca", -1); + testNext("aa", "aaa", 0); + testNext("baa", "aaaaa", -1); + testNext("at that", "which finally halts. at that point", 22); + } +} |