Skip to content

Commit 5ff004e

Browse files
authored
Improved logic of Boyer Moore & added few edge cases in test (williamfiset#187)
1 parent 1266ede commit 5ff004e

File tree

2 files changed

+102
-21
lines changed

2 files changed

+102
-21
lines changed

src/main/java/com/williamfiset/algorithms/strings/BoyerMooreStringSearch.java

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,35 +17,43 @@ public class BoyerMooreStringSearch {
1717
* @return List of indexes where the pattern occurs
1818
*/
1919
public List<Integer> findOccurrences(String text, String pattern) {
20-
if (isNull(text) || isNull(pattern)) {
20+
if (isNull(text)
21+
|| isNull(pattern)
22+
|| pattern.length() > text.length()
23+
|| pattern.length() == 0) {
2124
return new ArrayList<>();
2225
}
2326
List<Integer> occurrences = new ArrayList<>();
24-
25-
Map<Character, Integer> skipTable = new HashMap<>();
26-
for (int i = 0; i < pattern.length(); i++) {
27-
skipTable.put(pattern.charAt(i), pattern.length() - i - 1);
28-
}
27+
Map<Character, Integer> skipTable = generateSkipTable(pattern);
2928

3029
int textIndex = pattern.length() - 1;
30+
int patternIndex = pattern.length() - 1;
3131
while (textIndex < text.length()) {
32-
int patternIndex = 0;
33-
boolean match = true;
34-
while (patternIndex < pattern.length() && match) {
35-
if (text.charAt(textIndex - patternIndex)
36-
!= pattern.charAt(pattern.length() - patternIndex - 1)) {
37-
match = false;
38-
textIndex +=
39-
skipTable.getOrDefault(text.charAt(textIndex - patternIndex), pattern.length());
32+
if (patternIndex >= 0 && pattern.charAt(patternIndex) == text.charAt(textIndex)) {
33+
if (patternIndex == 0) {
34+
occurrences.add(textIndex);
35+
} else {
36+
textIndex--;
4037
}
41-
patternIndex++;
42-
}
43-
if (match) {
44-
occurrences.add(textIndex - (pattern.length() - 1));
45-
textIndex += pattern.length();
38+
patternIndex--;
39+
} else {
40+
textIndex =
41+
textIndex
42+
+ pattern.length()
43+
- Math.min(
44+
Math.max(patternIndex, 0),
45+
1 + skipTable.getOrDefault(text.charAt(textIndex), 0));
46+
patternIndex = pattern.length() - 1;
4647
}
4748
}
48-
4949
return occurrences;
5050
}
51+
52+
private Map<Character, Integer> generateSkipTable(String pattern) {
53+
Map<Character, Integer> characterIndexMap = new HashMap<>();
54+
for (int charIndex = 0; charIndex < pattern.length(); charIndex++) {
55+
characterIndexMap.put(pattern.charAt(charIndex), charIndex);
56+
}
57+
return characterIndexMap;
58+
}
5159
}

src/test/java/com/williamfiset/algorithms/strings/BoyerMooreStringSearchTest.java

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,24 @@
11
package com.williamfiset.algorithms.strings;
22

33
import static com.google.common.truth.Truth.assertThat;
4+
import static java.util.Objects.isNull;
45

6+
import java.util.ArrayList;
7+
import java.util.List;
8+
import java.util.Random;
59
import org.junit.Before;
610
import org.junit.Test;
711

812
public class BoyerMooreStringSearchTest {
913

1014
private BoyerMooreStringSearch underTest;
15+
private Random random;
16+
private final int MAX_ITERATION = 20;
1117

1218
@Before
1319
public void setup() {
1420
underTest = new BoyerMooreStringSearch();
21+
random = new Random();
1522
}
1623

1724
@Test
@@ -35,12 +42,78 @@ public void shouldReturnOneOccurrence() {
3542
}
3643

3744
@Test
38-
public void shouldReturnMultiplyOccurrences() {
45+
public void shouldReturnMultipleOccurrences() {
46+
assertThat(underTest.findOccurrences("SAAT TE", "TE")).containsExactly(5);
3947
assertThat(
4048
underTest.findOccurrences("Sample text for testing the Boyer-Moore algorithm.", "te"))
4149
.containsExactly(7, 16);
4250
assertThat(underTest.findOccurrences("Sample text for testing the Boyer-Moore algorithm.", " "))
4351
.containsExactly(6, 11, 15, 23, 27, 39);
52+
assertThat(underTest.findOccurrences("AABAACAADAABAABA", "AABA")).containsExactly(0, 9, 12);
53+
assertThat(underTest.findOccurrences("AAAAAAA", "AA")).containsExactly(0, 1, 2, 3, 4, 5);
54+
}
55+
56+
@Test
57+
public void shouldReturnEmptyForPatternLengthLargerThenText() {
58+
assertThat(underTest.findOccurrences("This is a Test Text", "This is a test Pattern"))
59+
.isEmpty();
60+
}
61+
62+
@Test
63+
public void shouldReturnDynamicString() {
64+
int runLength = random.nextInt(MAX_ITERATION) + 1;
65+
for (int run = 0; run < runLength; run++) {
66+
int upperCharText = random.nextInt(3);
67+
int upperCharPattern = random.nextInt(3);
68+
int maxLengthText =
69+
random.nextInt(1000) + 100; // random length of the string between [100=1000]
70+
int maxLengthPattern = random.nextInt(10);
71+
String text = generateRandomString(upperCharText, maxLengthText);
72+
String pattern = generateRandomString(upperCharPattern, maxLengthPattern);
73+
assertThat(underTest.findOccurrences(text, pattern))
74+
.containsExactlyElementsIn(getOccurrencesBruteForce(text, pattern));
75+
}
76+
}
77+
78+
/**
79+
* @param text the text being searched in
80+
* @param pattern the pattern that needs to be searched in text
81+
* @return a list of beginning index of text where pattern exits
82+
*/
83+
private List<Integer> getOccurrencesBruteForce(String text, String pattern) {
84+
if (isNull(text)
85+
|| isNull(pattern)
86+
|| text.length() < pattern.length()
87+
|| pattern.length() == 0) {
88+
return new ArrayList<>();
89+
}
90+
List<Integer> occurrences = new ArrayList<>();
91+
for (int textIndex = 0; textIndex < text.length() - pattern.length() + 1; textIndex++) {
92+
boolean match = true;
93+
for (int patIndex = 0; patIndex < pattern.length(); patIndex++) {
94+
if (text.charAt(textIndex + patIndex) != pattern.charAt(patIndex)) {
95+
match = false;
96+
break;
97+
}
98+
}
99+
if (match) {
100+
occurrences.add(textIndex);
101+
}
102+
}
103+
return occurrences;
104+
}
105+
106+
/**
107+
* @param upperLimitAscii Largest element in the random string
108+
* @param length Length of the random string
109+
* @return Returns a random string containing character between [a-z]
110+
*/
111+
private String generateRandomString(int upperLimitAscii, int length) {
112+
return random
113+
.ints('a', 'a' + upperLimitAscii + 1)
114+
.limit(length)
115+
.collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append)
116+
.toString();
44117
}
45118

46119
// TODO(william): Add a test that compares this implementation of Boyermoore

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy