aboutsummaryrefslogtreecommitdiff
path: root/ass1/binary_boyermoore.py
diff options
context:
space:
mode:
authorakiyamn2021-03-23 16:26:18 +1100
committerakiyamn2021-03-23 16:26:18 +1100
commitde34b185172d1ce1b72400e9806600e3cd795f1b (patch)
treed6418d08b991237df4d90922e50dcb366e36b65a /ass1/binary_boyermoore.py
parenta7d2f9a2cbaf5b4fc2135a37d80481105359c867 (diff)
downloadfit3155-de34b185172d1ce1b72400e9806600e3cd795f1b.tar.gz
fit3155-de34b185172d1ce1b72400e9806600e3cd795f1b.zip
boyer-moore minus galil
Diffstat (limited to 'ass1/binary_boyermoore.py')
-rw-r--r--ass1/binary_boyermoore.py84
1 files changed, 78 insertions, 6 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py
index 1d60f52..d9b813d 100644
--- a/ass1/binary_boyermoore.py
+++ b/ass1/binary_boyermoore.py
@@ -1,27 +1,99 @@
-txt = "xpbctbxabacbxtbpqa"
-pat = "tbapxab"
+txt = "zzzzzzabcdzzzzzz"
+# pat = "tbapxab"
+pat = "abcd"
m = len(pat)
n = len(txt)
R = [m for _ in range(0, 26)]
+good_suffix = [0 for _ in range(0, m+1)]
def alpha_number(char):
return ord(char) - 97
+
+def reverse(string):
+ return string[::-1]
+
+
+def compare(string, i, end):
+ for j in range(end):
+ if i+j == end or string[i+j] != string[j]:
+ return j
+
+
+def gusfield(string):
+ z = [0 for _ in string]
+ z[0] = len(string)
+ r = 0
+ l = 0
+ for i in range(1, len(string)):
+ if i == 1: # base case
+ z[1] = compare(string, i, len(string))
+ if z[1] > 0:
+ r = z[1] + 1
+ l = 1
+ elif i > r: # Case 1
+ z[i] = compare(string, i, len(string))
+ if z[i] > 0:
+ q = i + z[i]
+ r = q - 1
+ l = i
+ elif i <= r: # Case 2
+ if z[i-l] < r-i: # Case 2a
+ z[i] = z[i-l]
+ else: # Case 2b
+ q = compare(string, i, len(string))
+ z[i] = q
+ r = q
+ l = i
+ return z
+
+
for i in range(m):
R[alpha_number(pat[i])] = i
+Z = reverse(gusfield(reverse(pat)))+[0]
+print(list(pat))
+print(Z)
+
+for i in range(m):
+ j = m - Z[i]
+ good_suffix[j] = i
+
+print(good_suffix)
+
+matched_prefix = gusfield(pat)+[0]
+for i in range(m-1, -1, -1):
+ matched_prefix[i] = max(matched_prefix[i], matched_prefix[i+1])
+
+print(matched_prefix)
+
j = 0
while j < n-m:
+ matches = 0
for i in range(m-1, -1, -1):
print(j)
match = pat[i] == txt[j+i]
if match:
- pass
+ matches += 1
+ if matches == m:
+ print(f"Match at {j}")
+ good_suffix_shift = m - matched_prefix[1]
+ j += good_suffix_shift
+ i = m-1
+ break
# print(i)
else:
mismatched = txt[j+i]
- shift = i-R[alpha_number(mismatched)]
- j += max(1, shift)
+ bad_char_shift = i-R[alpha_number(mismatched)]
+ good_suffix_shift = 1
+ if good_suffix[i+1] > 0:
+ good_suffix_shift = m - good_suffix[i+1]-1
+ elif good_suffix[i+1] == 0:
+ good_suffix_shift = m - matched_prefix[i+1]
+ elif matches == m:
+ print(f"Match at {j}")
+ good_suffix_shift = m - matched_prefix[1]
+ j += max(good_suffix_shift, bad_char_shift)
if j > n-m:
break
-print(R) \ No newline at end of file
+print(R)