aboutsummaryrefslogtreecommitdiff
path: root/w2
diff options
context:
space:
mode:
authorakiyamn2021-03-18 15:06:37 +1100
committerakiyamn2021-03-18 15:06:37 +1100
commit0dfa5353c274deb9aa455475965cf060e8ce4bd9 (patch)
treed8885240bafcde4e5c3767273df98836e21d83a9 /w2
parent527ce5065ae429643433fac3435aeaa8f535e7c1 (diff)
downloadfit3155-0dfa5353c274deb9aa455475965cf060e8ce4bd9.tar.gz
fit3155-0dfa5353c274deb9aa455475965cf060e8ce4bd9.zip
Start of ass1
Diffstat (limited to 'w2')
-rw-r--r--w2/w2lab/main.py66
-rw-r--r--w2/w2lect.md30
2 files changed, 96 insertions, 0 deletions
diff --git a/w2/w2lab/main.py b/w2/w2lab/main.py
new file mode 100644
index 0000000..2914da3
--- /dev/null
+++ b/w2/w2lab/main.py
@@ -0,0 +1,66 @@
def naive(string):
    """Compute the Z-array of ``string`` by direct character comparison.

    ``z[i]`` is the length of the longest substring starting at index ``i``
    that matches a prefix of ``string``. By convention ``z[0]`` is the length
    of the whole string.

    Args:
        string: The sequence to analyse (any indexable sequence with ``len``).

    Returns:
        A list of ints with one entry per position of ``string``; an empty
        list when ``string`` is empty.
    """
    n = len(string)
    if n == 0:
        # Nothing to index: the original crashed here on ``z[0] = ...``.
        return []

    z = [0] * n
    z[0] = n
    for i in range(1, n):
        # Walk forward while the suffix starting at i keeps agreeing with
        # the prefix and we have not run off the end of the string.
        j = 0
        while i + j < n and string[i + j] == string[j]:
            j += 1
        z[i] = j
    return z
+
+
def naive2(string):
    """Compute the Z-array of ``string`` using the ``compare`` helper.

    Same result as the direct nested-loop version, but each entry is obtained
    by asking ``compare`` how far the suffix at ``i`` agrees with the prefix.

    Args:
        string: The sequence to analyse.

    Returns:
        A list of ints with one entry per position of ``string`` (``z[0]`` is
        the full length); an empty list when ``string`` is empty.
    """
    n = len(string)
    if n == 0:
        # Guard the ``z[0]`` assignment, which would fail on an empty list.
        return []

    z = [0] * n
    z[0] = n
    for i in range(1, n):
        z[i] = compare(string, i, n)
    return z
+
+
def compare(string, i, end):
    """Return how many characters of ``string[i:end]`` match the prefix.

    Compares ``string[i + j]`` with ``string[j]`` for ``j = 0, 1, ...`` and
    stops at the first mismatch or when ``i + j`` reaches ``end``.

    Args:
        string: The sequence to compare against its own prefix.
        i: Start index of the suffix being compared.
        end: Exclusive upper bound on the indices that may be read.

    Returns:
        The number of matching characters, always an int. In particular this
        is ``end`` when ``i == 0`` and ``0`` when ``i >= end``; the previous
        for-loop fell through and returned ``None`` in those situations.
    """
    j = 0
    while i + j < end and string[i + j] == string[j]:
        j += 1
    return j
+
+
def _extend_match(string, i, matched):
    """Grow a known match between the prefix and ``string[i:]``.

    ``matched`` characters are already known to agree; comparison resumes at
    offset ``matched`` and the final match length is returned.
    """
    n = len(string)
    while i + matched < n and string[matched] == string[i + matched]:
        matched += 1
    return matched


def gusfield(string, *, verbose=False):
    """Compute the Z-array of ``string`` with Gusfield's linear-time method.

    ``z[i]`` is the length of the longest substring starting at ``i`` that
    matches a prefix of ``string``; ``z[0]`` is the length of the string.

    The loop maintains ``[l, r]`` (both inclusive), the right-most window
    found so far that matches a prefix. Positions inside the window reuse the
    value already computed for the mirrored prefix position ``i - l``, and
    explicit comparisons only ever resume past ``r``.

    Args:
        string: The sequence to analyse.
        verbose: When true, print which case each index fell into together
            with the current ``l`` and ``r``. Off by default so the function
            has no output side effects.

    Returns:
        A list of ints with one entry per position of ``string``; an empty
        list when ``string`` is empty.
    """
    n = len(string)
    if n == 0:
        return []

    z = [0] * n
    z[0] = n
    # Empty window: index 1 always starts with explicit comparison (Case 1).
    l = 0
    r = 0
    for i in range(1, n):
        if i > r:  # Case 1: outside every known window, compare from scratch
            z[i] = _extend_match(string, i, 0)
            if z[i] > 0:
                l = i
                r = i + z[i] - 1
        else:  # Case 2: i lies inside the window [l, r]
            remaining = r - i + 1  # characters from i to r, inclusive
            if z[i - l] < remaining:  # 2a: mirrored match ends inside window
                if verbose:
                    print(f"{i} is a case 2a")
                z[i] = z[i - l]
            else:  # 2b: match reaches r, so keep comparing past it
                if verbose:
                    print(f"{i} is a case 2b")
                # The first `remaining` characters are already known to
                # match, so resume there instead of restarting at offset 0.
                z[i] = _extend_match(string, i, remaining)
                l = i
                r = i + z[i] - 1

        if verbose:
            print(f"{l=}, {r=}")
    return z
+
+
def main():
    """Print the naive and Gusfield Z-arrays for a fixed sample string."""
    sample = "ababacababa"

    # Reference result first, then a separator, then the faster algorithm.
    reference = naive2(sample)
    print(reference)
    print("=" * 15)
    fast = gusfield(sample)
    print(fast)


if __name__ == "__main__":
    main()
+
diff --git a/w2/w2lect.md b/w2/w2lect.md
new file mode 100644
index 0000000..3fee16a
--- /dev/null
+++ b/w2/w2lect.md
@@ -0,0 +1,30 @@
+# Boyer-Moore
+
+## Bad character rule:
+
+Shift the pattern to the right until the right-most occurrence of the bad character in the pattern lines up with the mismatched character in the text
+$O(m+n)$ *mostly*
+
+## Extended bad character rule
+- 2D array for each char and each position in the pattern
+- **Reduces naive shifts (i.e. by 1 to the right) but takes more space**
+ - Could use linked lists or something but that takes more time
+
+## Good suffix rule
+**Makes Boyer-Moore worst case *almost* linear time rather than squared time**
+- A suffix before the bad character that you know matches the text
+- character to the left of the next instance of the good suffix must be different to the one to the left of the original suffix
+- Move pattern along to the right to the point where the next suffix in the pattern matches
+
+## Galil's optimization
+Improves on good suffix rules
+**Actually linear time**
+extended just makes it slightly faster and doesn't change the complexity
+
+# KMP
+- $O(m+n)$
+- Easier to write, simpler
+- Slower in practice than Boyer-Moore
+- If the mismatch is at the first character, shift the pattern right by 1 (not covered by slides)
+- Use Galil's on this too
+- Proof is examinable (BM isn't)