Start of ass1

author: akiyamn 2021-03-18 15:06:37 +1100
committer: akiyamn 2021-03-18 15:06:37 +1100
commit: 0dfa5353c274deb9aa455475965cf060e8ce4bd9 (patch)
tree: d8885240bafcde4e5c3767273df98836e21d83a9 /w2
parent: 527ce5065ae429643433fac3435aeaa8f535e7c1 (diff)
download: fit3155-0dfa5353c274deb9aa455475965cf060e8ce4bd9.tar.gz
fit3155-0dfa5353c274deb9aa455475965cf060e8ce4bd9.zip
2 files changed, 96 insertions, 0 deletions
diff --git a/w2/w2lab/main.py b/w2/w2lab/main.py
new file mode 100644
index 0000000..2914da3
--- /dev/null
+++ b/w2/w2lab/main.py
@@ -0,0 +1,66 @@
+def naive(string):
+    """Compute the Z-array of ``string`` by direct comparison.
+
+    ``z[i]`` is the length of the longest substring starting at index ``i``
+    that is also a prefix of ``string``; ``z[0]`` is ``len(string)``.
+
+    Each position is compared against the prefix character by character.
+    Returns an empty list for an empty string.
+    """
+    n = len(string)
+    if n == 0:
+        # Nothing to index into: z[0] does not exist for an empty string.
+        return []
+    z = [0] * n
+    z[0] = n
+    for i in range(1, n):
+        j = 0
+        # Extend the match until the string ends or a character differs.
+        while i + j < n and string[i + j] == string[j]:
+            j += 1
+        z[i] = j
+    return z
+
+
+def naive2(string):
+    """Compute the Z-array of ``string``, delegating each match to ``compare``.
+
+    ``z[0]`` is ``len(string)``; every other entry is the value returned by
+    ``compare(string, i, len(string))``.
+    Returns an empty list for an empty string.
+    """
+    n = len(string)
+    if n == 0:
+        # Avoid assigning z[0] on an empty list.
+        return []
+    return [n] + [compare(string, i, n) for i in range(1, n)]
+
+
+def compare(string, i, end):
+    """Return how many characters starting at ``i`` match the prefix of ``string``.
+
+    Comparison stops at the first differing character or when ``i + j``
+    reaches ``end``. The result is always an integer, including when
+    ``i == 0`` or ``end == 0``.
+    """
+    j = 0
+    while i + j < end and string[i + j] == string[j]:
+        j += 1
+    return j
+
+
+def gusfield(string):
+    """Compute the Z-array of ``string`` using Z-boxes (Gusfield's Z-algorithm).
+
+    ``z[i]`` is the length of the longest substring starting at index ``i``
+    that is also a prefix of ``string``; ``z[0]`` is ``len(string)``.
+
+    ``[l, r]`` is the inclusive span of the right-most Z-box found so far.
+    Characters at or before ``r`` are never compared again, which is what
+    keeps the total number of comparisons linear in ``len(string)``.
+
+    Prints which sub-case each in-box position falls into, and the final
+    ``l`` and ``r``. Returns an empty list for an empty string.
+    """
+    n = len(string)
+    if n == 0:
+        return []
+    z = [0] * n
+    z[0] = n
+    # r == 0 means no Z-box has been found yet, so i == 1 goes through case 1.
+    l = 0
+    r = 0
+    for i in range(1, n):
+        if i > r:  # Case 1: i is outside every known Z-box
+            q = 0
+            while i + q < n and string[i + q] == string[q]:
+                q += 1
+            z[i] = q
+            if q > 0:
+                l = i
+                r = i + q - 1
+        else:  # Case 2: i lies inside the current Z-box [l, r]
+            remaining = r - i + 1  # characters of the box from i to r inclusive
+            if z[i-l] < remaining:  # 2a: the mirrored value fits inside the box
+                print(f"{i} is a case 2a")
+                z[i] = z[i-l]
+            else:  # 2b: match reaches the box edge; extend from r + 1 only
+                print(f"{i} is a case 2b")
+                q = r + 1
+                while q < n and string[q] == string[q - i]:
+                    q += 1
+                z[i] = q - i
+                l = i
+                r = q - 1
+
+    print(f"{l=}, {r=}")
+    return z
+
+
+def main():
+    """Print the Z-array of a sample string from ``naive2`` and ``gusfield``."""
+    sample = "ababacababa"
+    divider = "=" * 15
+    print(naive2(sample))
+    print(divider)
+    print(gusfield(sample))
+
+
+# Run the demo only when executed as a script.
+if __name__ == "__main__":
+    main()
+
diff --git a/w2/w2lect.md b/w2/w2lect.md
new file mode 100644
index 0000000..3fee16a
--- /dev/null
+++ b/w2/w2lect.md
@@ -0,0 +1,30 @@
+# Boyer-Moore
+
+## Bad character rule:
+
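+Shift the pattern along to the right until the right-most occurrence of the bad character in the pattern (to the left of the mismatch) lines up with the bad character in the text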
+$O(m+n)$ *mostly*
+
+## Extended bad character rule
+- 2D array for each char and each position in the pattern
+- **Reduces naive shifts (i.e. by 1 to the right) but takes more space**
+	- Could use linked lists or something but that takes more time
+
+## Good suffix rule
+**Makes Boyer-Moore worst case *almost* linear time rather than squared time**
+- A suffix before the bad character that you know matches the text
+- character to the left of the next instance of the good suffix must be different to the one to the left of the original suffix
+- Move pattern along to the right to the point where the next suffix in the pattern matches
+
+## Galil's optimization
+Improves on good suffix rules
+**Actually linear time**
+extended just makes it slightly faster and doesn't change the complexity
+
+# KMP
+- $O(m+n)$
+- Easier to write, simpler
+- Slower in practice than Boyer-Moore
+- If mismatched first character, move left by 1 (not covered by slides)
+- Use Galil's on this too
+- Proof is examinable (BM isn't)
author	akiyamn	2021-03-18 15:06:37 +1100
committer	akiyamn	2021-03-18 15:06:37 +1100
commit	0dfa5353c274deb9aa455475965cf060e8ce4bd9 (patch)
tree	d8885240bafcde4e5c3767273df98836e21d83a9 /w2
parent	527ce5065ae429643433fac3435aeaa8f535e7c1 (diff)
download	fit3155-0dfa5353c274deb9aa455475965cf060e8ce4bd9.tar.gz fit3155-0dfa5353c274deb9aa455475965cf060e8ce4bd9.zip