diff options
| author | akiyamn | 2021-03-18 15:06:37 +1100 |
|---|---|---|
| committer | akiyamn | 2021-03-18 15:06:37 +1100 |
| commit | 0dfa5353c274deb9aa455475965cf060e8ce4bd9 (patch) | |
| tree | d8885240bafcde4e5c3767273df98836e21d83a9 /w2 | |
| parent | 527ce5065ae429643433fac3435aeaa8f535e7c1 (diff) | |
| download | fit3155-0dfa5353c274deb9aa455475965cf060e8ce4bd9.tar.gz fit3155-0dfa5353c274deb9aa455475965cf060e8ce4bd9.zip | |
Start of ass1
Diffstat (limited to 'w2')
| -rw-r--r-- | w2/w2lab/main.py | 66 | ||||
| -rw-r--r-- | w2/w2lect.md | 30 |
2 files changed, 96 insertions, 0 deletions
# diff --git a/w2/w2lab/main.py b/w2/w2lab/main.py
# new file mode 100644, index 0000000..2914da3
#
# Z-array lab: two quadratic reference implementations and Gusfield's
# linear-time Z-algorithm.  z[i] is the length of the longest substring
# starting at position i that is also a prefix of the whole string.


def naive(string):
    """Return the Z-array of *string* by direct comparison (O(n^2)).

    z[0] is defined as len(string).  An empty input yields an empty list.
    """
    n = len(string)
    if n == 0:
        return []
    z = [0] * n
    z[0] = n
    for i in range(1, n):
        j = 0
        # Walk forward while the suffix at i keeps agreeing with the prefix.
        while i + j < n and string[i + j] == string[j]:
            j += 1
        z[i] = j
    return z


def naive2(string):
    """Return the Z-array of *string*, delegating each entry to compare().

    Same result as naive(); an empty input yields an empty list.
    """
    n = len(string)
    if n == 0:
        return []
    return [n] + [compare(string, i, n) for i in range(1, n)]


def compare(string, i, end):
    """Return how many characters of string[i:end] match the prefix of string.

    Comparison stops at the first mismatch or when position ``end`` is
    reached, so the result is always an int (``end - i`` for a full match,
    including the trivial ``i == 0`` case).
    """
    j = 0
    while i + j < end and string[i + j] == string[j]:
        j += 1
    return j


def gusfield(string, verbose=False):
    """Return the Z-array of *string* using Gusfield's linear-time algorithm.

    ``left``/``right`` delimit the right-most Z-box found so far, i.e. the
    inclusive index range [left, right] known to equal the prefix
    string[0 : right - left + 1].  ``right == 0`` means no box exists yet
    (positions start at 1, so 0 can never be a real right end).

    Args:
        string: the sequence to preprocess.
        verbose: when true, print which sub-case of case 2 each position
            falls into and the Z-box bounds after every position.

    Returns:
        A list z with z[0] == len(string); an empty list for empty input.
    """
    n = len(string)
    if n == 0:
        return []
    z = [0] * n
    z[0] = n
    left = 0
    right = 0
    for i in range(1, n):
        if i > right:
            # Case 1: i lies outside every known Z-box, compare explicitly.
            length = compare(string, i, n)
            z[i] = length
            if length > 0:
                left = i
                right = i + length - 1
        else:
            # Case 2: i lies inside [left, right]; position i - left in the
            # prefix has already been solved and mirrors position i.
            mirrored = z[i - left]
            remaining = right - i + 1  # characters of the box from i onward
            if mirrored < remaining:
                # 2a: the mirrored match ends strictly inside the box.
                if verbose:
                    print(f"{i} is a case 2a")
                z[i] = mirrored
            else:
                # 2b: the match reaches the end of the box; only characters
                # past ``right`` still need to be compared.
                if verbose:
                    print(f"{i} is a case 2b")
                q = right + 1
                while q < n and string[q] == string[q - i]:
                    q += 1
                z[i] = q - i
                left = i
                right = q - 1

        if verbose:
            print(f"l={left}, r={right}")
    return z


def main():
    """Print the naive and Gusfield Z-arrays of a sample string."""
    string = "ababacababa"
    print(naive2(string))
    print("=" * 15)
    print(gusfield(string, verbose=True))


if __name__ == '__main__':
    main()

# diff --git a/w2/w2lect.md b/w2/w2lect.md
# new file mode 100644, index 0000000..3fee16a
# (start of the lecture notes; the text continues on the following line)
#
# # Boyer-Moore
#
# ## Bad character rule:
#
# Shift pattern along to the left to the right-most version of the bad character
# $O(m+n)$ *mostly*
#
# ## Extended bad character rule
# - 2D array for each char and each position in the pattern
# - **Reduces naive shifts (i.e.
by 1 to the right) but takes more space** + - Could use linked lists or something but that takes more time + +## Good suffix rule +**Makes Boyer-Moore worst case *almost* linear time rather than squared time** +- A suffix before the bad character that you know matches the text +- character to the left of the next instance of the good suffix must be different to the one to the left of the original suffix +- Move pattern along to the right to the point where the next suffix in the pattern matches + +## Galil's optimization +Improves on good suffix rules +**Actually linear time** +extended just makes it slightly faster and doesn't change the complexity + +# KMP +- $O(m+n)$ +- Easier to write, simpler +- Slower in practice than Boyer-Moore +- If mismatched first character, move left by 1 (not covered by slides) +- Use Galil's on this too +- Proof is examinable (BM isn't) |
