diff options
Diffstat (limited to 'ass1')
| -rw-r--r-- | ass1/binary_boyermoore.py | 79 | ||||
| -rw-r--r-- | ass1/output_binary_boyermoore.txt | 942 |
2 files changed, 967 insertions, 54 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py index 380eacf..4c41c77 100644 --- a/ass1/binary_boyermoore.py +++ b/ass1/binary_boyermoore.py @@ -11,7 +11,6 @@ def alpha_number(char): if char == "0" or char == "1": return int(char) return ord(char) - 97 - # return int(char) def reverse(string): @@ -106,96 +105,72 @@ def boyer_moore(pat, txt): R, good_suffix, matched_prefix = preprocess(pat) m = len(pat) n = len(txt) + i = m-1 j = 0 - occurrence = 0 + occurrences = [] galils = 0 comps = 0 - - # print("="*15) - # print(6*" " + txt) - i = m-1 galil = False - start = None - stop = None + start = 0 + stop = 0 while j <= n-m: - # print(f"{j=:02} {' ' * j}", end="") - # for x in range(len(pat)): - # if x == i: - # print(pat[x].upper(), end="") - # else: - # print(pat[x], end="") - # print() match = pat[i] == txt[j+i] comps += 1 if match: if galil and stop >= i > start: galils += 1 - assert start >= 0 i = max(start-1, 0) galil = False if i == 0: good_suffix_shift = m - matched_prefix[1] j += good_suffix_shift - occurrence += 1 + occurrences.append(j) i = m-1 else: i -= 1 else: - galil = False - mp = False - gs = False mismatched = txt[j + i] bad_char_shift = i - R[alpha_number(mismatched)][i] good_suffix_shift = 1 if good_suffix[i+1] > 0: good_suffix_shift = m - good_suffix[i+1] - gs = True start = good_suffix[i+1] - m + i + 1 stop = good_suffix[i+1] elif good_suffix[i+1] == 0: good_suffix_shift = m - matched_prefix[i+1] - mp = True start = 0 stop = matched_prefix[i + 1] best_shift = max(good_suffix_shift, bad_char_shift) j += best_shift - galil = (mp or gs) and best_shift == good_suffix_shift + galil = best_shift == good_suffix_shift i = m-1 - - print(f"It found {occurrence} occurences.") - print(f"Galil'd {galils} times.") - # print(f"\n {list(range(m))}") - # print("" + str(list(map(int, pat)))) - # for i, a in enumerate(R): - # print(chr(i+97), a) - # print(good_suffix) - # print(matched_prefix) - print(f"{comps} comparisons.") - return comps, occurrence + print(comps) + return comps, occurrences def two_to_the(n): return 1 << n def chunky_search(pat, txt, factor=2): - occurrence = 0 + occurrences = [] comps = 0 for offset in range(two_to_the(factor-1)): padding = format(offset, f"0{factor-1}b") if len(pat) % factor else "" augmented_pat = f"{pat}{padding}" - # print(padding) - print(condense(format(two_to_the(factor)-1, f"0b"), 0, factor)) c, o = boyer_moore(condense(augmented_pat, 0, factor), condense(txt, offset, factor)) comps += c - occurrence += o + print(offset, o) + occurrences += o base_comps, base_occur = boyer_moore(pat, txt) + print(base_occur[:20]) + print(occurrences[:10]) print("*"*20) - print(f"Chunky Optimisation: {occurrence} occurences in {comps} comparisons.") - print(f"Normal: {base_occur} occurences in {base_comps} comparisons.") - # assert base_occur == occurrence + print(f"Chunky Optimisation: {len(occurrences)} occurences in {comps} comparisons.") + print(f"Normal: {len(base_occur)} occurences in {base_comps} comparisons.") if base_comps > 0: print(f"{comps * 100 / base_comps:.3f}% of normal Boyer-Moore") print(f"{comps * 100 / 642096:.3f}% of their Boyer-Moore") + return comps, occurrences def read_args(): with open(sys.argv[1], "r") as txt_file: @@ -204,23 +179,19 @@ def read_args(): pat = pat_file.read() return txt, pat -# boyer_moore(condense("01100010"), condense(a)) -# boyer_moore(condense("01100011"), condense(a, offset=1)) +def output_matches(occurrences): + with open("output_binary_boyermoore.txt", "w") as file: + for o in occurrences: + file.write(f"{o}\n") def main(): - F = 2 + factor = 2 if len(sys.argv) < 3: print("Not enough arguments!") else: txt, pat = read_args() - chunky_search(pat, txt, factor=F) - -main() - - - + comps, occurrences = chunky_search(pat, txt, factor) + print(comps) + output_matches(occurrences) -# boyer_moore("111000110", "01110111010101110100101011101101111011111111111111100011001110111010101110100101011101101101110111010110111010010101110110110111011111011011") -# print(condense("1110000110")) -# print(condense("1110000110", offset=1)) -# boyer_moore("111011011001110", "101010111010010101110111010101110100101011101101111011111111111101110111010101110100101011101101101110111010101110100101011101101101110111110110110011100000110101011101001010111011011011101100001010101110100101011101110101011101001010111011011011101110101011101001010111011011011101110101011101001010111011011011101100001101010111010010101110110110111011000010111011111011101110110101110110101101100000001011001010101010101110111110111011101101011101101011011000000010110010101010101000010111011111011101110110101110110101101100000001011001010101010100001011101111101110111011010111011010110110000000101100101010101010101011101001010111011011011101100001011101111101110111011010101110100101011101101101110110000101110111110101010111010010101110110110111011000010111011101010111010010101110110110111011000010111010101110100101011101101101010101110100101011101101101110110000101110111110111011101101011101101011011000000010110010101010101111011000010111011111011101110110101110110101101100000001011001010101010110111110111011101101011101101011011000000010110010101010101111011101110110101110110101101100000001011001010101010111101110110101110110101101100000001011001010101010110101110110101101100000001011001010101010110110111011000010101011101001010111011101010111010010101110110110111011101010111010010101110110110111011101010111010010101110110110111011000011010101110100101011101101101110110000101110111110111011101101011101101011011000000010110010101010101011101111101110111011010111011010110110000000101100101010101010000101110111110111011101101011101101011011000000010110010101010101000010111011111011101110110101110110101101100000001011001010101010101010111010010101110110110111011000010111011111011101110110101011101001010111011011011101100001011101111101010101110100101011101101101110110000101110111010101110100101011101101101110110000101110101011101001010111011011010101011101001010111011011011101100001011101111101110111011010111011010110110000000101100101010101011110110000101110111110111011101101011101101011011000000010110010101010101101111101110111011010111011010110110000000101100101010101011110111011101101011101101011011000000010110010101010101111011101101011101101011011000000010110010101010101101011101101011011000000010110010101010101101101110110000101110111110111011101101011101101011011000000010110010101010101101110111110111011101101011101101011011000000010110010101010101101110111110111011101101011101101011011000000010110010101010101011101111101110111011010111011010110110000000101100101010101010000101110111110111011101101011101101011011000000010110010101010101000010111011111011101110110101110110101101100000001011001010101010101010111101111111111111010010101110110110111011000010111011111011101110110101011101001010111011011011101100001011101111101010101110100101011101101101110110000101110111010101110100101011101101101110110000101110101011101001010111011011010101011101001010111011011011101100001011101111101110111011010111011010110110000000101100101010101011110110000101110111110111011101101011101101011011000000010110010101010101101111101110111011010111011010110110000000101100101010101011110111011101101011101101011011000000010110010101010101111011101101011101101011011000000010110010101010101101011101101011011000000010110010101010101101101110110000101110111110111011101101011101101011011000000010110010101010101")
\ No newline at end of file +main()
\ No newline at end of file diff --git a/ass1/output_binary_boyermoore.txt b/ass1/output_binary_boyermoore.txt new file mode 100644 index 0000000..92be1cb --- /dev/null +++ b/ass1/output_binary_boyermoore.txt @@ -0,0 +1,942 @@ +464 +1461 +4048 +5775 +6661 +6819 +8353 +8391 +8800 +10744 +12431 +14295 +14313 +15495 +16320 +18062 +18837 +19870 +21321 +21545 +22689 +23072 +24058 +24511 +26796 +27628 +29179 +30672 +31637 +32922 +37188 +37553 +38562 +40261 +44478 +45943 +46537 +46599 +46732 +47037 +48324 +50004 +50737 +51431 +51541 +51619 +51816 +53408 +53745 +54342 +55701 +57662 +59420 +60430 +61220 +62869 +63052 +63440 +63680 +64410 +64584 +64895 +66451 +67633 +68932 +69006 +69430 +70768 +71354 +71668 +73099 +73141 +73445 +73532 +73717 +74341 +74484 +74697 +74944 +75882 +76226 +76229 +76644 +76967 +77383 +77958 +79124 +79592 +79601 +81792 +83645 +83985 +84428 +84891 +85579 +87019 +87855 +88385 +89718 +89817 +90909 +92056 +94324 +97761 +99091 +99274 +99507 +102387 +102920 +103093 +103220 +103433 +103607 +103942 +104451 +104586 +105690 +108171 +109210 +109308 +109573 +110114 +111484 +112487 +113353 +113705 +116426 +117642 +118401 +118607 +118613 +118709 +121242 +127074 +128971 +129068 +130717 +130832 +131375 +132275 +134020 +134185 +136858 +138541 +138580 +144970 +146265 +147890 +148974 +149106 +149612 +150069 +154000 +155528 +155843 +156723 +159185 +160711 +161060 +162522 +162775 +162944 +163711 +163812 +164775 +165854 +165867 +165991 +166150 +170165 +170651 +170963 +171778 +172703 +173047 +179304 +180494 +180961 +181396 +182310 +182313 +182650 +183955 +184019 +185576 +185851 +185909 +188230 +188342 +188510 +188972 +190307 +191123 +191802 +193285 +193341 +194573 +196399 +196523 +196555 +197858 +200973 +201042 +201528 +204030 +204582 +204786 +205381 +206801 +208914 +209829 +210013 +210132 +212030 +212290 +212756 +213964 +214285 +215178 +215549 +218844 +220018 +220497 +220728 +220819 +223943 +224918 +226885 +227893 +227896 +228421 +228733 +230760 +232688 +232746 +234461 +234757 +234963 +236477 +239505 +239806 +240103 +240891 +242853 +244107 +245007 +245202 +245757 +247858 +250154 +250633 +250825 +251687 +251923 +253977 +257404 +257643 +257865 +260857 +262853 +263053 +263992 +264312 +264829 +266105 +267867 +268124 +268127 +268460 +270263 +271536 +271629 +272157 +272548 +273051 +273187 +273522 +274815 +275320 +277573 +277845 +278554 +279853 +281974 +282164 +282804 +285832 +287094 +287226 +288105 +289820 +290381 +291828 +292572 +293132 +294405 +298406 +298409 +300370 +301186 +302277 +302615 +302688 +303139 +303458 +303476 +303653 +303670 +303983 +304212 +305278 +306305 +307044 +307061 +308654 +309314 +310783 +311126 +311562 +313104 +313935 +314008 +314932 +318305 +318488 +318806 +319488 +319601 +319890 +321425 +322729 +323700 +324226 +325292 +325502 +326081 +328472 +328543 +328650 +328874 +328981 +330329 +331105 +338768 +340487 +340660 +341268 +342153 +344842 +348092 +348691 +350424 +350608 +350860 +351729 +352937 +353153 +353169 +353588 +354125 +355247 +355843 +356889 +357821 +358065 +358622 +359508 +359523 +359869 +360710 +362821 +364372 +367952 +371670 +371839 +372276 +373592 +374138 +374445 +375843 +377427 +378257 +380666 +381365 +383748 +385566 +387357 +387484 +387487 +388776 +390280 +392833 +392861 +393182 +393253 +393415 +393908 +394255 +395919 +396419 +396556 +396643 +396915 +397201 +398935 +399082 +400031 +400423 +401283 +401975 +403063 +403348 +406828 +409492 +410896 +411139 +413040 +413045 +414448 +414743 +415439 +416292 +418648 +419237 +421069 +421227 +423158 +424213 +425534 +428816 +429366 +430474 +430745 +431317 +431615 +432919 +433297 +433960 +434390 +434490 +435357 +436736 +436771 +437284 +440334 +440739 +443027 +443642 +443942 +444847 +446069 +447584 +448983 +449892 +451653 +451857 +452515 +453090 +453729 +454721 +455522 +455701 +456498 +458215 +459335 +461790 +466643 +470243 +472263 +475232 +479635 +480151 +480303 +482847 +483094 +483400 +485401 +485667 +487886 +488163 +488215 +491796 +495958 +496184 +497624 +2359 +3217 +4843 +7294 +8518 +8670 +9478 +9765 +9875 +10841 +13537 +14748 +15620 +15623 +16324 +22451 +23706 +24400 +24444 +25093 +27404 +28370 +32603 +32641 +32822 +35400 +37634 +38255 +38968 +39179 +41318 +41855 +42134 +42151 +42293 +42353 +42360 +43499 +44099 +44747 +45238 +46385 +46830 +47613 +48008 +48451 +48707 +50870 +54510 +54654 +55800 +57679 +57961 +58508 +59523 +60454 +60947 +62475 +62527 +63176 +64433 +66467 +67914 +68081 +70000 +71066 +72626 +72897 +73862 +76385 +77200 +77709 +77722 +77807 +78452 +83238 +83811 +84207 +84621 +84899 +85616 +85621 +86170 +88051 +88108 +88634 +88776 +88922 +88978 +89524 +89786 +90633 +91238 +92661 +93093 +95566 +102928 +103019 +107432 +108357 +109322 +110032 +110626 +113623 +113929 +114184 +114279 +114434 +115012 +115771 +119641 +120246 +121618 +125572 +127566 +128979 +129141 +134179 +137231 +137360 +137802 +137868 +137871 +139351 +140226 +140603 +140987 +141461 +142365 +142418 +143772 +144712 +145824 +150373 +152408 +153657 +156690 +157001 +158111 +158193 +158267 +158296 +159569 +164517 +164520 +165099 +166258 +169130 +171036 +176307 +176854 +177148 +178581 +178726 +178965 +179825 +179872 +181261 +182691 +183226 +183473 +187045 +187361 +187973 +188721 +188841 +190498 +191376 +194731 +197627 +198698 +201550 +203585 +203668 +203926 +204649 +206898 +209400 +210065 +210232 +210604 +211409 +212942 +213244 +213750 +215576 +216909 +217269 +217375 +220707 +224454 +225877 +225902 +226167 +226791 +227635 +227937 +230783 +231996 +233517 +237629 +237979 +239031 +239049 +239219 +240896 +242162 +243149 +245113 +245254 +245528 +246545 +247169 +247553 +248932 +249203 +249788 +251404 +251541 +252266 +253723 +254446 +254991 +255376 +255609 +256091 +259463 +261288 +262235 +262864 +270983 +271414 +272403 +272509 +272828 +273489 +273976 +274032 +274721 +275058 +275367 +276219 +276992 +278448 +279219 +279929 +280113 +280490 +283328 +285863 +286528 +286534 +286947 +287759 +288806 +288843 +288911 +289479 +291432 +292360 +292648 +292707 +296090 +298451 +298630 +299816 +301218 +301484 +302457 +303241 +303648 +305813 +307809 +308262 +309118 +309420 +311143 +312921 +314304 +315955 +316844 +316847 +317509 +318434 +319178 +319255 +320141 +320633 +320656 +320779 +321005 +323265 +324316 +325662 +328316 +328960 +329440 +331389 +334246 +334975 +335031 +335672 +337369 +337397 +337478 +338142 +339312 +340106 +343232 +343235 +343299 +343910 +344967 +345175 +345329 +345790 +347511 +347994 +348415 +348880 +349163 +349185 +351119 +352260 +353413 +355098 +356599 +358233 +360033 +360036 +362141 +367612 +367862 +368768 +371462 +372167 +372497 +372664 +374788 +376922 +377086 +377145 +377749 +378128 +378913 +382473 +385185 +386361 +387981 +389800 +390639 +391624 +391879 +393878 +396167 +396872 +397244 +397801 +398552 +400255 +401703 +402890 +404461 +406297 +406583 +408130 +408246 +408278 +409641 +410605 +410804 +411225 +411262 +411521 +413329 +413754 +415508 +418754 +421379 +422308 +422775 +422880 +423210 +423442 +423789 +423792 +424072 +424134 +425285 +425288 +425755 +427064 +428490 +429542 +429545 +430139 +430537 +430680 +433466 +434862 +435787 +435790 +437340 +437467 +437951 +438061 +439292 +441342 +443782 +445097 +448606 +449774 +454558 +455967 +457524 +457595 +459511 +459565 +460425 +460826 +461089 +461837 +462185 +462868 +463736 +464870 +467091 +467164 +467766 +468613 +468992 +469306 +469856 +472282 +476579 +477121 +477547 +478201 +478887 +480888 +484035 +484204 +485190 +486400 +487853 +488007 +488253 +488546 +488549 +488552 +490084 +491317 +494446 +494985 +495868 +498466 +498520 |
