aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ass1/binary_boyermoore.py79
-rw-r--r--ass1/output_binary_boyermoore.txt942
2 files changed, 967 insertions, 54 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py
index 380eacf..4c41c77 100644
--- a/ass1/binary_boyermoore.py
+++ b/ass1/binary_boyermoore.py
@@ -11,7 +11,6 @@ def alpha_number(char):
if char == "0" or char == "1":
return int(char)
return ord(char) - 97
- # return int(char)
def reverse(string):
@@ -106,96 +105,72 @@ def boyer_moore(pat, txt):
R, good_suffix, matched_prefix = preprocess(pat)
m = len(pat)
n = len(txt)
+ i = m-1
j = 0
- occurrence = 0
+ occurrences = []
galils = 0
comps = 0
-
- # print("="*15)
- # print(6*" " + txt)
- i = m-1
galil = False
- start = None
- stop = None
+ start = 0
+ stop = 0
while j <= n-m:
- # print(f"{j=:02} {' ' * j}", end="")
- # for x in range(len(pat)):
- # if x == i:
- # print(pat[x].upper(), end="")
- # else:
- # print(pat[x], end="")
- # print()
match = pat[i] == txt[j+i]
comps += 1
if match:
if galil and stop >= i > start:
galils += 1
- assert start >= 0
i = max(start-1, 0)
galil = False
if i == 0:
good_suffix_shift = m - matched_prefix[1]
j += good_suffix_shift
- occurrence += 1
+ occurrences.append(j)
i = m-1
else:
i -= 1
else:
- galil = False
- mp = False
- gs = False
mismatched = txt[j + i]
bad_char_shift = i - R[alpha_number(mismatched)][i]
good_suffix_shift = 1
if good_suffix[i+1] > 0:
good_suffix_shift = m - good_suffix[i+1]
- gs = True
start = good_suffix[i+1] - m + i + 1
stop = good_suffix[i+1]
elif good_suffix[i+1] == 0:
good_suffix_shift = m - matched_prefix[i+1]
- mp = True
start = 0
stop = matched_prefix[i + 1]
best_shift = max(good_suffix_shift, bad_char_shift)
j += best_shift
- galil = (mp or gs) and best_shift == good_suffix_shift
+ galil = best_shift == good_suffix_shift
i = m-1
-
- print(f"It found {occurrence} occurences.")
- print(f"Galil'd {galils} times.")
- # print(f"\n {list(range(m))}")
- # print("" + str(list(map(int, pat))))
- # for i, a in enumerate(R):
- # print(chr(i+97), a)
- # print(good_suffix)
- # print(matched_prefix)
- print(f"{comps} comparisons.")
- return comps, occurrence
+ print(comps)
+ return comps, occurrences
def two_to_the(n):
return 1 << n
def chunky_search(pat, txt, factor=2):
- occurrence = 0
+ occurrences = []
comps = 0
for offset in range(two_to_the(factor-1)):
padding = format(offset, f"0{factor-1}b") if len(pat) % factor else ""
augmented_pat = f"{pat}{padding}"
- # print(padding)
- print(condense(format(two_to_the(factor)-1, f"0b"), 0, factor))
c, o = boyer_moore(condense(augmented_pat, 0, factor), condense(txt, offset, factor))
comps += c
- occurrence += o
+ print(offset, o)
+ occurrences += o
base_comps, base_occur = boyer_moore(pat, txt)
+ print(base_occur[:20])
+ print(occurrences[:10])
print("*"*20)
- print(f"Chunky Optimisation: {occurrence} occurences in {comps} comparisons.")
- print(f"Normal: {base_occur} occurences in {base_comps} comparisons.")
- # assert base_occur == occurrence
+ print(f"Chunky Optimisation: {len(occurrences)} occurences in {comps} comparisons.")
+ print(f"Normal: {len(base_occur)} occurences in {base_comps} comparisons.")
if base_comps > 0:
print(f"{comps * 100 / base_comps:.3f}% of normal Boyer-Moore")
print(f"{comps * 100 / 642096:.3f}% of their Boyer-Moore")
+ return comps, occurrences
def read_args():
with open(sys.argv[1], "r") as txt_file:
@@ -204,23 +179,19 @@ def read_args():
pat = pat_file.read()
return txt, pat
-# boyer_moore(condense("01100010"), condense(a))
-# boyer_moore(condense("01100011"), condense(a, offset=1))
+def output_matches(occurrences):
+ with open("output_binary_boyermoore.txt", "w") as file:
+ for o in occurrences:
+ file.write(f"{o}\n")
def main():
- F = 2
+ factor = 2
if len(sys.argv) < 3:
print("Not enough arguments!")
else:
txt, pat = read_args()
- chunky_search(pat, txt, factor=F)
-
-main()
-
-
-
+ comps, occurrences = chunky_search(pat, txt, factor)
+ print(comps)
+ output_matches(occurrences)
-# boyer_moore("111000110", "01110111010101110100101011101101111011111111111111100011001110111010101110100101011101101101110111010110111010010101110110110111011111011011")
-# print(condense("1110000110"))
-# print(condense("1110000110", offset=1))
-# boyer_moore("111011011001110", "101010111010010101110111010101110100101011101101111011111111111101110111010101110100101011101101101110111010101110100101011101101101110111110110110011100000110101011101001010111011011011101100001010101110100101011101110101011101001010111011011011101110101011101001010111011011011101110101011101001010111011011011101100001101010111010010101110110110111011000010111011111011101110110101110110101101100000001011001010101010101110111110111011101101011101101011011000000010110010101010101000010111011111011101110110101110110101101100000001011001010101010100001011101111101110111011010111011010110110000000101100101010101010101011101001010111011011011101100001011101111101110111011010101110100101011101101101110110000101110111110101010111010010101110110110111011000010111011101010111010010101110110110111011000010111010101110100101011101101101010101110100101011101101101110110000101110111110111011101101011101101011011000000010110010101010101111011000010111011111011101110110101110110101101100000001011001010101010110111110111011101101011101101011011000000010110010101010101111011101110110101110110101101100000001011001010101010111101110110101110110101101100000001011001010101010110101110110101101100000001011001010101010110110111011000010101011101001010111011101010111010010101110110110111011101010111010010101110110110111011101010111010010101110110110111011000011010101110100101011101101101110110000101110111110111011101101011101101011011000000010110010101010101011101111101110111011010111011010110110000000101100101010101010000101110111110111011101101011101101011011000000010110010101010101000010111011111011101110110101110110101101100000001011001010101010101010111010010101110110110111011000010111011111011101110110101011101001010111011011011101100001011101111101010101110100101011101101101110110000101110111010101110100101011101101101110110000101110101011101001010111011011010101011101001010111011011011101100001011101111101110111011010111011010110110000000101100101010101011110110000101110111110111011101101011101101011011000000010110010101010101101111101110111011010111011010110110000000101100101010101011110111011101101011101101011011000000010110010101010101111011101101011101101011011000000010110010101010101101011101101011011000000010110010101010101101101110110000101110111110111011101101011101101011011000000010110010101010101101110111110111011101101011101101011011000000010110010101010101101110111110111011101101011101101011011000000010110010101010101011101111101110111011010111011010110110000000101100101010101010000101110111110111011101101011101101011011000000010110010101010101000010111011111011101110110101110110101101100000001011001010101010101010111101111111111111010010101110110110111011000010111011111011101110110101011101001010111011011011101100001011101111101010101110100101011101101101110110000101110111010101110100101011101101101110110000101110101011101001010111011011010101011101001010111011011011101100001011101111101110111011010111011010110110000000101100101010101011110110000101110111110111011101101011101101011011000000010110010101010101101111101110111011010111011010110110000000101100101010101011110111011101101011101101011011000000010110010101010101111011101101011101101011011000000010110010101010101101011101101011011000000010110010101010101101101110110000101110111110111011101101011101101011011000000010110010101010101") \ No newline at end of file
+main() \ No newline at end of file
diff --git a/ass1/output_binary_boyermoore.txt b/ass1/output_binary_boyermoore.txt
new file mode 100644
index 0000000..92be1cb
--- /dev/null
+++ b/ass1/output_binary_boyermoore.txt
@@ -0,0 +1,942 @@
+464
+1461
+4048
+5775
+6661
+6819
+8353
+8391
+8800
+10744
+12431
+14295
+14313
+15495
+16320
+18062
+18837
+19870
+21321
+21545
+22689
+23072
+24058
+24511
+26796
+27628
+29179
+30672
+31637
+32922
+37188
+37553
+38562
+40261
+44478
+45943
+46537
+46599
+46732
+47037
+48324
+50004
+50737
+51431
+51541
+51619
+51816
+53408
+53745
+54342
+55701
+57662
+59420
+60430
+61220
+62869
+63052
+63440
+63680
+64410
+64584
+64895
+66451
+67633
+68932
+69006
+69430
+70768
+71354
+71668
+73099
+73141
+73445
+73532
+73717
+74341
+74484
+74697
+74944
+75882
+76226
+76229
+76644
+76967
+77383
+77958
+79124
+79592
+79601
+81792
+83645
+83985
+84428
+84891
+85579
+87019
+87855
+88385
+89718
+89817
+90909
+92056
+94324
+97761
+99091
+99274
+99507
+102387
+102920
+103093
+103220
+103433
+103607
+103942
+104451
+104586
+105690
+108171
+109210
+109308
+109573
+110114
+111484
+112487
+113353
+113705
+116426
+117642
+118401
+118607
+118613
+118709
+121242
+127074
+128971
+129068
+130717
+130832
+131375
+132275
+134020
+134185
+136858
+138541
+138580
+144970
+146265
+147890
+148974
+149106
+149612
+150069
+154000
+155528
+155843
+156723
+159185
+160711
+161060
+162522
+162775
+162944
+163711
+163812
+164775
+165854
+165867
+165991
+166150
+170165
+170651
+170963
+171778
+172703
+173047
+179304
+180494
+180961
+181396
+182310
+182313
+182650
+183955
+184019
+185576
+185851
+185909
+188230
+188342
+188510
+188972
+190307
+191123
+191802
+193285
+193341
+194573
+196399
+196523
+196555
+197858
+200973
+201042
+201528
+204030
+204582
+204786
+205381
+206801
+208914
+209829
+210013
+210132
+212030
+212290
+212756
+213964
+214285
+215178
+215549
+218844
+220018
+220497
+220728
+220819
+223943
+224918
+226885
+227893
+227896
+228421
+228733
+230760
+232688
+232746
+234461
+234757
+234963
+236477
+239505
+239806
+240103
+240891
+242853
+244107
+245007
+245202
+245757
+247858
+250154
+250633
+250825
+251687
+251923
+253977
+257404
+257643
+257865
+260857
+262853
+263053
+263992
+264312
+264829
+266105
+267867
+268124
+268127
+268460
+270263
+271536
+271629
+272157
+272548
+273051
+273187
+273522
+274815
+275320
+277573
+277845
+278554
+279853
+281974
+282164
+282804
+285832
+287094
+287226
+288105
+289820
+290381
+291828
+292572
+293132
+294405
+298406
+298409
+300370
+301186
+302277
+302615
+302688
+303139
+303458
+303476
+303653
+303670
+303983
+304212
+305278
+306305
+307044
+307061
+308654
+309314
+310783
+311126
+311562
+313104
+313935
+314008
+314932
+318305
+318488
+318806
+319488
+319601
+319890
+321425
+322729
+323700
+324226
+325292
+325502
+326081
+328472
+328543
+328650
+328874
+328981
+330329
+331105
+338768
+340487
+340660
+341268
+342153
+344842
+348092
+348691
+350424
+350608
+350860
+351729
+352937
+353153
+353169
+353588
+354125
+355247
+355843
+356889
+357821
+358065
+358622
+359508
+359523
+359869
+360710
+362821
+364372
+367952
+371670
+371839
+372276
+373592
+374138
+374445
+375843
+377427
+378257
+380666
+381365
+383748
+385566
+387357
+387484
+387487
+388776
+390280
+392833
+392861
+393182
+393253
+393415
+393908
+394255
+395919
+396419
+396556
+396643
+396915
+397201
+398935
+399082
+400031
+400423
+401283
+401975
+403063
+403348
+406828
+409492
+410896
+411139
+413040
+413045
+414448
+414743
+415439
+416292
+418648
+419237
+421069
+421227
+423158
+424213
+425534
+428816
+429366
+430474
+430745
+431317
+431615
+432919
+433297
+433960
+434390
+434490
+435357
+436736
+436771
+437284
+440334
+440739
+443027
+443642
+443942
+444847
+446069
+447584
+448983
+449892
+451653
+451857
+452515
+453090
+453729
+454721
+455522
+455701
+456498
+458215
+459335
+461790
+466643
+470243
+472263
+475232
+479635
+480151
+480303
+482847
+483094
+483400
+485401
+485667
+487886
+488163
+488215
+491796
+495958
+496184
+497624
+2359
+3217
+4843
+7294
+8518
+8670
+9478
+9765
+9875
+10841
+13537
+14748
+15620
+15623
+16324
+22451
+23706
+24400
+24444
+25093
+27404
+28370
+32603
+32641
+32822
+35400
+37634
+38255
+38968
+39179
+41318
+41855
+42134
+42151
+42293
+42353
+42360
+43499
+44099
+44747
+45238
+46385
+46830
+47613
+48008
+48451
+48707
+50870
+54510
+54654
+55800
+57679
+57961
+58508
+59523
+60454
+60947
+62475
+62527
+63176
+64433
+66467
+67914
+68081
+70000
+71066
+72626
+72897
+73862
+76385
+77200
+77709
+77722
+77807
+78452
+83238
+83811
+84207
+84621
+84899
+85616
+85621
+86170
+88051
+88108
+88634
+88776
+88922
+88978
+89524
+89786
+90633
+91238
+92661
+93093
+95566
+102928
+103019
+107432
+108357
+109322
+110032
+110626
+113623
+113929
+114184
+114279
+114434
+115012
+115771
+119641
+120246
+121618
+125572
+127566
+128979
+129141
+134179
+137231
+137360
+137802
+137868
+137871
+139351
+140226
+140603
+140987
+141461
+142365
+142418
+143772
+144712
+145824
+150373
+152408
+153657
+156690
+157001
+158111
+158193
+158267
+158296
+159569
+164517
+164520
+165099
+166258
+169130
+171036
+176307
+176854
+177148
+178581
+178726
+178965
+179825
+179872
+181261
+182691
+183226
+183473
+187045
+187361
+187973
+188721
+188841
+190498
+191376
+194731
+197627
+198698
+201550
+203585
+203668
+203926
+204649
+206898
+209400
+210065
+210232
+210604
+211409
+212942
+213244
+213750
+215576
+216909
+217269
+217375
+220707
+224454
+225877
+225902
+226167
+226791
+227635
+227937
+230783
+231996
+233517
+237629
+237979
+239031
+239049
+239219
+240896
+242162
+243149
+245113
+245254
+245528
+246545
+247169
+247553
+248932
+249203
+249788
+251404
+251541
+252266
+253723
+254446
+254991
+255376
+255609
+256091
+259463
+261288
+262235
+262864
+270983
+271414
+272403
+272509
+272828
+273489
+273976
+274032
+274721
+275058
+275367
+276219
+276992
+278448
+279219
+279929
+280113
+280490
+283328
+285863
+286528
+286534
+286947
+287759
+288806
+288843
+288911
+289479
+291432
+292360
+292648
+292707
+296090
+298451
+298630
+299816
+301218
+301484
+302457
+303241
+303648
+305813
+307809
+308262
+309118
+309420
+311143
+312921
+314304
+315955
+316844
+316847
+317509
+318434
+319178
+319255
+320141
+320633
+320656
+320779
+321005
+323265
+324316
+325662
+328316
+328960
+329440
+331389
+334246
+334975
+335031
+335672
+337369
+337397
+337478
+338142
+339312
+340106
+343232
+343235
+343299
+343910
+344967
+345175
+345329
+345790
+347511
+347994
+348415
+348880
+349163
+349185
+351119
+352260
+353413
+355098
+356599
+358233
+360033
+360036
+362141
+367612
+367862
+368768
+371462
+372167
+372497
+372664
+374788
+376922
+377086
+377145
+377749
+378128
+378913
+382473
+385185
+386361
+387981
+389800
+390639
+391624
+391879
+393878
+396167
+396872
+397244
+397801
+398552
+400255
+401703
+402890
+404461
+406297
+406583
+408130
+408246
+408278
+409641
+410605
+410804
+411225
+411262
+411521
+413329
+413754
+415508
+418754
+421379
+422308
+422775
+422880
+423210
+423442
+423789
+423792
+424072
+424134
+425285
+425288
+425755
+427064
+428490
+429542
+429545
+430139
+430537
+430680
+433466
+434862
+435787
+435790
+437340
+437467
+437951
+438061
+439292
+441342
+443782
+445097
+448606
+449774
+454558
+455967
+457524
+457595
+459511
+459565
+460425
+460826
+461089
+461837
+462185
+462868
+463736
+464870
+467091
+467164
+467766
+468613
+468992
+469306
+469856
+472282
+476579
+477121
+477547
+478201
+478887
+480888
+484035
+484204
+485190
+486400
+487853
+488007
+488253
+488546
+488549
+488552
+490084
+491317
+494446
+494985
+495868
+498466
+498520