Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 38 additions & 15 deletions strings/knuth-morris-pratt.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
def kmp(pattern, text):
    """
    The Knuth-Morris-Pratt Algorithm for finding a pattern within a piece of text
    with complexity O(n + m)

    1) Build the failure array for the pattern (see get_failure_array), which
       tells us how much of the pattern is still matched after a mismatch.

    2) Step through the text once. On a mismatch part-way through the pattern
       fall back in the pattern via the failure array instead of moving
       backwards in the text.

    :param pattern: the sequence to look for
    :param text: the sequence to search in
    :return: True if pattern occurs somewhere in text, otherwise False.
             An empty pattern is considered to occur in any text.
    """
    # An empty pattern is trivially contained in every text. Without this
    # guard pattern[j] below would raise IndexError.
    if len(pattern) == 0:
        return True

    # 1) Construct the failure array
    failure = get_failure_array(pattern)

    # 2) Step through text searching for pattern
    i, j = 0, 0  # index into text, pattern
    while i < len(text):
        if pattern[j] == text[i]:
            if j == (len(pattern) - 1):
                return True
            j += 1

        # if this is a prefix in our pattern
        # just go back far enough to continue
        elif j > 0:
            j = failure[j - 1]
            # retry the same text character against the shorter prefix
            continue
        i += 1
    return False


if __name__ == '__main__':
def get_failure_array(pattern):
    """
    Calculates the new index we should go to if we fail a comparison

    Entry k holds the length of the longest proper prefix of pattern[:k + 1]
    that is also a suffix of it, e.g. "aabaabaaa" gives
    [0, 1, 0, 1, 2, 3, 4, 5, 2].

    :param pattern: the sequence to build the table for
    :return: a list with one entry per element of pattern (an empty list for
             an empty pattern)
    """
    # Sizing the table from the pattern keeps the empty-pattern case
    # consistent: no elements in, no entries out.
    failure = [0] * len(pattern)
    matched = 0  # length of the prefix currently matched
    for pos in range(1, len(pattern)):
        # On a mismatch fall back to the next shorter prefix that is still a
        # suffix of what has been read so far.
        while matched > 0 and pattern[matched] != pattern[pos]:
            matched = failure[matched - 1]
        if pattern[matched] == pattern[pos]:
            matched += 1
        failure[pos] = matched
    return failure


if __name__ == '__main__':
# Test 1)
pattern = "abc1abc12"
text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"
Expand All @@ -54,4 +65,16 @@ def kmp(pattern, text, len_p=None, len_t=None):
text = "ABABZABABYABABX"
assert kmp(pattern, text)

# Test 3)
pattern = "AAAB"
text = "ABAAAAAB"
assert kmp(pattern, text)

# Test 4)
pattern = "abcdabcy"
text = "abcxabcdabxabcdabcdabcy"
assert kmp(pattern, text)

# Test 5)
pattern = "aabaabaaa"
assert get_failure_array(pattern) == [0, 1, 0, 1, 2, 3, 4, 5, 2]
50 changes: 50 additions & 0 deletions strings/rabin-karp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
def rabin_karp(pattern, text):
"""

The Rabin-Karp Algorithm for finding a pattern within a piece of text
with complexity O(nm), most efficient when it is used with multiple patterns
as it is able to check if any of a set of patterns match a section of text in o(1) given the precomputed hashes.

This will be the simple version which only assumes one pattern is being searched for but it's not hard to modify

1) Calculate pattern hash

2) Step through the text one character at a time passing a window with the same length as the pattern
calculating the hash of the text within the window compare it with the hash of the pattern. Only testing
equality if the hashes match

"""
p_len = len(pattern)
p_hash = hash(pattern)

for i in range(0, len(text) - (p_len - 1)):

# written like this t
text_hash = hash(text[i:i + p_len])
if text_hash == p_hash and \
text[i:i + p_len] == pattern:
return True
return False


if __name__ == '__main__':
    # Self-test table: (pattern, text, whether the pattern occurs in the text)
    checks = [
        # Test 1)
        ("abc1abc12", "alskfjaldsabc1abc1abc12k23adsfabcabc", True),
        ("abc1abc12", "alskfjaldsk23adsfabcabc", False),
        # Test 2)
        ("ABABX", "ABABZABABYABABX", True),
        # Test 3)
        ("AAAB", "ABAAAAAB", True),
        # Test 4)
        ("abcdabcy", "abcxabcdabxabcdabcdabcy", True),
    ]
    for pattern, text, expected in checks:
        assert bool(rabin_karp(pattern, text)) == expected