-
-
Notifications
You must be signed in to change notification settings - Fork 7.3k
/
rabin_karp.cpp
111 lines (102 loc) · 3.35 KB
/
rabin_karp.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/**
* \file
* \brief The [Rabin-Karp
* Algorithm](https://en.wikipedia.org/wiki/Rabin–Karp_algorithm) for finding a
* pattern within a piece of text with expected complexity O(n + m) (worst case
* O(n * m) when many text windows collide with the pattern's hash)
*/
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>
#ifdef _MSC_VER
#include <string> // use this for MS Visucal C++
#else
#include <cstring>
#endif
#define PRIME 5  ///< Base of the polynomial hash (must stay odd, see detail)
namespace string_search {
/** Implementation helpers; not part of the public interface. */
namespace detail {
/** Hash base as an unsigned 64-bit value, taken from #PRIME. */
constexpr std::uint64_t kBase = PRIME;
/**
 * Multiplicative inverse of #kBase modulo 2^64.
 *
 * All hash arithmetic is done in unsigned 64-bit wrap-around arithmetic.
 * Multiplying by this constant undoes a multiplication by #kBase, so the
 * rolling update can "divide by the base" exactly even after the hash has
 * wrapped around.
 */
constexpr std::uint64_t kBaseInverse = 0xCCCCCCCCCCCCCCCDULL;
static_assert(kBase * kBaseInverse == 1,
              "kBaseInverse must be the inverse of PRIME modulo 2^64");

/**
 * Widen a character the same way the implicit `char` -> `int64_t` promotion
 * does (sign-extending where `char` is signed), then reinterpret modulo 2^64.
 */
inline std::uint64_t char_code(char c) {
    return static_cast<std::uint64_t>(static_cast<std::int64_t>(c));
}

/**
 * Compute PRIME^exponent modulo 2^64 by repeated squaring.
 * Pure integer arithmetic: no floating-point rounding, no signed overflow.
 */
inline std::uint64_t wrapping_power(std::size_t exponent) {
    std::uint64_t result = 1;
    std::uint64_t factor = kBase;
    while (exponent != 0) {
        if ((exponent & 1U) != 0) {
            result *= factor;
        }
        factor *= factor;
        exponent >>= 1U;
    }
    return result;
}

/**
 * Hash the first `count` characters of `s` as
 * sum(s[i] * PRIME^i) modulo 2^64. Requires `count <= s.size()`.
 */
inline std::uint64_t hash_prefix(const std::string& s, std::size_t count) {
    std::uint64_t hash = 0;
    std::uint64_t power = 1;  // PRIME^i, maintained incrementally
    for (std::size_t i = 0; i < count; ++i) {
        hash += char_code(s[i]) * power;
        power *= kBase;
    }
    return hash;
}

/**
 * Slide a window hash one position to the right.
 * \param[in] old_hash hash of the current window
 * \param[in] outgoing first character of the current window (dropped)
 * \param[in] incoming character entering at the end of the new window
 * \param[in] high_power PRIME^(window length - 1) modulo 2^64
 */
inline std::uint64_t roll(std::uint64_t old_hash, char outgoing, char incoming,
                          std::uint64_t high_power) {
    std::uint64_t hash = old_hash - char_code(outgoing);
    hash *= kBaseInverse;  // exact "division" by PRIME modulo 2^64
    hash += char_code(incoming) * high_power;
    return hash;
}
}  // namespace detail

/**
 * Convert the leading part of a string to an integer (hashing function).
 *
 * The hash is sum(s[i] * PRIME^i) for i in [0, n), evaluated in 64-bit
 * wrap-around arithmetic. For short inputs whose exact sum fits in 64 bits
 * the result is that exact sum.
 *
 * \param[in] s source of string to hash
 * \param[in] n length of substring to hash; values larger than `s.size()`
 * are clamped to the string length, values <= 0 hash nothing
 * \returns hash integer (0 for an empty range)
 */
std::int64_t create_hash(const std::string& s, int n) {
    if (n <= 0) {
        return 0;
    }
    const std::size_t wanted = static_cast<std::size_t>(n);
    const std::size_t count = wanted < s.size() ? wanted : s.size();
    // Unsigned -> signed conversion is modular on all mainstream compilers
    // (and guaranteed to be since C++20); the bit pattern is what matters.
    return static_cast<std::int64_t>(detail::hash_prefix(s, count));
}

/**
 * Re-hash a string window using the known hash of the previous window.
 *
 * `old_hash` must be the hash (as produced by create_hash() or by this
 * function) of the `patLength` characters starting at `old_index`, and
 * `new_index` must be the index of the character entering the window
 * (normally `old_index + patLength`). Both indices must be valid for `s`.
 *
 * \param[in] s source of string to hash
 * \param[in] old_index previous index of string (character leaving)
 * \param[in] new_index new index of string (character entering)
 * \param[in] old_hash previous hash of substring
 * \param[in] patLength length of substring to hash
 * \returns new hash integer; 0 when `patLength <= 0` (empty window)
 */
std::int64_t recalculate_hash(const std::string& s, int old_index,
                              int new_index, std::int64_t old_hash,
                              int patLength) {
    if (patLength <= 0) {
        return 0;  // an empty window always hashes to 0, like create_hash
    }
    const std::uint64_t high_power =
        detail::wrapping_power(static_cast<std::size_t>(patLength) - 1);
    const std::uint64_t rolled = detail::roll(
        static_cast<std::uint64_t>(old_hash),
        s[static_cast<std::size_t>(old_index)],
        s[static_cast<std::size_t>(new_index)], high_power);
    return static_cast<std::int64_t>(rolled);
}

/**
 * Compare if two sub-strings are equal.
 *
 * Both ranges are inclusive. Ranges of different length never compare equal;
 * two empty ranges (end < start) of the same length compare equal. The
 * indices are not bounds-checked and must be valid for their strings.
 *
 * \param[in] str1 first string
 * \param[in] str2 second string
 * \param[in] start1,end1 start and end indices for substring in str1
 * \param[in] start2,end2 start and end indices for substring in str2
 * \returns `true` if the two sub-strings are identical
 * \returns `false` otherwise
 */
bool check_if_equal(const std::string& str1, const std::string& str2,
                    int start1, int end1, int start2, int end2) {
    if (end1 - start1 != end2 - start2) {
        return false;
    }
    // Lengths are equal, so walking the first range is sufficient.
    for (int i = start1, j = start2; i <= end1; ++i, ++j) {
        if (str1[i] != str2[j]) {
            return false;
        }
    }
    return true;
}

/**
 * Perform string pattern search using Rabin-Karp algorithm.
 *
 * A pattern longer than the text is never found; an empty pattern matches at
 * index 0. Every hash match is confirmed by a character comparison, so hash
 * collisions cannot produce a false positive.
 *
 * @param[in] str string to search in
 * @param[in] pat pattern to search for
 * @return index of first occurrence of pattern
 * @return -1 if pattern not found
 */
int rabin_karp(const std::string& str, const std::string& pat) {
    const std::size_t text_len = str.size();
    const std::size_t pat_len = pat.size();
    if (pat_len > text_len) {
        return -1;  // also prevents unsigned underflow of text_len - pat_len
    }
    if (pat_len == 0) {
        return 0;
    }

    const std::uint64_t pat_hash = detail::hash_prefix(pat, pat_len);
    std::uint64_t window_hash = detail::hash_prefix(str, pat_len);
    // Weight of the character entering the window; computed once, not per step.
    const std::uint64_t high_power = detail::wrapping_power(pat_len - 1);
    const std::size_t last_start = text_len - pat_len;

    for (std::size_t start = 0;; ++start) {
        if (window_hash == pat_hash &&
            str.compare(start, pat_len, pat) == 0) {
            return static_cast<int>(start);
        }
        if (start == last_start) {
            break;
        }
        window_hash = detail::roll(window_hash, str[start],
                                   str[start + pat_len], high_power);
    }
    return -1;  // return -1 if given pattern not found
}
}  // namespace string_search
using string_search::rabin_karp;
/**
* Main function: self-test for rabin_karp().
*
* Each check passes a text and a pattern and compares the returned index with
* the expected one. The checks use `assert`, so they are compiled out when
* `NDEBUG` is defined.
* \returns 0 when all checks pass
*/
int main(void) {
// Matching is case-sensitive: lowercase "world" is expected not to be found.
assert(rabin_karp("helloWorld", "world") == -1);
// Pattern located at the end of the text.
assert(rabin_karp("helloWorld", "World") == 5);
// Pattern containing non-alphanumeric characters.
assert(rabin_karp("this_is_c++", "c++") == 8);
// Pattern located at the very start of the text.
assert(rabin_karp("happy_coding", "happy") == 0);
return 0;
}