字符串哈希

字符串哈希

参考:https://oi-wiki.org/string/hash

1. 哈希函数定义

\[ \begin{align} \text{hash}(n) & = \sum_{i=1}^{n} s_i \times B^{N-i} \mod M \\ & = \text{hash}(n-1) \times B + s_n \mod M \\ & = (s_1 \times B^{N-1} + s_2 \times B^{N-2} + \cdots + s_N) \mod M \end{align} \]

\[ \text{hash}(l, r) = \text{hash}(r) - \text{hash}(l-1) \times B^{r-l+1} \mod M \]

\[ B = 131, M \in \{998244353, 1000000007\} \]

2. 例题

1. leetcode28 Find the Index of the First Occurrence in a String

python
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
class StrHash:
MODS = [998244353, 1000000007]

class Hasher:
BASE = 131

def __init__(self, s: str, m: int) -> None:
self.m = m
N = len(s)
self.p = [1] * (N + 1)
for i in range(1, N + 1):
self.p[i] = self.p[i - 1] * self.BASE % m
self.a = [0] * (N + 1)
for i, c in enumerate(s, 1):
self.a[i] = (self.a[i - 1] * self.BASE + ord(c)) % m

def query(self, l: int, r: int) -> int:
return (self.a[r] - self.a[l - 1] * self.p[r - l + 1] % self.m + self.m) % self.m

def __init__(self, s: str) -> None:
self.hashers = tuple(self.Hasher(s, m) for m in self.MODS)

def query(self, l: int, r: int) -> tuple[int, ...]:
"[l, r)"
return tuple(h.query(l + 1, r) for h in self.hashers)


class Solution:
def strStr(self, haystack: str, needle: str) -> int:
N, M = len(haystack), len(needle)
target = StrHash(needle).query(0, M)
h = StrHash(haystack)
for i in range(N - M + 1):
if h.query(i, i + M) == target:
return i
return -1

2. CF1200E Compress Words

  • I want to order pizza \(\rightarrow\) Iwantorderpizza
  • sample please ease in out \(\rightarrow\) sampleaseinout
cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#include <bits/stdc++.h>
using namespace std;
using ull = unsigned long long;
constexpr ull B = 131;
constexpr ull MODS[2]{998244353, 1000000007};

void solve() {
int N;
cin >> N;
vector<string> a(N);
for (auto& s : a) cin >> s;

string cur;
for (const auto& s : a) {
ull hashes_cur[2]{};
ull hashes_s[2]{};
ull powers[2]{1, 1};
int overlap = 0;

for (int i = 0; i < min(s.size(), cur.size()); ++i) {
ull cur_c = cur[cur.size() - 1 - i];
ull s_c = s[i];

for (int j = 0; j < 2; j++) {
hashes_cur[j] = (hashes_cur[j] + powers[j] * cur_c % MODS[j]) % MODS[j];
powers[j] = (powers[j] * B) % MODS[j];
hashes_s[j] = (hashes_s[j] * B % MODS[j] + s_c) % MODS[j];
}
if (hashes_cur[0] == hashes_s[0] && hashes_cur[1] == hashes_s[1]) overlap = i + 1;
}
cur.append(s.begin() + overlap, s.end());
}

cout << cur;
}

int main() {
ios::sync_with_stdio(false);
cin.tie(nullptr);
solve();
}

字符串哈希
https://blog.fredbill.eu.org/2025/01/05/算法/字符串/字符串哈希/
作者
FredBill
发布于
2025年1月5日
许可协议