263 lines
5.6 KiB
Text
263 lines
5.6 KiB
Text
---
|
|
id: string-suffix
|
|
title: "String Suffix Structures"
|
|
author: Benjamin Qi, Siyong Huang
|
|
description: "Suffix Automata, Suffix Trees, and (TBD) Palindromic Trees"
|
|
prerequisites:
|
|
- Platinum - String Searching
|
|
frequency: 0
|
|
---
|
|
|
|
export const metadata = {
|
|
// Problem lists consumed by the <Problems /> components further down this page:
// `auto` is passed as metadata.problems.auto, `tree` as metadata.problems.tree.
problems: {
|
|
auto: [
|
|
new Problem("Plat", "Standing Out from the Herd", "768", "Hard", false, [], ""),
|
|
],
|
|
tree: [
|
|
new Problem("CF", "Security", "contest/1037/problem/H", "Hard", false, ["Suffix Tree"], ""),
|
|
]
|
|
}
|
|
};
|
|
|
|
## Suffix Automaton
|
|
|
|
The **Suffix Automaton** is a directed acyclic word graph (DAWG) in which every path starting from the initial state spells out a distinct substring of the original string, and every substring corresponds to exactly one such path.
|
|
|
|
### Resources
|
|
|
|
<Resources>
|
|
<Resource source="CF" title="A short guide to suffix automata" url="blog/entry/20861">Explanation of Suffix Automata</Resource>
|
|
<Resource source="cp-algo" title="Suffix Automaton" url="string/suffix-automaton.html" starred>Excellent Suffix Automaton tutorial</Resource>
|
|
</Resources>
|
|
|
|
<Info>
|
|
|
|
Most problems can be solved with Suffix Arrays, Suffix Automata, or Suffix Trees. The solution may just be slightly easier/harder with the various data structures.
|
|
|
|
</Info>
|
|
|
|
### Problems
|
|
|
|
<Problems problems={metadata.problems.auto} />
|
|
|
|
<Spoiler title="USACO Standing Out using Suffix Automata">
|
|
|
|
<!-- Checked via USACO Practice -->
|
|
|
|
```cpp
|
|
#include <cstdio>
|
|
#include <cstring>
|
|
#include <vector>
|
|
|
|
// File handles for the problem's input and output.
FILE *IN;
FILE *OUT;

using ll = long long;

// MN bounds the input buffer and the answer array; MM = 2*MN bounds the node arrays.
const int MN = 1e5+10;
const int MM = MN*2;

char s[MN];                 // buffer holding the string currently being read
std::vector<int> down[MM];  // down[x]: nodes whose link points at x
int N;                      // count read from the first token of the input
int v[MM];                  // per-node cow marker (-1 none, -2 several, otherwise a cow id)
int c[MM][26];              // c[x][letter]: transition target, -1 when absent
int l[MM];                  // link of each node (-1 for node 0)
int d[MM];                  // per-node length value, set to d[p]+1 on creation
int topo[MM];               // nodes in the order dfs() finishes them
int T;                      // number of entries written to topo
int X;                      // index of the most recently created node
ll f[MN];                   // f[cow]: accumulated answer for that cow
ll cnt[MM];                 // cnt[x]: number of paths from node 0 to x
bool u[MM];                 // visited flags used by dfs()
|
|
|
|
/*
|
|
Key Variables:
|
|
|
|
s: input strings
|
|
down: link tree of automaton
|
|
v: information regarding which cow each node belongs to
|
|
c: child array of automaton
|
|
l: link (of automaton)
|
|
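d: length stored at each node (set to d[p]+1 when the node is created from p; called "len" in most suffix automaton write-ups)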
|
|
topo: toposort (of automaton)
|
|
T, X: counters for toposort and automaton
|
|
f: answer
|
|
cnt: number of ways to reach a node from the root
|
|
u: visited array for toposort
|
|
*/
|
|
|
|
// Folds cow marker b into the marker a (a is updated in place).
// Marker values:
//   -1      : no cow assigned
//   -2      : more than one distinct cow assigned
//   0..N-1  : id of the single cow assigned
void merge(int& a, int b)
{
    if (a == -1)
    {
        // Nothing recorded yet: simply adopt whatever b carries.
        a = b;
        return;
    }
    // b == -1 carries no information; an equal marker changes nothing.
    if (b != -1 && b != a)
        a = -2;
}
|
|
|
|
//template automaton code
|
|
int append(int p, char x)
|
|
{
|
|
if(~c[p][x])
|
|
{
|
|
int q=c[p][x];
|
|
if(d[q]==d[p]+1)
|
|
return q;
|
|
else
|
|
{
|
|
++X;
|
|
for(int i=0;i<26;++i) c[X][i]=c[q][i];
|
|
l[X]=l[q], d[X]=d[p]+1;
|
|
l[q]=X;
|
|
for(;~p&&c[p][x]==q;p=l[p])
|
|
c[p][x]=l[q];
|
|
return l[q];
|
|
}
|
|
}
|
|
int n = ++X;
|
|
d[n]=d[p]+1;
|
|
for(;~p&&!~c[p][x];p=l[p])
|
|
c[p][x]=n;
|
|
if(!~p)
|
|
l[n]=0;
|
|
else
|
|
{
|
|
int q=c[p][x];
|
|
if(d[q]==d[p]+1)
|
|
l[n]=q;
|
|
else
|
|
{
|
|
++X;
|
|
for(int i=0;i<26;++i) c[X][i]=c[q][i];
|
|
l[X]=l[q], d[X]=d[p]+1;
|
|
l[n]=l[q]=X;
|
|
for(;~p&&c[p][x]==q;p=l[p])
|
|
c[p][x]=l[q];
|
|
}
|
|
}
|
|
return n;
|
|
}
|
|
|
|
//DFS along links
|
|
void dfs2(int n=0)
|
|
{
|
|
for(int x:down[n])
|
|
{
|
|
dfs2(x);
|
|
merge(v[n], v[x]);
|
|
}
|
|
}
|
|
//DFS along suffix automaton. This builds the toposort
|
|
void dfs(int n=0)
|
|
{
|
|
u[n]=1;
|
|
for(int i=0;i<26;++i)
|
|
{
|
|
int y=c[n][i];
|
|
if(~y && !u[y]) dfs(y);
|
|
}
|
|
topo[T++] = n;
|
|
}
|
|
|
|
int main(void)
|
|
{
|
|
IN = fopen("standingout.in", "r"), OUT = fopen("standingout.out", "w");
|
|
memset(v, -1, sizeof v);
|
|
memset(c, -1, sizeof c);
|
|
fscanf(IN, "%d", &N);
|
|
d[0]=0, l[0]=-1;
|
|
for(int i=0;i<N;++i)
|
|
{
|
|
fscanf(IN, " %s", s);
|
|
int n=0;
|
|
for(int j=0;s[j];++j)
|
|
{
|
|
n = append(n, s[j]-'a'); //build automaton
|
|
merge(v[n], i);
|
|
}
|
|
}
|
|
//build link tree
|
|
for(int i=1;i<=X;++i)
|
|
down[l[i]].push_back(i);
|
|
dfs();//dfs link tree
|
|
dfs2();//dfs automaton
|
|
cnt[0]=1;
|
|
for(int i=T-1, x;i>=0;--i)
|
|
{
|
|
x=topo[i];
|
|
for(int j=0;j<26;++j)
|
|
if(~c[x][j])
|
|
cnt[c[x][j]]+=cnt[x];//count number of paths from root to a node
|
|
if(v[x]>=0)
|
|
f[v[x]]+=cnt[x];//if this node is associated with a unique cow, add to answer
|
|
}
|
|
for(int i=0;i<N;++i)
|
|
fprintf(OUT, "%lld\n", f[i]);
|
|
return 0;
|
|
}
|
|
```
|
|
|
|
</Spoiler>
|
|
|
|
## Suffix Tree
|
|
|
|
The **Suffix Tree** is a trie that contains all suffixes of a string.
|
|
Naively, this would take up $O(N^2)$ memory, but *path compression* enables it to be represented and computed in linear memory.
|
|
|
|
### Resources
|
|
|
|
<Resources>
|
|
<Resource source="CF" title="Suffix Tree. Ukkonen's algorithm" url="blog/entry/16780">Explanation of Ukkonen's Suffix Tree Algorithm</Resource>
|
|
<Resource source="cp-algo" title="Suffix Tree. Ukkonen's Algorithm" url="string/suffix-tree-ukkonen.html">Implementation of Ukkonen's Algorithm</Resource>
|
|
</Resources>
|
|
|
|
### Problems
|
|
|
|
<Problems problems={metadata.problems.tree} />
|
|
|
|
### Generate Suffix Array
|
|
|
|
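A suffix array can be generated from the suffix tree with a DFS that visits each node's children in alphabetical order: the order in which the leaves are reached is the sorted order of the suffixes.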
|
|
|
|
<LanguageSection>
|
|
|
|
<CPPSection>
|
|
|
|
<!-- https://codeforces.com/edu/course/2/lesson/2/2/practice/contest/269103/submission/85759835 -->
|
|
|
|
```cpp
|
|
int N, sa[MN], ctr;//length of string, suffix array, number of suffix array entries written so far (used by dfs)
|
|
|
|
struct edge
|
|
{
|
|
public:
|
|
int n, l, r;//node, edge covers s[l..r]
|
|
explicit operator bool() const {return n!=-1;}
|
|
} c[MN*2][26];
|
|
|
|
void dfs(int n=0, int d=0)
|
|
{
|
|
bool c=0;// Has child. If false, then this node is a leaf
|
|
for(int i=0;i<26;++i)
|
|
if(c[n][i])
|
|
{
|
|
c=1;
|
|
dfs(c[n][i].n, d+c[n][i].r-c[n][i].l);
|
|
}
|
|
if(!c)
|
|
sa[ctr++]=N-d;
|
|
}
|
|
```
|
|
</CPPSection>
|
|
|
|
<JavaSection />
|
|
|
|
<PySection />
|
|
|
|
</LanguageSection>
|
|
|
|
## Palindromic Tree
|
|
|
|
(Still don't know what these are!! Benq help!)
|
|
|
|
|
|
* String Suffix Structures
|
|
* Suffix Tree
|
|
* [CF](https://codeforces.com/blog/entry/16780)
|
|
* [CP-Algo](https://cp-algorithms.com/string/suffix-tree-ukkonen.html)
|
|
* O(nlogn) suffix array usually suffices
|
|
* More on Palindromic Tree
|
|
* [Palindrome Partition](https://codeforces.com/contest/932/problem/G)
|
|
* [Partial Solution](https://codeforces.com/blog/entry/19193)
|
|
* [Palindromic Magic (HARD)](https://codeforces.com/contest/1081/problem/H)
|
|
|