From 0314bccaddbca7ea9935f55461a2d7bbc8bca2d6 Mon Sep 17 00:00:00 2001
From: Bruno Freitas Tissei <bft15@inf.ufpr.br>
Date: Thu, 28 Nov 2019 13:37:24 -0300
Subject: [PATCH] Improve Aho Corasick

Signed-off-by: Bruno Freitas Tissei <bft15@inf.ufpr.br>
---
 algorithms/string/aho_corasick.cpp    | 39 ++++++++++-------
 contests/Cadernaveis/URI1141.cpp      | 62 ++++++++++++++++-----------
 contests/Cadernaveis/UVA10679_aho.cpp | 36 +++++++++-------
 3 files changed, 81 insertions(+), 56 deletions(-)

diff --git a/algorithms/string/aho_corasick.cpp b/algorithms/string/aho_corasick.cpp
index 23f0302..f5c29f2 100644
--- a/algorithms/string/aho_corasick.cpp
+++ b/algorithms/string/aho_corasick.cpp
@@ -13,11 +13,11 @@
 // *: Use only if "match_all" is necessary
 struct AhoCorasick {
   struct Node {
-    //*: vector<int> words;
+    vector<int> words;
     map<char,int> next;
-    int idx, fail, cnt, hei;
+    int fail, cnt, hei, occ;
 
-    Node() : idx(-1), fail(0), cnt(0), hei(0) {}
+    Node() : fail(0), cnt(0), hei(0), occ(-1) {}
     int has(char i) { return next.count(i); }
     int &operator[](char i) { return next[i]; }
   };
@@ -36,7 +36,7 @@ struct AhoCorasick {
     for (int i = 0; i < s.size(); n = trie[n][s[i]], ++i)
       if (!trie[n].has(s[i])) {
         trie[n][s[i]] = trie.size();
-        //*: trie[n].hei = i + 1;
         trie.pb(Node());
+        trie.back().hei = i + 1; // depth of the new child, not of its parent n
       }
     return n;
@@ -45,10 +45,8 @@ struct AhoCorasick {
   void build(const vector<string> &v) {
     for (int i = 0; i < v.size(); ++i) {
       int n = insert(v[i]);
-      trie[n].idx = i;
-      //*: trie[n].words.pb(i);
+      trie[n].words.pb(i);
     }
-    preprocess();
   }
 
   inline int suffix(int v, char c) {
@@ -62,10 +60,14 @@ struct AhoCorasick {
     for (int i = 0; i != Q.size(); ++i) {
       int u = Q[i];
       for (auto j : trie[u].next) {
-        trie[j.se].fail = u ? suffix(trie[u].fail, j.fi) : trie[u].fail;
-
-        //*: trie[j.se].words.insert(trie[j.se].words.end(), 
-        // all(trie[trie[j.se].fail].words));
+        int &v = trie[j.se].fail;
+        if (u) {
+          v = suffix(trie[u].fail, j.fi);
+          trie[j.se].occ = trie[v].words.size() ? v : trie[v].occ;
+        } else {
+          v = trie[u].fail;
+          trie[j.se].occ = -1;
+        }
         Q.pb(j.se);
       }
     }
@@ -80,14 +82,13 @@ struct AhoCorasick {
       u = suffix(u, i);
       trie[u].cnt++;
     }
-
     for (int i = top.size() - 1; i >= 0; --i)
       trie[trie[top[i]].fail].cnt += trie[top[i]].cnt;
 
     vector<int> ans;
-    for (auto i : trie)
-      if (i.idx != -1 && i.cnt)
-        ans.pb(i.idx);
+    for (auto &i : trie)
+      if (i.cnt && i.words.size())
+        for (auto j : i.words) ans.pb(j);
 
     sort(all(ans));
     return ans;
@@ -103,7 +104,15 @@ struct AhoCorasick {
       u = suffix(u, p[i]);
       for (auto j : trie[u].words)
         ans.pb({j, i - trie[u].hei + 1});
+
+      int x = u;
+      while (trie[x].occ != -1) {
+        x = trie[x].occ;
+        for (auto j : trie[x].words)
+          ans.pb({j, i - trie[x].hei + 1});
+      }
     }
+    sort(all(ans));
     return ans;
   }
 };
diff --git a/contests/Cadernaveis/URI1141.cpp b/contests/Cadernaveis/URI1141.cpp
index 36bf967..395917e 100644
--- a/contests/Cadernaveis/URI1141.cpp
+++ b/contests/Cadernaveis/URI1141.cpp
@@ -23,11 +23,11 @@ using ii = pair<int,int>;
 
 struct AhoCorasick {
   struct Node {
-    int fail;
+    int fail, occ;
     vector<int> words;
     map<char,int> next;
 
-    Node() : fail(0) {}
+    Node() : fail(0), occ(-1) {}
     int has(char i) { return next.count(i); }
     int &operator[](char i) { return next[i]; }
   };
@@ -68,8 +68,14 @@ struct AhoCorasick {
     for (int i = 0; i != Q.size(); ++i) {
       int u = Q[i];
       for (auto j : trie[u].next) {
-        trie[j.se].fail = u ? suffix(trie[u].fail, j.fi) : trie[u].fail;
-        trie[j.se].words.insert(trie[j.se].words.end(), all(trie[trie[j.se].fail].words));
+        int &v = trie[j.se].fail;
+        if (u) {
+          v = suffix(trie[u].fail, j.fi);
+          trie[j.se].occ = trie[v].words.size() ? v : trie[v].occ;
+        } else {
+          v = trie[u].fail;
+          trie[j.se].occ = -1;
+        }
         Q.pb(j.se);
       }
     }
@@ -80,22 +86,6 @@ int n;
 int dp[10101];
 vector<string> v;
 
-int solve(int i, AhoCorasick &aho) {
-  if (i == n) return 0;
-  if (dp[i] != -1) return dp[i];
-
-  int u = 0;
-  int grt = 0;
-  for (auto j : v[i]) {
-    u = aho.suffix(u, j);
-    for (auto k : aho.trie[u].words)
-      if (v[k].size() < v[i].size())
-        grt = max(grt, solve(k, aho) + 1);
-  }
-  
-  return dp[i] = grt;
-}
-
 int main() {
   ios::sync_with_stdio(0);
   cin.tie(0);
@@ -103,14 +93,34 @@ int main() {
   while (cin >> n && n) {
     v.clear();
     v.resize(n);
-    for (auto &i : v) cin >> i;
+    for (auto &i : v) 
+      cin >> i;
+
+    sort(all(v), [](const string &a, const string &b) { 
+      return a.size() > b.size(); 
+    });
+
     AhoCorasick aho(v);
+    for (int i = n - 1; i >= 0; --i) {
+      int grt = 0, u = 0;
+      for (auto j : v[i]) {
+        u = aho.suffix(u, j);
+        for (auto k : aho.trie[u].words)
+          if (k != i)
+            grt = max(grt, dp[k] + 1);
+
+        int x = u;
+        while (aho.trie[x].occ != -1) {
+          x = aho.trie[x].occ;
+          for (auto k : aho.trie[x].words)
+            if (k != i)
+              grt = max(grt, dp[k] + 1);
+        }
+      }
+      dp[i] = grt;
+    }
 
-    mset(dp, -1);
-    int ans = 0;
-    for (int i = 0; i < n; ++i)
-      ans = max(ans, solve(i, aho) + 1);
-    cout << ans << ende;
+    cout << (*max_element(dp, dp + n) + 1) << ende;
   }
 
   return 0;
diff --git a/contests/Cadernaveis/UVA10679_aho.cpp b/contests/Cadernaveis/UVA10679_aho.cpp
index e5e69c5..1576039 100644
--- a/contests/Cadernaveis/UVA10679_aho.cpp
+++ b/contests/Cadernaveis/UVA10679_aho.cpp
@@ -1,3 +1,5 @@
+/// I Love Strings! (Aho-Corasick)
+
 #include <bits/stdc++.h>
  
 #define MAX 1010101
@@ -22,10 +24,11 @@ using ii = pair<int,int>;
  
 struct AhoCorasick {
   struct Node {
+    int fail, occ, cnt;
+    vector<int> words;
     map<char,int> next;
-    int idx, fail, cnt, hei;
 
-    Node() : idx(-1), fail(0), cnt(0), hei(0) {}
+    Node() : fail(0), occ(-1), cnt(0) {}
     int has(char i) { return next.count(i); }
     int &operator[](char i) { return next[i]; }
   };
@@ -52,9 +55,8 @@ struct AhoCorasick {
   void build(const vector<string> &v) {
     for (int i = 0; i < v.size(); ++i) {
       int n = insert(v[i]);
-      trie[n].idx = i;
+      trie[n].words.pb(i);
     }
-    preprocess();
   }
 
   inline int suffix(int v, char c) {
@@ -68,7 +70,14 @@ struct AhoCorasick {
     for (int i = 0; i != Q.size(); ++i) {
       int u = Q[i];
       for (auto j : trie[u].next) {
-        trie[j.se].fail = u ? suffix(trie[u].fail, j.fi) : trie[u].fail;
+        int &v = trie[j.se].fail;
+        if (u) {
+          v = suffix(trie[u].fail, j.fi);
+          trie[j.se].occ = trie[v].words.size() ? v : trie[v].occ;
+        } else {
+          v = trie[u].fail;
+          trie[j.se].occ = -1;
+        }
         Q.pb(j.se);
       }
     }
@@ -77,7 +86,8 @@ struct AhoCorasick {
 
   vector<int> match(const string &p) {
     int u = 0;
-    for (auto i : p) {
+    vector<int> ans;
+    for (auto &i : p) {
       u = suffix(u, i);
       trie[u].cnt++;
     }
@@ -85,12 +95,12 @@ struct AhoCorasick {
     for (int i = top.size() - 1; i >= 0; --i)
       trie[trie[top[i]].fail].cnt += trie[top[i]].cnt;
 
-    vector<int> ans;
     for (auto i : trie)
-      if (i.idx != -1 && i.cnt)
-        ans.pb(i.idx);
+      if (i.cnt && i.words.size())
+        for (auto j : i.words) ans.pb(j);
 
     sort(all(ans));
+    ans.erase(unique(all(ans)), ans.end());
     return ans;
   }
 };
@@ -109,16 +119,12 @@ int main() {
     AhoCorasick aho(v);
     vector<int> ans = aho.match(s);
    
-    map<string,int> M;
     for (int i = 0, j = 0; i < n; ++i) {
       if (j < ans.size() && ans[j] == i) { 
-        M[v[j]] = 1; ++j;
+        cout << 'y' << ende; ++j;
       } else
-        if (!M[v[j]]) M[v[j]] = 0;
+        cout << 'n' << ende;
     }
-
-    for (int i = 0; i < n; ++i)
-      cout << (M[v[i]] ? 'y' : 'n') << ende;
   }
  
   return 0;
-- 
GitLab