// This file began as an import from LLVM, and so it has the same license as
// LLVM, copied below together with the code.

//===- llvm/Support/SuffixTree.cpp - Implement Suffix Tree ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Suffix Tree class.
//
//===----------------------------------------------------------------------===//

#include "support/suffix_tree.h"
#include "support/suffix_tree_node.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

namespace wasm {

/// \returns the number of elements in the substring associated with \p N.
static size_t numElementsInSubstring(const SuffixTreeNode* N) {
  assert(N && "Got a null node?");
  if (auto* Internal = dyn_cast<SuffixTreeInternalNode>(N)) {
    if (Internal->isRoot()) {
      return 0;
    }
  }
  return N->getEndIdx() - N->getStartIdx() + 1;
}

SuffixTree::SuffixTree(const std::vector<unsigned>& Str) : Str(Str) {
  Root = insertRoot();
  Active.Node = Root;

  // Keep track of the number of suffixes we have to add of the current
  // prefix.
  unsigned SuffixesToAdd = 0;

  // Construct the suffix tree iteratively on each prefix of the string.
  // PfxEndIdx is the end index of the current prefix.
  // End is one past the last element in the string.
  for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) {
    SuffixesToAdd++;
    LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
    SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
  }

  // Set the suffix indices of each leaf.
  assert(Root && "Root node can't be nullptr!");
  setSuffixIndices();
}

SuffixTreeNode* SuffixTree::insertLeaf(SuffixTreeInternalNode& Parent,
                                       unsigned StartIdx,
                                       unsigned Edge) {
  assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
  auto* N = new (LeafNodeAllocator.Allocate())
    SuffixTreeLeafNode(StartIdx, &LeafEndIdx);
  Parent.Children[Edge] = N;
  return N;
}

SuffixTreeInternalNode*
SuffixTree::insertInternalNode(SuffixTreeInternalNode* Parent,
                               unsigned StartIdx,
                               unsigned EndIdx,
                               unsigned Edge) {
  assert(StartIdx <= EndIdx && "String can't start after it ends!");
  assert(!(!Parent && StartIdx != SuffixTreeNode::EmptyIdx) &&
         "Non-root internal nodes must have parents!");
  auto* N = new (InternalNodeAllocator.Allocate())
    SuffixTreeInternalNode(StartIdx, EndIdx, Root);
  if (Parent) {
    Parent->Children[Edge] = N;
  }
  return N;
}

SuffixTreeInternalNode* SuffixTree::insertRoot() {
  return insertInternalNode(/*Parent = */ nullptr,
                            SuffixTreeNode::EmptyIdx,
                            SuffixTreeNode::EmptyIdx,
                            /*Edge = */ 0);
}

void SuffixTree::setSuffixIndices() {
  // List of nodes we need to visit along with the current length of the
  // string.
  std::vector<std::pair<SuffixTreeNode*, unsigned>> ToVisit;

  // Current node being visited.
  SuffixTreeNode* CurrNode = Root;

  // Sum of the lengths of the nodes down the path to the current one.
  unsigned CurrNodeLen = 0;
  ToVisit.push_back({CurrNode, CurrNodeLen});
  while (!ToVisit.empty()) {
    std::tie(CurrNode, CurrNodeLen) = ToVisit.back();
    ToVisit.pop_back();
    // Length of the current node from the root down to here.
    CurrNode->setConcatLen(CurrNodeLen);
    if (auto* InternalNode = dyn_cast<SuffixTreeInternalNode>(CurrNode)) {
      for (auto& ChildPair : InternalNode->Children) {
        assert(ChildPair.second && "Node had a null child!");
        ToVisit.push_back(
          {ChildPair.second,
           CurrNodeLen + numElementsInSubstring(ChildPair.second)});
      }
    }
    // No children, so we are at the end of the string.
    if (auto* LeafNode = dyn_cast<SuffixTreeLeafNode>(CurrNode)) {
      LeafNode->setSuffixIdx(Str.size() - CurrNodeLen);
    }
  }
}

unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
  SuffixTreeInternalNode* NeedsLink = nullptr;

  while (SuffixesToAdd > 0) {

    // Are we waiting to add anything other than just the last character?
    if (Active.Len == 0) {
      // If not, then say the active index is the end index.
      Active.Idx = EndIdx;
    }

    assert(Active.Idx <= EndIdx && "Start index can't be after end index!");

    // The first character in the current substring we're looking at.
    unsigned FirstChar = Str[Active.Idx];

    // Have we inserted anything starting with FirstChar at the current node?
    if (Active.Node->Children.count(FirstChar) == 0) {
      // If not, then we can just insert a leaf and move to the next step.
      insertLeaf(*Active.Node, EndIdx, FirstChar);

      // The active node is an internal node, and we visited it, so it must
      // need a link if it doesn't have one.
      if (NeedsLink) {
        NeedsLink->setLink(Active.Node);
        NeedsLink = nullptr;
      }
    } else {
      // There's a match with FirstChar, so look for the point in the tree to
      // insert a new node.
      SuffixTreeNode* NextNode = Active.Node->Children[FirstChar];

      unsigned SubstringLen = numElementsInSubstring(NextNode);

      // Is the current suffix we're trying to insert longer than the size of
      // the child we want to move to?
      if (Active.Len >= SubstringLen) {
        // If yes, then consume the characters we've seen and move to the next
        // node.
        // TODO: Enable the below assert
        // assert(isa<SuffixTreeInternalNode>(NextNode) &&
        //       "Expected an internal node?");
        Active.Idx += SubstringLen;
        Active.Len -= SubstringLen;
        Active.Node = cast<SuffixTreeInternalNode>(NextNode);
        continue;
      }

      // Otherwise, the suffix we're trying to insert must be contained in the
      // next node we want to move to.
      unsigned LastChar = Str[EndIdx];

      // Is the string we're trying to insert a substring of the next node?
      if (Str[NextNode->getStartIdx() + Active.Len] == LastChar) {
        // If yes, then we're done for this step. Remember our insertion point
        // and move to the next end index. At this point, we have an implicit
        // suffix tree.
        if (NeedsLink && !Active.Node->isRoot()) {
          NeedsLink->setLink(Active.Node);
          NeedsLink = nullptr;
        }

        Active.Len++;
        break;
      }

      // The string we're trying to insert isn't a substring of the next node,
      // but matches up to a point. Split the node.
      //
      // For example, say we ended our search at a node n and we're trying to
      // insert ABD. Then we'll create a new node s for AB, reduce n to just
      // representing C, and insert a new leaf node l to represent d. This
      // allows us to ensure that if n was a leaf, it remains a leaf.
      //
      //   | ABC  ---split--->  | AB
      //   n                    s
      //                     C / \ D
      //                      n   l

      // The node s from the diagram
      SuffixTreeInternalNode* SplitNode =
        insertInternalNode(Active.Node,
                           NextNode->getStartIdx(),
                           NextNode->getStartIdx() + Active.Len - 1,
                           FirstChar);

      // Insert the new node representing the new substring into the tree as
      // a child of the split node. This is the node l from the diagram.
      insertLeaf(*SplitNode, EndIdx, LastChar);

      // Make the old node a child of the split node and update its start
      // index. This is the node n from the diagram.
      NextNode->incrementStartIdx(Active.Len);
      SplitNode->Children[Str[NextNode->getStartIdx()]] = NextNode;

      // SplitNode is an internal node, update the suffix link.
      if (NeedsLink) {
        NeedsLink->setLink(SplitNode);
      }

      NeedsLink = SplitNode;
    }

    // We've added something new to the tree, so there's one less suffix to
    // add.
    SuffixesToAdd--;

    if (Active.Node->isRoot()) {
      if (Active.Len > 0) {
        Active.Len--;
        Active.Idx = EndIdx - SuffixesToAdd + 1;
      }
    } else {
      // Start the next phase at the next smallest suffix.
      Active.Node = Active.Node->getLink();
    }
  }

  return SuffixesToAdd;
}

void SuffixTree::RepeatedSubstringIterator::advance() {
  // Clear the current state. If we're at the end of the range, then this
  // is the state we want to be in.
  RS = RepeatedSubstring();
  N = nullptr;

  // Each leaf node represents a repeat of a string.
  std::vector<unsigned> RepeatedSubstringStarts;

  // Continue visiting nodes until we find one which repeats more than once.
  while (!InternalNodesToVisit.empty()) {
    RepeatedSubstringStarts.clear();
    auto* Curr = InternalNodesToVisit.back();
    InternalNodesToVisit.pop_back();

    // Keep track of the length of the string associated with the node. If
    // it's too short, we'll quit.
    unsigned Length = Curr->getConcatLen();

    // Iterate over each child, saving internal nodes for visiting, and
    // leaf nodes in LeafChildren. Internal nodes represent individual
    // strings, which may repeat.
    for (auto& ChildPair : Curr->Children) {
      // Save all of this node's children for processing.
      if (auto* InternalChild =
            dyn_cast<SuffixTreeInternalNode>(ChildPair.second)) {
        InternalNodesToVisit.push_back(InternalChild);
        continue;
      }

      if (Length < MinLength) {
        continue;
      }

      // Have an occurrence of a potentially repeated string. Save it.
      auto* Leaf = cast<SuffixTreeLeafNode>(ChildPair.second);
      RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
    }

    // The root never represents a repeated substring. If we're looking at
    // that, then skip it.
    if (Curr->isRoot()) {
      continue;
    }

    // Do we have any repeated substrings?
    if (RepeatedSubstringStarts.size() < 2) {
      continue;
    }

    // Yes. Update the state to reflect this, and then bail out.
    N = Curr;
    RS.Length = Length;
    for (unsigned StartIdx : RepeatedSubstringStarts) {
      RS.StartIndices.push_back(StartIdx);
    }
    break;
  }
  // At this point, either NewRS is an empty RepeatedSubstring, or it was
  // set in the above loop. Similarly, N is either nullptr, or the node
  // associated with NewRS.
}

} // namespace wasm