summaryrefslogtreecommitdiff
path: root/src/support/dfa_minimization.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/support/dfa_minimization.cpp')
-rw-r--r--src/support/dfa_minimization.cpp362
1 files changed, 362 insertions, 0 deletions
diff --git a/src/support/dfa_minimization.cpp b/src/support/dfa_minimization.cpp
new file mode 100644
index 000000000..5df67c585
--- /dev/null
+++ b/src/support/dfa_minimization.cpp
@@ -0,0 +1,362 @@
+/*
+ * Copyright 2023 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <map>
+
+#include "dfa_minimization.h"
+
+namespace wasm::DFA {
+
+namespace {
+
+// A vector of initial partitions, each of which is a vector of elements, each
+// of which is a vector of successor indices.
+using InputGraph = std::vector<std::vector<std::vector<size_t>>>;
+
+// The Refined Partitions data structure used in Valmari-Lehtinen DFA
+// minimization. The translation from terms used in the Valmari-Lehtinen paper
+// to the more expanded terms used here is:
+//
+// Block => Set
+// elems => elements
+// loc => elementIndices
+// sidx => setIndices
+// first => beginnings
+// end => endings
+// mid => pivots
+//
+struct Partitions {
+ // The number of sets.
+ size_t sets = 0;
+
+ // The partitioned elements. Elements in the same set are next to each other.
+ // Within each set, "marked" elements come first followed by "unmarked"
+ // elements.
+ std::vector<size_t> elements;
+
+ // Maps elements to their indices in `elements`.
+ std::vector<size_t> elementIndices;
+
+ // Maps elements to their sets, identified by an index.
+ std::vector<size_t> setIndices;
+
+ // Maps sets to the indices of their first elements in `elements`.
+ std::vector<size_t> beginnings;
+
+ // Maps sets to (one past) the indices of their ends in `elements`.
+ std::vector<size_t> endings;
+
+ // Maps sets to the indices of their first unmarked elements in `elements`.
+ std::vector<size_t> pivots;
+
+ Partitions() = default;
+
+ // Allocate space up front so we never need to re-allocate. The actual
+ // contents of all the vectors will need to be externally initialized,
+ // though.
+ Partitions(size_t size)
+ : elements(size), elementIndices(size), setIndices(size), beginnings(size),
+ endings(size), pivots(size) {}
+
+ struct Set {
+ using Iterator = std::vector<size_t>::iterator;
+
+ Partitions& partitions;
+ size_t index;
+
+ Set(Partitions& partitions, size_t index)
+ : partitions(partitions), index(index) {}
+
+ Iterator begin() {
+ return partitions.elements.begin() + partitions.beginnings[index];
+ }
+ Iterator end() {
+ return partitions.elements.begin() + partitions.endings[index];
+ }
+ size_t size() {
+ return partitions.endings[index] - partitions.beginnings[index];
+ }
+
+ bool hasMarks() {
+ return partitions.pivots[index] != partitions.beginnings[index];
+ }
+
+ // Split the set between marked and unmarked elements if there are both
+ // marked and unmarked elements. Unmark all elements of this set regardless.
+ // Return the index of the new partition or 0 if there was no split.
+ size_t split() {
+ size_t begin = partitions.beginnings[index];
+ size_t end = partitions.endings[index];
+ size_t pivot = partitions.pivots[index];
+ if (pivot == begin) {
+ // No elements marked, so there is nothing to do.
+ return 0;
+ }
+ if (pivot == end) {
+ // All elements were marked, so just unmark them.
+ partitions.pivots[index] = begin;
+ return 0;
+ }
+ // Create a new set covering the marked region.
+ size_t newIndex = partitions.sets++;
+ partitions.beginnings[newIndex] = begin;
+ partitions.pivots[newIndex] = begin;
+ partitions.endings[newIndex] = pivot;
+ for (size_t i = begin; i < pivot; ++i) {
+ partitions.setIndices[partitions.elements[i]] = newIndex;
+ }
+ // Update the old set. The end and pivot are already correct.
+ partitions.beginnings[index] = pivot;
+ return newIndex;
+ }
+ };
+
+ Set getSet(size_t index) { return {*this, index}; }
+
+ // Returns the set containing an element, which can be iterated upon. The set
+ // may be invalidated by calls to `mark` or `Set::split`.
+ Set getSetForElem(size_t element) { return getSet(setIndices[element]); }
+
+ void mark(size_t element) {
+ size_t index = elementIndices[element];
+ size_t set = setIndices[element];
+ size_t pivot = pivots[set];
+ if (index >= pivot) {
+ // Move the pivot element into the location of the newly marked element.
+ elements[index] = elements[pivot];
+ elementIndices[elements[index]] = index;
+ // Move the newly marked element into the pivot location.
+ elements[pivot] = element;
+ elementIndices[element] = pivot;
+ // Update the pivot index to mark the element.
+ ++pivots[set];
+ }
+ }
+};
+
+Partitions initializeStatePartitions(const InputGraph& inputGraph,
+ size_t numElements) {
+ Partitions partitions(numElements);
+ size_t elementIndex = 0;
+ for (const auto& partition : inputGraph) {
+ size_t set = partitions.sets++;
+ partitions.beginnings[set] = elementIndex;
+ partitions.pivots[set] = elementIndex;
+ for (size_t i = 0; i < partition.size(); ++i) {
+ partitions.elements[elementIndex] = elementIndex;
+ partitions.elementIndices[elementIndex] = elementIndex;
+ partitions.setIndices[elementIndex] = set;
+ ++elementIndex;
+ }
+ partitions.endings[set] = elementIndex;
+ }
+ return partitions;
+}
+
+// A DFA transition into a state.
+struct Transition {
+ size_t pred;
+ size_t label;
+};
+
+void initializeTransitions(const InputGraph& inputGraph,
+ size_t numElements,
+ size_t numTransitions,
+ std::vector<Transition>& transitions,
+ std::vector<size_t>& transitionIndices) {
+ // Find the transitions into each state. Map destinations to input
+ // transitions.
+ std::map<size_t, std::vector<Transition>> transitionMap;
+ size_t elementIndex = 0;
+ for (const auto& partition : inputGraph) {
+ for (const auto& elem : partition) {
+ size_t label = 0;
+ for (const auto& succ : elem) {
+ transitionMap[succ].push_back({elementIndex, label++});
+ }
+ ++elementIndex;
+ }
+ }
+
+ // Populate `transitions` and `transitionIndices`.
+ transitions.reserve(numTransitions);
+ transitionIndices.reserve(numElements + 1);
+ for (size_t dest = 0; dest < numElements; ++dest) {
+ // Record the first index of transitions leading to `dest`.
+ transitionIndices.push_back(transitions.size());
+ if (auto it = transitionMap.find(dest); it != transitionMap.end()) {
+ transitions.insert(
+ transitions.end(), it->second.begin(), it->second.end());
+ }
+ }
+ // Record one-past the end of the transitions leading to the final `dest`.
+ transitionIndices.push_back(transitions.size());
+}
+
+Partitions
+initializeSplitterPartitions(Partitions& partitions,
+ const std::vector<Transition>& transitions,
+ const std::vector<size_t>& transitionIndices) {
+ // The initial sets of splitters are partitioned by destination state
+ // partition and transition label.
+ Partitions splitters(transitions.size());
+ size_t elementIndex = 0;
+ for (size_t statePartition = 0; statePartition < partitions.sets;
+ ++statePartition) {
+ // The in-transitions leading to states in the current partition, organized
+ // by transition label.
+ std::map<size_t, std::vector<size_t>> currTransitions;
+ for (size_t state : partitions.getSet(statePartition)) {
+ for (size_t transition = transitionIndices[state],
+ end = transitionIndices[state + 1];
+ transition < end;
+ ++transition) {
+ currTransitions[transitions[transition].label].push_back(transition);
+ }
+ }
+ // Create a splitter partition for each in-transition label leading to the
+ // current state partition.
+ for (auto& pair : currTransitions) {
+ size_t set = splitters.sets++;
+ splitters.beginnings[set] = elementIndex;
+ splitters.pivots[set] = elementIndex;
+ for (size_t transition : pair.second) {
+ splitters.elements[elementIndex] = transition;
+ splitters.elementIndices[transition] = elementIndex;
+ splitters.setIndices[transition] = set;
+ ++elementIndex;
+ }
+ splitters.endings[set] = elementIndex;
+ }
+ }
+ return splitters;
+}
+
+} // anonymous namespace
+
+namespace Internal {
+
+std::vector<std::vector<size_t>>
+refinePartitionsImpl(const InputGraph& inputGraph) {
+ // Find the number of states and transitions.
+ size_t numElements = 0;
+ size_t numTransitions = 0;
+ for (const auto& partition : inputGraph) {
+ numElements += partition.size();
+ for (const auto& elem : partition) {
+ numTransitions += elem.size();
+ }
+ }
+
+ // The partitions of DFA states.
+ Partitions partitions = initializeStatePartitions(inputGraph, numElements);
+
+ // The transitions arranged such that the transitions leading to state `q` are
+ // `transitions[transitionIndices[q] : transitionIndices[q+1]]`.
+ std::vector<Transition> transitions;
+ std::vector<size_t> transitionIndices;
+ initializeTransitions(
+ inputGraph, numElements, numTransitions, transitions, transitionIndices);
+
+ // The splitters, which are partitions of the input transitions.
+ Partitions splitters =
+ initializeSplitterPartitions(partitions, transitions, transitionIndices);
+
+ // The list of splitter partitions that might be able to split states in some
+ // state partition. Starts out containing all splitter partitions.
+ std::vector<size_t> potentialSplitters;
+ potentialSplitters.reserve(splitters.sets);
+ for (size_t i = 0; i < splitters.sets; ++i) {
+ potentialSplitters.push_back(i);
+ }
+
+ while (!potentialSplitters.empty()) {
+ size_t potentialSplitter = potentialSplitters.back();
+ potentialSplitters.pop_back();
+
+ // The partitions that may be able to be split.
+ std::vector<size_t> markedPartitions;
+
+ // Mark states that are predecessors via this splitter partition.
+ for (size_t transition : splitters.getSet(potentialSplitter)) {
+ size_t state = transitions[transition].pred;
+ auto partition = partitions.getSetForElem(state);
+ if (!partition.hasMarks()) {
+ markedPartitions.push_back(partition.index);
+ }
+ partitions.mark(state);
+ }
+
+ // Try to split each partition with marked states.
+ for (size_t partition : markedPartitions) {
+ size_t newPartition = partitions.getSet(partition).split();
+ if (!newPartition) {
+ // There was nothing to split.
+ continue;
+ }
+
+ // We only want to keep using the smaller of the two split partitions.
+ if (partitions.getSet(newPartition).size() <
+ partitions.getSet(partition).size()) {
+ newPartition = partition;
+ }
+
+ // The splitter partitions that may need to be split to match the new
+ // split of the state partitions.
+ std::vector<size_t> markedSplitters;
+
+ // Mark transitions that lead to the newly split off states.
+ for (size_t state : partitions.getSet(newPartition)) {
+ for (size_t t = transitionIndices[state],
+ end = transitionIndices[state + 1];
+ t < end;
+ ++t) {
+ auto splitter = splitters.getSetForElem(t);
+ if (!splitter.hasMarks()) {
+ markedSplitters.push_back(splitter.index);
+ }
+ splitters.mark(t);
+ }
+ }
+
+ // Split the splitters and update `potentialSplitters`.
+ for (size_t splitter : markedSplitters) {
+ size_t newSplitter = splitters.getSet(splitter).split();
+ if (newSplitter) {
+ potentialSplitters.push_back(newSplitter);
+ }
+ }
+ }
+ }
+
+ // Return the refined partitions.
+ std::vector<std::vector<size_t>> resultPartitions;
+ resultPartitions.reserve(partitions.sets);
+ for (size_t p = 0; p < partitions.sets; ++p) {
+ auto partition = partitions.getSet(p);
+ std::vector<size_t> resultPartition;
+ resultPartition.reserve(partition.size());
+ for (size_t elem : partition) {
+ resultPartition.push_back(elem);
+ }
+ resultPartitions.emplace_back(std::move(resultPartition));
+ }
+ return resultPartitions;
+}
+
+} // namespace Internal
+
+} // namespace wasm::DFA