diff options
Diffstat (limited to 'src/support/dfa_minimization.cpp')
-rw-r--r-- | src/support/dfa_minimization.cpp | 362 |
1 files changed, 362 insertions, 0 deletions
diff --git a/src/support/dfa_minimization.cpp b/src/support/dfa_minimization.cpp new file mode 100644 index 000000000..5df67c585 --- /dev/null +++ b/src/support/dfa_minimization.cpp @@ -0,0 +1,362 @@ +/* + * Copyright 2023 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <map> + +#include "dfa_minimization.h" + +namespace wasm::DFA { + +namespace { + +// A vector of initial partitions, each of which is a vector of elements, each +// of which is a vector of successor indices. +using InputGraph = std::vector<std::vector<std::vector<size_t>>>; + +// The Refined Partitions data structure used in Valmari-Lehtinen DFA +// minimization. The translation from terms used in the Valmari-Lehtinen paper +// to the more expanded terms used here is: +// +// Block => Set +// elems => elements +// loc => elementIndices +// sidx => setIndices +// first => beginnings +// end => endings +// mid => pivots +// +struct Partitions { + // The number of sets. + size_t sets = 0; + + // The partitioned elements. Elements in the same set are next to each other. + // Within each set, "marked" elements come first followed by "unmarked" + // elements. + std::vector<size_t> elements; + + // Maps elements to their indices in `elements`. + std::vector<size_t> elementIndices; + + // Maps elements to their sets, identified by an index. + std::vector<size_t> setIndices; + + // Maps sets to the indices of their first elements in `elements`. + std::vector<size_t> beginnings; + + // Maps sets to (one past) the indices of their ends in `elements`. + std::vector<size_t> endings; + + // Maps sets to the indices of their first unmarked elements in `elements`. + std::vector<size_t> pivots; + + Partitions() = default; + + // Allocate space up front so we never need to re-allocate. The actual + // contents of all the vectors will need to be externally initialized, + // though. + Partitions(size_t size) + : elements(size), elementIndices(size), setIndices(size), beginnings(size), + endings(size), pivots(size) {} + + struct Set { + using Iterator = std::vector<size_t>::iterator; + + Partitions& partitions; + size_t index; + + Set(Partitions& partitions, size_t index) + : partitions(partitions), index(index) {} + + Iterator begin() { + return partitions.elements.begin() + partitions.beginnings[index]; + } + Iterator end() { + return partitions.elements.begin() + partitions.endings[index]; + } + size_t size() { + return partitions.endings[index] - partitions.beginnings[index]; + } + + bool hasMarks() { + return partitions.pivots[index] != partitions.beginnings[index]; + } + + // Split the set between marked and unmarked elements if there are both + // marked and unmarked elements. Unmark all elements of this set regardless. + // Return the index of the new partition or 0 if there was no split. + size_t split() { + size_t begin = partitions.beginnings[index]; + size_t end = partitions.endings[index]; + size_t pivot = partitions.pivots[index]; + if (pivot == begin) { + // No elements marked, so there is nothing to do. + return 0; + } + if (pivot == end) { + // All elements were marked, so just unmark them. + partitions.pivots[index] = begin; + return 0; + } + // Create a new set covering the marked region. + size_t newIndex = partitions.sets++; + partitions.beginnings[newIndex] = begin; + partitions.pivots[newIndex] = begin; + partitions.endings[newIndex] = pivot; + for (size_t i = begin; i < pivot; ++i) { + partitions.setIndices[partitions.elements[i]] = newIndex; + } + // Update the old set. The end and pivot are already correct. + partitions.beginnings[index] = pivot; + return newIndex; + } + }; + + Set getSet(size_t index) { return {*this, index}; } + + // Returns the set containing an element, which can be iterated upon. The set + // may be invalidated by calls to `mark` or `Set::split`. + Set getSetForElem(size_t element) { return getSet(setIndices[element]); } + + void mark(size_t element) { + size_t index = elementIndices[element]; + size_t set = setIndices[element]; + size_t pivot = pivots[set]; + if (index >= pivot) { + // Move the pivot element into the location of the newly marked element. + elements[index] = elements[pivot]; + elementIndices[elements[index]] = index; + // Move the newly marked element into the pivot location. + elements[pivot] = element; + elementIndices[element] = pivot; + // Update the pivot index to mark the element. + ++pivots[set]; + } + } +}; + +Partitions initializeStatePartitions(const InputGraph& inputGraph, + size_t numElements) { + Partitions partitions(numElements); + size_t elementIndex = 0; + for (const auto& partition : inputGraph) { + size_t set = partitions.sets++; + partitions.beginnings[set] = elementIndex; + partitions.pivots[set] = elementIndex; + for (size_t i = 0; i < partition.size(); ++i) { + partitions.elements[elementIndex] = elementIndex; + partitions.elementIndices[elementIndex] = elementIndex; + partitions.setIndices[elementIndex] = set; + ++elementIndex; + } + partitions.endings[set] = elementIndex; + } + return partitions; +} + +// A DFA transition into a state. +struct Transition { + size_t pred; + size_t label; +}; + +void initializeTransitions(const InputGraph& inputGraph, + size_t numElements, + size_t numTransitions, + std::vector<Transition>& transitions, + std::vector<size_t>& transitionIndices) { + // Find the transitions into each state. Map destinations to input + // transitions. + std::map<size_t, std::vector<Transition>> transitionMap; + size_t elementIndex = 0; + for (const auto& partition : inputGraph) { + for (const auto& elem : partition) { + size_t label = 0; + for (const auto& succ : elem) { + transitionMap[succ].push_back({elementIndex, label++}); + } + ++elementIndex; + } + } + + // Populate `transitions` and `transitionIndices`. + transitions.reserve(numTransitions); + transitionIndices.reserve(numElements + 1); + for (size_t dest = 0; dest < numElements; ++dest) { + // Record the first index of transitions leading to `dest`. + transitionIndices.push_back(transitions.size()); + if (auto it = transitionMap.find(dest); it != transitionMap.end()) { + transitions.insert( + transitions.end(), it->second.begin(), it->second.end()); + } + } + // Record one-past the end of the transitions leading to the final `dest`. + transitionIndices.push_back(transitions.size()); +} + +Partitions +initializeSplitterPartitions(Partitions& partitions, + const std::vector<Transition>& transitions, + const std::vector<size_t>& transitionIndices) { + // The initial sets of splitters are partitioned by destination state + // partition and transition label. + Partitions splitters(transitions.size()); + size_t elementIndex = 0; + for (size_t statePartition = 0; statePartition < partitions.sets; + ++statePartition) { + // The in-transitions leading to states in the current partition, organized + // by transition label. + std::map<size_t, std::vector<size_t>> currTransitions; + for (size_t state : partitions.getSet(statePartition)) { + for (size_t transition = transitionIndices[state], + end = transitionIndices[state + 1]; + transition < end; + ++transition) { + currTransitions[transitions[transition].label].push_back(transition); + } + } + // Create a splitter partition for each in-transition label leading to the + // current state partition. + for (auto& pair : currTransitions) { + size_t set = splitters.sets++; + splitters.beginnings[set] = elementIndex; + splitters.pivots[set] = elementIndex; + for (size_t transition : pair.second) { + splitters.elements[elementIndex] = transition; + splitters.elementIndices[transition] = elementIndex; + splitters.setIndices[transition] = set; + ++elementIndex; + } + splitters.endings[set] = elementIndex; + } + } + return splitters; +} + +} // anonymous namespace + +namespace Internal { + +std::vector<std::vector<size_t>> +refinePartitionsImpl(const InputGraph& inputGraph) { + // Find the number of states and transitions. + size_t numElements = 0; + size_t numTransitions = 0; + for (const auto& partition : inputGraph) { + numElements += partition.size(); + for (const auto& elem : partition) { + numTransitions += elem.size(); + } + } + + // The partitions of DFA states. + Partitions partitions = initializeStatePartitions(inputGraph, numElements); + + // The transitions arranged such that the transitions leading to state `q` are + // `transitions[transitionIndices[q] : transitionIndices[q+1]]`. + std::vector<Transition> transitions; + std::vector<size_t> transitionIndices; + initializeTransitions( + inputGraph, numElements, numTransitions, transitions, transitionIndices); + + // The splitters, which are partitions of the input transitions. + Partitions splitters = + initializeSplitterPartitions(partitions, transitions, transitionIndices); + + // The list of splitter partitions that might be able to split states in some + // state partition. Starts out containing all splitter partitions. + std::vector<size_t> potentialSplitters; + potentialSplitters.reserve(splitters.sets); + for (size_t i = 0; i < splitters.sets; ++i) { + potentialSplitters.push_back(i); + } + + while (!potentialSplitters.empty()) { + size_t potentialSplitter = potentialSplitters.back(); + potentialSplitters.pop_back(); + + // The partitions that may be able to be split. + std::vector<size_t> markedPartitions; + + // Mark states that are predecessors via this splitter partition. + for (size_t transition : splitters.getSet(potentialSplitter)) { + size_t state = transitions[transition].pred; + auto partition = partitions.getSetForElem(state); + if (!partition.hasMarks()) { + markedPartitions.push_back(partition.index); + } + partitions.mark(state); + } + + // Try to split each partition with marked states. + for (size_t partition : markedPartitions) { + size_t newPartition = partitions.getSet(partition).split(); + if (!newPartition) { + // There was nothing to split. + continue; + } + + // We only want to keep using the smaller of the two split partitions. + if (partitions.getSet(newPartition).size() < + partitions.getSet(partition).size()) { + newPartition = partition; + } + + // The splitter partitions that may need to be split to match the new + // split of the state partitions. + std::vector<size_t> markedSplitters; + + // Mark transitions that lead to the newly split off states. + for (size_t state : partitions.getSet(newPartition)) { + for (size_t t = transitionIndices[state], + end = transitionIndices[state + 1]; + t < end; + ++t) { + auto splitter = splitters.getSetForElem(t); + if (!splitter.hasMarks()) { + markedSplitters.push_back(splitter.index); + } + splitters.mark(t); + } + } + + // Split the splitters and update `potentialSplitters`. + for (size_t splitter : markedSplitters) { + size_t newSplitter = splitters.getSet(splitter).split(); + if (newSplitter) { + potentialSplitters.push_back(newSplitter); + } + } + } + } + + // Return the refined partitions. + std::vector<std::vector<size_t>> resultPartitions; + resultPartitions.reserve(partitions.sets); + for (size_t p = 0; p < partitions.sets; ++p) { + auto partition = partitions.getSet(p); + std::vector<size_t> resultPartition; + resultPartition.reserve(partition.size()); + for (size_t elem : partition) { + resultPartition.push_back(elem); + } + resultPartitions.emplace_back(std::move(resultPartition)); + } + return resultPartitions; +} + +} // namespace Internal + +} // namespace wasm::DFA |