Diffstat (limited to 'src')
-rw-r--r-- | src/asm2wasm.h               |   7
-rw-r--r-- | src/passes/MemoryPacking.cpp | 719
-rw-r--r-- | src/passes/pass.cpp          |   2
-rw-r--r-- | src/tools/asm2wasm.cpp       |   9
4 files changed, 616 insertions, 121 deletions
diff --git a/src/asm2wasm.h b/src/asm2wasm.h
index fb9635018..33e84bc33 100644
--- a/src/asm2wasm.h
+++ b/src/asm2wasm.h
@@ -1363,6 +1363,13 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
 
   if (runOptimizationPasses) {
     optimizingBuilder->finish();
+    // Now that we have a full module, do memory packing optimizations
+    {
+      PassRunner passRunner(&wasm, passOptions);
+      passRunner.options.lowMemoryUnused = true;
+      passRunner.add("memory-packing");
+      passRunner.run();
+    }
     // if we added any helper functions (like non-trapping i32-div, etc.), then
     // those have not been optimized (the optimizing builder has just been fed
     // the asm.js functions). Optimize those now. Typically there are very few,
diff --git a/src/passes/MemoryPacking.cpp b/src/passes/MemoryPacking.cpp
index 98c7cce00..d8e59cae6 100644
--- a/src/passes/MemoryPacking.cpp
+++ b/src/passes/MemoryPacking.cpp
@@ -14,7 +14,22 @@
  * limitations under the License.
  */
 
+//
+// Memory Packing.
+//
+// Reduces binary size by splitting data segments around ranges of zeros. This
+// pass assumes that memory initialized by active segments is zero on
+// instantiation and therefore simply drops the zero ranges from the active
+// segments. For passive segments, we perform the same splitting, but we also
+// record how each segment was split and update all bulk memory operations
+// accordingly. To preserve trapping semantics for memory.init instructions, it
+// is sometimes necessary to explicitly track whether input segments would have
+// been dropped in globals. We are careful to emit only as many of these globals
+// as necessary.
+//
+
 #include "ir/manipulation.h"
+#include "ir/module-utils.h"
 #include "ir/utils.h"
 #include "pass.h"
 #include "wasm-binary.h"
@@ -23,151 +38,633 @@
 
 namespace wasm {
 
-// Adding segments adds overhead, this is a rough estimate
-const Index OVERHEAD = 8;
+// A subsection of an original memory segment. If `isZero` is true, memory.fill
+// will be used instead of memory.init for this range.
+struct Range {
+  bool isZero;
+  size_t start;
+  size_t end;
+};
+
+// A function that produces the transformed bulk memory op. We need to use a
+// function here instead of simple data because the replacement code sequence
+// may require allocating new locals, which in turn requires the enclosing
+// Function, which is only available in the parallelized instruction replacement
+// phase. However, we can't move the entire calculation of replacement code
+// sequences into the parallel phase because the lowering of data.drops depends
+// on the lowering of memory.inits to determine whether a drop state global is
+// necessary. The solution is that we calculate the shape of the replacement
+// code sequence up front and use a closure just to allocate and insert new
+// locals as necessary.
+using Replacement = std::function<Expression*(Function*)>;
+
+// Maps each bulk memory op to the replacement that must be applied to it.
+using Replacements = std::unordered_map<Expression*, Replacement>;
+
+// A collection of bulk memory operations referring to a particular segment
+using Referrers = std::vector<Expression*>;
+
+// memory.init: 2 byte opcode + 1 byte segment index + 1 byte memory index +
+// 3 x 2 byte operands
+const size_t MEMORY_INIT_SIZE = 10;
+
+// memory.fill: 2 byte opcode + 1 byte memory index + 3 x 2 byte operands
+const size_t MEMORY_FILL_SIZE = 9;
+
+// data.drop: 2 byte opcode + ~1 byte index immediate
+const size_t DATA_DROP_SIZE = 3;
+
+namespace {
+
+Expression* makeShiftedMemorySize(Builder& builder) {
+  return builder.makeBinary(ShlInt32,
+                            builder.makeHost(MemorySize, Name(), {}),
+                            builder.makeConst(Literal(int32_t(16))));
+}
+
+} // anonymous namespace
 
 struct MemoryPacking : public Pass {
-  bool modifiesBinaryenIR() override { return false; }
+  size_t dropStateGlobalCount = 0;
 
-  void run(PassRunner* runner, Module* module) override {
-    if (!module->memory.exists) {
-      return;
+  void run(PassRunner* runner, Module* module) override;
+  void optimizeBulkMemoryOps(PassRunner* runner, Module* module);
+  void getSegmentReferrers(Module* module, std::vector<Referrers>& referrers);
+  void dropUnusedSegments(std::vector<Memory::Segment>& segments,
+                          std::vector<Referrers>& referrers);
+  bool canSplit(const Memory::Segment& segment, const Referrers& referrers);
+  void calculateRanges(const Memory::Segment& segment,
+                       const Referrers& referrers,
+                       std::vector<Range>& ranges);
+  void createSplitSegments(Builder& builder,
+                           const Memory::Segment& segment,
+                           std::vector<Range>& ranges,
+                           std::vector<Memory::Segment>& packed,
+                           size_t segmentsRemaining);
+  void createReplacements(Module* module,
+                          const std::vector<Range>& ranges,
+                          const Referrers& referrers,
+                          Replacements& replacements,
+                          const Index segmentIndex);
+  void replaceBulkMemoryOps(PassRunner* runner,
+                            Module* module,
+                            Replacements& replacements);
+};
+
+void MemoryPacking::run(PassRunner* runner, Module* module) {
+  if (!module->memory.exists) {
+    return;
+  }
+
+  auto& segments = module->memory.segments;
+
+  // For each segment, a list of bulk memory instructions that refer to it
+  std::vector<Referrers> referrers(segments.size());
+
+  if (module->features.hasBulkMemory()) {
+    // Optimize out memory.inits and data.drops that can be entirely replaced
+    // with other instruction sequences. This can increase the number of unused
+    // segments that can be dropped entirely and allows later replacement
+    // creation to make more assumptions about what these instructions will look
+    // like, such as memory.inits not having both zero offset and size.
+    optimizeBulkMemoryOps(runner, module);
+    getSegmentReferrers(module, referrers);
+    dropUnusedSegments(segments, referrers);
+  }
+
+  // The new, split memory segments
+  std::vector<Memory::Segment> packed;
+
+  Replacements replacements;
+  Builder builder(*module);
+  for (size_t origIndex = 0; origIndex < segments.size(); ++origIndex) {
+    auto& segment = segments[origIndex];
+    auto& currReferrers = referrers[origIndex];
+
+    std::vector<Range> ranges;
+    if (canSplit(segment, currReferrers)) {
+      calculateRanges(segment, currReferrers, ranges);
+    } else {
+      // A single range covers the entire segment. Set isZero to false so the
+      // original memory.init will be used even if segment is all zeroes.
+      ranges.push_back({false, 0, segment.data.size()});
     }
 
-    if (module->features.hasBulkMemory()) {
-      // Remove any references to active segments that might be invalidated.
-      optimizeTrappingBulkMemoryOps(runner, module);
-      // Conservatively refuse to change segments if any are passive to avoid
-      // invalidating segment indices or segment contents referenced from
-      // memory.init and data.drop instructions.
-      // TODO: optimize in the presence of memory.init and data.drop
-      for (auto segment : module->memory.segments) {
-        if (segment.isPassive) {
-          return;
+    Index firstNewIndex = packed.size();
+    size_t segmentsRemaining = segments.size() - origIndex;
+    createSplitSegments(builder, segment, ranges, packed, segmentsRemaining);
+    createReplacements(
+      module, ranges, currReferrers, replacements, firstNewIndex);
+  }
+
+  segments.swap(packed);
+
+  if (module->features.hasBulkMemory()) {
+    replaceBulkMemoryOps(runner, module, replacements);
+  }
+}
+
+bool MemoryPacking::canSplit(const Memory::Segment& segment,
+                             const Referrers& referrers) {
+  if (segment.isPassive) {
+    for (auto* referrer : referrers) {
+      if (auto* init = referrer->dynCast<MemoryInit>()) {
+        // Do not try to split if there is a nonconstant offset or size
+        if (!init->offset->is<Const>() || !init->size->is<Const>()) {
+          return false;
         }
       }
     }
+    return true;
+  } else {
+    // Active segments can only be split if they have constant offsets
+    return segment.offset->is<Const>();
+  }
+}
 
-    std::vector<Memory::Segment> packed;
+void MemoryPacking::calculateRanges(const Memory::Segment& segment,
+                                    const Referrers& referrers,
+                                    std::vector<Range>& ranges) {
+  auto& data = segment.data;
+  if (data.size() == 0) {
+    return;
+  }
 
-    // we can only handle a constant offset for splitting
-    auto isSplittable = [&](const Memory::Segment& segment) {
-      return segment.offset->is<Const>();
-    };
+  // Calculate initial zero and nonzero ranges
+  size_t start = 0;
+  while (start < data.size()) {
+    size_t end = start;
+    while (end < data.size() && data[end] == 0) {
+      end++;
+    }
+    if (end > start) {
+      ranges.push_back({true, start, end});
+      start = end;
+    }
+    while (end < data.size() && data[end] != 0) {
+      end++;
+    }
+    if (end > start) {
+      ranges.push_back({false, start, end});
+      start = end;
+    }
+  }
 
-    for (auto& segment : module->memory.segments) {
-      if (!isSplittable(segment)) {
-        packed.push_back(segment);
+  // Calculate the number of consecutive zeroes for which splitting is
+  // beneficial. This is an approximation that assumes all memory.inits cover an
+  // entire segment and that all its arguments are constants. These assumptions
+  // are true of all memory.inits generated by the tools.
+  size_t threshold = 0;
+  if (segment.isPassive) {
+    // Passive segment metadata size
+    threshold += 2;
+    // Zeroes on the edge do not increase the number of segments or data.drops,
+    // so their threshold is lower. The threshold for interior zeroes depends on
+    // an estimate of the number of new memory.fill and data.drop instructions
+    // splitting would introduce.
+    size_t edgeThreshold = 0;
+    for (auto* referrer : referrers) {
+      if (referrer->is<MemoryInit>()) {
+        // Splitting adds a new memory.fill and a new memory.init
+        threshold += MEMORY_FILL_SIZE + MEMORY_INIT_SIZE;
+        edgeThreshold += MEMORY_FILL_SIZE;
+      } else {
+        threshold += DATA_DROP_SIZE;
       }
     }
 
-    size_t numRemaining = module->memory.segments.size() - packed.size();
+    // Merge edge zeroes if they are not worth splitting
+    if (ranges.size() >= 2) {
+      auto last = ranges.end() - 1;
+      auto penultimate = ranges.end() - 2;
+      if (last->isZero && last->end - last->start <= edgeThreshold) {
+        penultimate->end = last->end;
+        ranges.erase(last);
+      }
+    }
+    if (ranges.size() >= 2) {
+      auto first = ranges.begin();
+      auto second = ranges.begin() + 1;
+      if (first->isZero && first->end - first->start <= edgeThreshold) {
+        second->start = first->start;
+        ranges.erase(first);
+      }
+    }
+  } else {
+    // Legacy ballpark overhead of active segment with offset
+    // TODO: Tune this
+    threshold = 8;
+  }
 
-    // Split only if we have room for more segments
-    auto shouldSplit = [&]() {
-      return WebLimitations::MaxDataSegments > packed.size() + numRemaining;
-    };
+  // Merge ranges across small spans of zeroes
+  std::vector<Range> mergedRanges = {ranges.front()};
+  size_t i;
+  for (i = 1; i < ranges.size() - 1; ++i) {
+    auto left = mergedRanges.end() - 1;
+    auto curr = ranges.begin() + i;
+    auto right = ranges.begin() + i + 1;
+    if (curr->isZero && curr->end - curr->start <= threshold) {
+      left->end = right->end;
+      ++i;
+    } else {
+      mergedRanges.push_back(*curr);
+    }
+  }
+  // Add the final range if it hasn't already been merged in
+  if (i < ranges.size()) {
+    mergedRanges.push_back(ranges.back());
+  }
+  std::swap(ranges, mergedRanges);
+}
 
-    for (auto& segment : module->memory.segments) {
-      if (!isSplittable(segment)) {
-        continue;
-      }
+void MemoryPacking::optimizeBulkMemoryOps(PassRunner* runner, Module* module) {
+  struct Optimizer : WalkerPass<PostWalker<Optimizer>> {
+    bool isFunctionParallel() override { return true; }
+    Pass* create() override { return new Optimizer; }
 
-      // skip final zeros
-      while (segment.data.size() > 0 && segment.data.back() == 0) {
-        segment.data.pop_back();
+    bool needsRefinalizing;
+
+    void visitMemoryInit(MemoryInit* curr) {
+      Builder builder(*getModule());
+      Memory::Segment& segment = getModule()->memory.segments[curr->segment];
+      size_t maxRuntimeSize = segment.isPassive ? segment.data.size() : 0;
+      bool mustNop = false;
+      bool mustTrap = false;
+      auto* offset = curr->offset->dynCast<Const>();
+      auto* size = curr->size->dynCast<Const>();
+      if (offset && uint32_t(offset->value.geti32()) > maxRuntimeSize) {
+        mustTrap = true;
+      }
+      if (size && uint32_t(size->value.geti32()) > maxRuntimeSize) {
+        mustTrap = true;
+      }
+      if (offset && size) {
+        uint64_t offsetVal(offset->value.geti32());
+        uint64_t sizeVal(size->value.geti32());
+        if (offsetVal + sizeVal > maxRuntimeSize) {
+          mustTrap = true;
+        } else if (offsetVal == 0 && sizeVal == 0) {
+          mustNop = true;
+        }
+      }
+      assert(!mustNop || !mustTrap);
+      if (mustNop) {
+        // Offset and size are 0, so just trap if dest > memory.size
+        replaceCurrent(builder.makeIf(
+          builder.makeBinary(
+            GtUInt32, curr->dest, makeShiftedMemorySize(builder)),
+          builder.makeUnreachable()));
+      } else if (mustTrap) {
+        // Drop dest, offset, and size then trap
+        replaceCurrent(builder.blockify(builder.makeDrop(curr->dest),
+                                        builder.makeDrop(curr->offset),
+                                        builder.makeDrop(curr->size),
+                                        builder.makeUnreachable()));
+        needsRefinalizing = true;
+      } else if (!segment.isPassive) {
+        // trap if (dest > memory.size | offset | size) != 0
+        replaceCurrent(builder.makeIf(
+          builder.makeBinary(
+            OrInt32,
+            builder.makeBinary(
+              GtUInt32, curr->dest, makeShiftedMemorySize(builder)),
+            builder.makeBinary(OrInt32, curr->offset, curr->size)),
+          builder.makeUnreachable()));
+      }
+    }
+    void visitDataDrop(DataDrop* curr) {
+      if (!getModule()->memory.segments[curr->segment].isPassive) {
+        ExpressionManipulator::nop(curr);
+      }
+    }
+    void doWalkFunction(Function* func) {
+      needsRefinalizing = false;
+      super::doWalkFunction(func);
+      if (needsRefinalizing) {
+        ReFinalize().walkFunctionInModule(func, getModule());
+      }
+    }
+  } optimizer;
+  optimizer.run(runner, module);
+}
+
+void MemoryPacking::getSegmentReferrers(Module* module,
+                                        std::vector<Referrers>& referrers) {
+  auto collectReferrers = [&](Function* func,
+                              std::vector<Referrers>& referrers) {
+    if (func->imported()) {
+      return;
+    }
+    struct Collector : WalkerPass<PostWalker<Collector>> {
+      std::vector<Referrers>& referrers;
+      Collector(std::vector<Referrers>& referrers) : referrers(referrers) {}
 
-      if (!shouldSplit()) {
-        packed.push_back(segment);
-        continue;
+      void visitMemoryInit(MemoryInit* curr) {
+        referrers[curr->segment].push_back(curr);
+      }
+      void visitDataDrop(DataDrop* curr) {
+        referrers[curr->segment].push_back(curr);
+      }
+      void doWalkFunction(Function* func) {
+        referrers.resize(getModule()->memory.segments.size());
+        super::doWalkFunction(func);
       }
+    } collector(referrers);
+    collector.walkFunctionInModule(func, module);
+  };
+  ModuleUtils::ParallelFunctionAnalysis<std::vector<Referrers>> analysis(
+    *module, collectReferrers);
+  referrers.resize(module->memory.segments.size());
+  for (auto& pair : analysis.map) {
+    std::vector<Referrers>& funcReferrers = pair.second;
+    for (size_t i = 0; i < funcReferrers.size(); ++i) {
+      referrers[i].insert(
+        referrers[i].end(), funcReferrers[i].begin(), funcReferrers[i].end());
+    }
+  }
+}
 
-      auto* offset = segment.offset->cast<Const>();
-      // Find runs of zeros, and split
-      auto& data = segment.data;
-      auto base = offset->value.geti32();
-      Index start = 0;
-      // create new segments
-      while (start < data.size()) {
-        // skip initial zeros
-        while (start < data.size() && data[start] == 0) {
-          start++;
-        }
-        Index end = start; // end of data-containing part
-        Index next = end;  // after zeros we can skip. preserves next >= end
-        if (!shouldSplit()) {
-          next = end = data.size();
-        }
-        while (next < data.size() && (next - end < OVERHEAD)) {
-          if (data[end] != 0) {
-            end++;
-            next = end; // we can try to skip zeros from here
-          } else {
-            // end is on a zero, we are looking to skip
-            if (data[next] != 0) {
-              end = next; // we must extend the segment, including some zeros
-            } else {
-              next++;
-            }
-          }
-        }
-        if (end != start) {
-          packed.emplace_back(
-            Builder(*module).makeConst(Literal(int32_t(base + start))),
-            &data[start],
-            end - start);
+void MemoryPacking::dropUnusedSegments(std::vector<Memory::Segment>& segments,
+                                       std::vector<Referrers>& referrers) {
+  std::vector<Memory::Segment> usedSegments;
+  std::vector<Referrers> usedReferrers;
+  // Remove segments that are never used
+  // TODO: remove unused portions of partially used segments as well
+  for (size_t i = 0; i < segments.size(); ++i) {
+    bool used = false;
+    if (segments[i].isPassive) {
+      for (auto* referrer : referrers[i]) {
+        if (referrer->is<MemoryInit>()) {
+          used = true;
+          break;
         }
-        start = next;
       }
-      numRemaining--;
+    } else {
+      used = true;
+    }
+    if (used) {
+      usedSegments.push_back(segments[i]);
+      usedReferrers.push_back(referrers[i]);
+    } else {
+      // All referrers are data.drops. Make them nops.
+      for (auto* referrer : referrers[i]) {
+        ExpressionManipulator::nop(referrer);
+      }
     }
-    module->memory.segments.swap(packed);
   }
+  std::swap(segments, usedSegments);
+  std::swap(referrers, usedReferrers);
+}
 
-  void optimizeTrappingBulkMemoryOps(PassRunner* runner, Module* module) {
-    struct Trapper : WalkerPass<PostWalker<Trapper>> {
-      bool isFunctionParallel() override { return true; }
-      bool changed;
+void MemoryPacking::createSplitSegments(Builder& builder,
+                                        const Memory::Segment& segment,
+                                        std::vector<Range>& ranges,
+                                        std::vector<Memory::Segment>& packed,
+                                        size_t segmentsRemaining) {
+  for (size_t i = 0; i < ranges.size(); ++i) {
+    Range& range = ranges[i];
+    if (range.isZero) {
+      continue;
+    }
+    Expression* offset = nullptr;
+    if (!segment.isPassive) {
+      if (auto* c = segment.offset->dynCast<Const>()) {
+        offset =
+          builder.makeConst(Literal(int32_t(c->value.geti32() + range.start)));
+      } else {
+        assert(ranges.size() == 1);
+        offset = segment.offset;
+      }
+    }
+    if (WebLimitations::MaxDataSegments <= packed.size() + segmentsRemaining) {
+      // Give up splitting and merge all remaining ranges except end zeroes
+      auto lastNonzero = ranges.end() - 1;
+      if (lastNonzero->isZero) {
+        --lastNonzero;
+      }
+      range.end = lastNonzero->end;
+      ranges.erase(ranges.begin() + i + 1, lastNonzero + 1);
+    }
+    packed.emplace_back(segment.isPassive,
+                        offset,
+                        &segment.data[range.start],
+                        range.end - range.start);
+  }
+}
 
-      Pass* create() override { return new Trapper; }
+void MemoryPacking::createReplacements(Module* module,
+                                       const std::vector<Range>& ranges,
+                                       const Referrers& referrers,
+                                       Replacements& replacements,
+                                       const Index segmentIndex) {
+  // If there was no transformation, only update the indices
+  if (ranges.size() == 1 && !ranges.front().isZero) {
+    for (auto referrer : referrers) {
+      replacements[referrer] = [referrer, segmentIndex](Function*) {
+        if (auto* init = referrer->dynCast<MemoryInit>()) {
+          init->segment = segmentIndex;
+        } else if (auto* drop = referrer->dynCast<DataDrop>()) {
+          drop->segment = segmentIndex;
+        } else {
+          WASM_UNREACHABLE("Unexpected bulk memory operation");
+        }
+        return referrer;
+      };
+    }
+    return;
+  }
 
-      void visitMemoryInit(MemoryInit* curr) {
-        if (!getModule()->memory.segments[curr->segment].isPassive) {
-          Builder builder(*getModule());
-          // trap if (dest > memory.size | offset | size) != 0
-          replaceCurrent(builder.makeIf(
-            builder.makeBinary(
-              OrInt32,
-              builder.makeBinary(
-                GtUInt32,
-                curr->dest,
-                builder.makeBinary(
-                  MulInt32,
-                  builder.makeConst(Literal(Memory::kPageSize)),
-                  builder.makeHost(MemorySize, Name(), {}))),
-              builder.makeBinary(OrInt32, curr->offset, curr->size)),
-            builder.makeUnreachable()));
-          changed = true;
+  Builder builder(*module);
+
+  Name dropStateGlobal;
+
+  // Return the drop state global, initializing it if it does not exist. This
+  // may change module-global state and has the important side effect of setting
+  // dropStateGlobal, so it must be evaluated eagerly, not in the replacements.
+  auto getDropStateGlobal = [&]() {
+    if (dropStateGlobal != Name()) {
+      return dropStateGlobal;
+    }
+    dropStateGlobal = Name(std::string("__mem_segment_drop_state_") +
+                           std::to_string(dropStateGlobalCount++));
+    module->addGlobal(builder.makeGlobal(dropStateGlobal,
+                                         Type::i32,
+                                         builder.makeConst(Literal(int32_t(0))),
+                                         Builder::Mutable));
+    return dropStateGlobal;
+  };
+
+  // Create replacements for memory.init instructions first
+  for (auto referrer : referrers) {
+    auto* init = referrer->dynCast<MemoryInit>();
+    if (init == nullptr) {
+      continue;
+    }
+
+    // Nonconstant offsets or sizes will have inhibited splitting
+    size_t start = init->offset->cast<Const>()->value.geti32();
+    size_t end = start + init->size->cast<Const>()->value.geti32();
+
+    // Index of the range from which this memory.init starts reading
+    size_t firstRangeIdx = 0;
+    while (firstRangeIdx < ranges.size() &&
+           ranges[firstRangeIdx].end <= start) {
+      ++firstRangeIdx;
+    }
+
+    // Handle zero-length memory.inits separately so we can later assume that
+    // start is in bounds and that some range will be intersected.
+    if (start == end) {
+      // Offset is nonzero because init would otherwise have previously been
+      // optimized out, so trap if the dest is out of bounds or the segment is
+      // dropped
+      Expression* result = builder.makeIf(
+        builder.makeBinary(
+          OrInt32,
+          builder.makeBinary(
+            GtUInt32, init->dest, makeShiftedMemorySize(builder)),
+          builder.makeGlobalGet(getDropStateGlobal(), Type::i32)),
+        builder.makeUnreachable());
+      replacements[init] = [result](Function*) { return result; };
+      continue;
+    }
+
+    assert(firstRangeIdx < ranges.size());
+
+    // Split init into multiple memory.inits and memory.fills, storing the
+    // original base destination in a local if it is not a constant. If the
+    // first access is a memory.fill, explicitly check the drop status first to
+    // avoid writing zeroes when we should have trapped.
+    Expression* result = nullptr;
+    auto appendResult = [&](Expression* expr) {
+      result = result ? builder.blockify(result, expr) : expr;
+    };
+
+    // The local var holding the dest is not known until replacement time. Keep
+    // track of the locations where it will need to be patched in.
+    Index* setVar = nullptr;
+    std::vector<Index*> getVars;
+    if (!init->dest->is<Const>()) {
+      auto set = builder.makeLocalSet(-1, init->dest);
+      setVar = &set->index;
+      appendResult(set);
+    }
+
+    // We only need to explicitly check the drop state when we will emit
+    // memory.fill first, since memory.init will implicitly do the check for us.
+    if (ranges[firstRangeIdx].isZero) {
+      appendResult(
+        builder.makeIf(builder.makeGlobalGet(getDropStateGlobal(), Type::i32),
+                       builder.makeUnreachable()));
+    }
+
+    size_t bytesWritten = 0;
+
+    size_t initIndex = segmentIndex;
+    for (size_t i = firstRangeIdx; i < ranges.size() && ranges[i].start < end;
+         ++i) {
+      auto& range = ranges[i];
+
+      // Calculate dest, either as a const or as an addition to the dest local
+      Expression* dest;
+      if (auto* c = init->dest->dynCast<Const>()) {
+        dest =
+          builder.makeConst(Literal(int32_t(c->value.geti32() + bytesWritten)));
+      } else {
+        auto* get = builder.makeLocalGet(-1, Type::i32);
+        getVars.push_back(&get->index);
+        dest = get;
+        if (bytesWritten > 0) {
+          Const* addend = builder.makeConst(Literal(int32_t(bytesWritten)));
+          dest = builder.makeBinary(AddInt32, dest, addend);
         }
       }
-      void visitDataDrop(DataDrop* curr) {
-        if (!getModule()->memory.segments[curr->segment].isPassive) {
-          ExpressionManipulator::nop(curr);
-          changed = true;
-        }
+
+      // How many bytes are read from this range
+      size_t bytes = std::min(range.end, end) - std::max(range.start, start);
+      Expression* size = builder.makeConst(Literal(int32_t(bytes)));
+      bytesWritten += bytes;
+
+      // Create new memory.init or memory.fill
+      if (range.isZero) {
+        Expression* value = builder.makeConst(Literal::makeZero(Type::i32));
+        appendResult(builder.makeMemoryFill(dest, value, size));
+      } else {
+        size_t offsetBytes = std::max(start, range.start) - range.start;
+        Expression* offset = builder.makeConst(Literal(int32_t(offsetBytes)));
+        appendResult(builder.makeMemoryInit(initIndex, dest, offset, size));
+        initIndex++;
       }
-      void doWalkFunction(Function* func) {
-        changed = false;
-        super::doWalkFunction(func);
-        if (changed) {
-          ReFinalize().walkFunctionInModule(func, getModule());
+    }
+
+    // Non-zero length memory.inits must have intersected some range
+    assert(result);
+    replacements[init] = [module, setVar, getVars, result](Function* function) {
+      if (setVar != nullptr) {
+        Index destVar = Builder(*module).addVar(function, Type::i32);
+        *setVar = destVar;
+        for (auto* getVar : getVars) {
+          *getVar = destVar;
         }
       }
-    } trapper;
-    trapper.run(runner, module);
+      return result;
+    };
   }
-};
+
+  // Create replacements for data.drop instructions now that we know whether we
+  // need a drop state global
+  for (auto drop : referrers) {
+    if (!drop->is<DataDrop>()) {
+      continue;
+    }
+
+    Expression* result = nullptr;
+    auto appendResult = [&](Expression* expr) {
+      result = result ? builder.blockify(result, expr) : expr;
+    };
+
+    // Track drop state in a global only if some memory.init required it
+    if (dropStateGlobal != Name()) {
+      appendResult(builder.makeGlobalSet(
+        dropStateGlobal, builder.makeConst(Literal(int32_t(1)))));
+    }
+    size_t dropIndex = segmentIndex;
+    for (auto range : ranges) {
+      if (!range.isZero) {
+        appendResult(builder.makeDataDrop(dropIndex++));
+      }
+    }
+    replacements[drop] = [result, module](Function*) {
+      return result ? result : Builder(*module).makeNop();
+    };
+  }
+}
+
+void MemoryPacking::replaceBulkMemoryOps(PassRunner* runner,
+                                         Module* module,
+                                         Replacements& replacements) {
+  struct Replacer : WalkerPass<PostWalker<Replacer>> {
+    bool isFunctionParallel() override { return true; }
+
+    Replacements& replacements;
+
+    Replacer(Replacements& replacements) : replacements(replacements){};
+    Pass* create() override { return new Replacer(replacements); }
+
+    void visitMemoryInit(MemoryInit* curr) {
+      auto replacement = replacements.find(curr);
+      assert(replacement != replacements.end());
+      replaceCurrent(replacement->second(getFunction()));
+    }
+
+    void visitDataDrop(DataDrop* curr) {
+      auto replacement = replacements.find(curr);
+      assert(replacement != replacements.end());
+      replaceCurrent(replacement->second(getFunction()));
+    }
+  } replacer(replacements);
+  replacer.run(runner, module);
+}
 
 Pass* createMemoryPackingPass() { return new MemoryPacking(); }
 
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index ac4f6b661..80cc0c1c9 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -430,6 +430,7 @@ void PassRunner::addDefaultFunctionOptimizationPasses() {
 
 void PassRunner::addDefaultGlobalOptimizationPrePasses() {
   add("duplicate-function-elimination");
+  add("memory-packing");
 }
 
 void PassRunner::addDefaultGlobalOptimizationPostPasses() {
@@ -448,7 +449,6 @@ void PassRunner::addDefaultGlobalOptimizationPostPasses() {
     add("simplify-globals");
   }
   add("remove-unused-module-elements");
-  add("memory-packing");
   // may allow more inlining/dae/etc., need --converge for that
   add("directize");
   // perform Stack IR optimizations here, at the very end of the
diff --git a/src/tools/asm2wasm.cpp b/src/tools/asm2wasm.cpp
index 6b20d1bcb..8b7e4b1b7 100644
--- a/src/tools/asm2wasm.cpp
+++ b/src/tools/asm2wasm.cpp
@@ -249,15 +249,6 @@ int main(int argc, const char* argv[]) {
                          wasmOnly);
   asm2wasm.processAsm(asmjs);
 
-  // finalize the imported mem init
-  if (memInit != options.extra.end()) {
-    if (options.runningDefaultOptimizationPasses()) {
-      PassRunner runner(&wasm);
-      runner.add("memory-packing");
-      runner.run();
-    }
-  }
-
   // Set the max memory size, if requested
   const auto& memMax = options.extra.find("mem max");
   if (memMax != options.extra.end()) {
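The heart of the new pass is the zero-run splitting done by calculateRanges. As a reading aid, here is a minimal standalone sketch of that idea, assuming a plain byte vector and a single precomputed threshold. The name splitAroundZeroes and the fixed threshold parameter are illustrative only; the real pass derives separate interior and edge thresholds from MEMORY_INIT_SIZE, MEMORY_FILL_SIZE, DATA_DROP_SIZE, and the segment's referrers, and caps the number of resulting segments at WebLimitations::MaxDataSegments.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Illustrative mirror of the pass's Range: a subrange of segment data.
    // isZero marks a run of zero bytes that can be dropped (active segments)
    // or recreated with memory.fill (passive segments).
    struct Range {
      bool isZero;
      size_t start;
      size_t end;
    };

    // Split `data` into alternating zero/nonzero runs, then fold any interior
    // zero run of `threshold` bytes or fewer back into its neighbors, since a
    // short gap is cheaper to keep inline than to pay for an extra segment and
    // the extra bulk memory instructions that splitting would require.
    std::vector<Range> splitAroundZeroes(const std::vector<uint8_t>& data,
                                         size_t threshold) {
      std::vector<Range> ranges;
      size_t start = 0;
      while (start < data.size()) {
        size_t end = start;
        while (end < data.size() && data[end] == 0) {
          end++;
        }
        if (end > start) {
          ranges.push_back({true, start, end});
          start = end;
        }
        while (end < data.size() && data[end] != 0) {
          end++;
        }
        if (end > start) {
          ranges.push_back({false, start, end});
          start = end;
        }
      }
      if (ranges.size() <= 2) {
        return ranges; // nothing interior to merge
      }
      std::vector<Range> merged = {ranges.front()};
      for (size_t i = 1; i < ranges.size(); ++i) {
        bool isInterior = i + 1 < ranges.size();
        if (ranges[i].isZero && isInterior &&
            ranges[i].end - ranges[i].start <= threshold) {
          // Absorb the small zero run and the nonzero run after it.
          merged.back().end = ranges[i + 1].end;
          ++i;
        } else {
          merged.push_back(ranges[i]);
        }
      }
      return merged;
    }

Only the nonzero ranges become segments in the packed output: for active segments the zero ranges are simply dropped (instantiation already zeroes memory), while for passive segments they are recreated by memory.fill in the rewritten memory.init sequences. Since wasm-opt exposes every registered pass by name, the same splitting should also be reachable directly via a --memory-packing flag, in addition to running in the default pipeline's pre-passes after this change.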
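For passive segments, the subtle part is createReplacements: a constant-offset memory.init on the original segment has to be re-expressed against the split segments. The sketch below models only the index and offset arithmetic of that expansion, reusing the Range struct from the previous sketch; RewrittenOp and expandMemoryInit are hypothetical names, and the real pass instead builds Binaryen IR with a Builder, patches a local in for non-constant destinations, and adds a drop-state check, as noted after the sketch.

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // One instruction in the expansion of `memory.init orig, dest, start, size`
    // after the original segment was split. A fill op writes zeroes; an init op
    // reads from one of the new, packed segments.
    struct RewrittenOp {
      bool isFill;       // true: memory.fill with value 0
      size_t newSegment; // packed segment index (init ops only)
      size_t destDelta;  // constant added to the original dest operand
      size_t srcOffset;  // read offset within the new segment (init ops only)
      size_t size;       // bytes written by this op
    };

    // Assumes a constant, nonzero-length start/size (the pass refuses to split
    // otherwise, and zero-length inits are handled separately with a bare trap
    // check). Nonzero ranges are assumed to have been emitted as packed
    // segments in order, starting at firstNewIndex.
    std::vector<RewrittenOp> expandMemoryInit(const std::vector<Range>& ranges,
                                              size_t firstNewIndex,
                                              size_t start,
                                              size_t size) {
      std::vector<RewrittenOp> ops;
      size_t end = start + size;
      size_t newIndex = firstNewIndex;
      for (const Range& range : ranges) {
        if (range.start < end && range.end > start) {
          size_t from = std::max(start, range.start);
          size_t to = std::min(end, range.end);
          ops.push_back({range.isZero,
                         range.isZero ? 0 : newIndex,
                         from - start, // bytes already written before this op
                         range.isZero ? 0 : from - range.start,
                         to - from});
        }
        if (!range.isZero) {
          // Every nonzero range owns one packed segment index, whether or not
          // this particular memory.init reads from it.
          newIndex++;
        }
      }
      return ops;
    }

One wrinkle survives this arithmetic: if the first op in such an expansion is a memory.fill (the init begins inside a zero range), the rewritten code must first consult a fresh mutable global (named __mem_segment_drop_state_N in the diff) and trap if the original segment has already been dropped, because a memory.fill alone would silently succeed where the original memory.init would have trapped. Correspondingly, data.drop on the original segment is lowered to setting that global and dropping each of the new segments, and the global is created only when some memory.init actually needs the check.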