author     Alon Zakai <azakai@google.com>             2019-05-02 11:41:34 -0700
committer  GitHub <noreply@github.com>                2019-05-02 11:41:34 -0700
commit     01a4bfdb5c28d54fd480d603cba2d35c943a0bf5 (patch)
tree       0bdeeb9096c8c7dec33a990022f920ebadfbd252 /src
parent     3b4d9013c6c2dd6cfa90e02e2307a758a0f91140 (diff)
Add a pass to lower unaligned loads and stores (#2078)
This replaces the wasm2js code that lowered unaligned operations pessimistically to 1-byte-aligned loads and stores. The new pass does the optimal thing, keeping 2-byte alignment where possible.
This is also nicer as a standalone pass: it has the simple property that after it runs, all loads and stores are aligned, rather than leaving that logic scattered inside wasm2js.
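For intuition, here is what the lowering amounts to on the load side, written as a minimal C++ sketch rather than as Binaryen IR (the function name and byte math are illustrative assumptions, not part of the patch): an i32 load at 2-byte alignment becomes two aligned 16-bit loads recombined with a shift and an OR, the same shape visitLoad below builds with OrInt32/ShlInt32.

    #include <cstdint>

    // Illustrative sketch only: read a 32-bit little-endian value (wasm
    // memory is little-endian) from an address that is 2-byte aligned but
    // possibly not 4-byte aligned, using two aligned 16-bit loads plus a
    // shift and an OR - the recombination the pass emits in wasm.
    uint32_t loadU32Align2(const uint16_t* p) {
      uint32_t lo = p[0];      // aligned load of the low half
      uint32_t hi = p[1];      // aligned load of the high half
      return lo | (hi << 16);  // recombine, as OrInt32(lo, ShlInt32(hi, 16))
    }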
Diffstat (limited to 'src')
-rw-r--r--  src/passes/AlignmentLowering.cpp  214
-rw-r--r--  src/passes/CMakeLists.txt           1
-rw-r--r--  src/passes/pass.cpp                 3
-rw-r--r--  src/passes/passes.h                 1
-rw-r--r--  src/wasm2js.h                     102
5 files changed, 224 insertions, 97 deletions
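The store side is symmetric; as a hedged C++ sketch (again illustrative, not the pass's own code), an i32 store at 2-byte alignment splits the value into two aligned 16-bit stores, mirroring the ShrUInt32 + makeStore sequence in visitStore in the diff below.

    #include <cstdint>

    // Illustrative sketch only: write a 32-bit little-endian value to an
    // address that is 2-byte aligned but possibly not 4-byte aligned, as
    // two aligned 16-bit stores.
    void storeU32Align2(uint16_t* p, uint32_t v) {
      p[0] = static_cast<uint16_t>(v);        // low half at offset 0
      p[1] = static_cast<uint16_t>(v >> 16);  // high half at offset + 2
    }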
diff --git a/src/passes/AlignmentLowering.cpp b/src/passes/AlignmentLowering.cpp
new file mode 100644
index 000000000..fc03a8b74
--- /dev/null
+++ b/src/passes/AlignmentLowering.cpp
@@ -0,0 +1,214 @@
+/*
+ * Copyright 2017 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Lowers unaligned loads and stores into aligned loads and stores
+// that are smaller. This leaves only aligned operations.
+//
+
+#include "ir/bits.h"
+#include "pass.h"
+#include "wasm-builder.h"
+#include "wasm.h"
+
+namespace wasm {
+
+struct AlignmentLowering : public WalkerPass<PostWalker<AlignmentLowering>> {
+  void visitLoad(Load* curr) {
+    if (curr->align == 0 || curr->align == curr->bytes) {
+      return;
+    }
+    Builder builder(*getModule());
+    if (curr->type == unreachable) {
+      replaceCurrent(curr->ptr);
+      return;
+    }
+    assert(curr->type == i32); // TODO: i64, f32, f64
+    auto temp = builder.addVar(getFunction(), i32);
+    Expression* ret;
+    if (curr->bytes == 2) {
+      ret = builder.makeBinary(
+        OrInt32,
+        builder.makeLoad(
+          1, false, curr->offset, 1, builder.makeGetLocal(temp, i32), i32),
+        builder.makeBinary(ShlInt32,
+                           builder.makeLoad(1,
+                                            false,
+                                            curr->offset + 1,
+                                            1,
+                                            builder.makeGetLocal(temp, i32),
+                                            i32),
+                           builder.makeConst(Literal(int32_t(8)))));
+      if (curr->signed_) {
+        ret = Bits::makeSignExt(ret, 2, *getModule());
+      }
+    } else if (curr->bytes == 4) {
+      if (curr->align == 1) {
+        ret = builder.makeBinary(
+          OrInt32,
+          builder.makeBinary(
+            OrInt32,
+            builder.makeLoad(
+              1, false, curr->offset, 1, builder.makeGetLocal(temp, i32), i32),
+            builder.makeBinary(ShlInt32,
+                               builder.makeLoad(1,
+                                                false,
+                                                curr->offset + 1,
+                                                1,
+                                                builder.makeGetLocal(temp, i32),
+                                                i32),
+                               builder.makeConst(Literal(int32_t(8))))),
+          builder.makeBinary(
+            OrInt32,
+            builder.makeBinary(ShlInt32,
+                               builder.makeLoad(1,
+                                                false,
+                                                curr->offset + 2,
+                                                1,
+                                                builder.makeGetLocal(temp, i32),
+                                                i32),
+                               builder.makeConst(Literal(int32_t(16)))),
+            builder.makeBinary(ShlInt32,
+                               builder.makeLoad(1,
+                                                false,
+                                                curr->offset + 3,
+                                                1,
+                                                builder.makeGetLocal(temp, i32),
+                                                i32),
+                               builder.makeConst(Literal(int32_t(24))))));
+      } else if (curr->align == 2) {
+        ret = builder.makeBinary(
+          OrInt32,
+          builder.makeLoad(
+            2, false, curr->offset, 2, builder.makeGetLocal(temp, i32), i32),
+          builder.makeBinary(ShlInt32,
+                             builder.makeLoad(2,
+                                              false,
+                                              curr->offset + 2,
+                                              2,
+                                              builder.makeGetLocal(temp, i32),
+                                              i32),
+                             builder.makeConst(Literal(int32_t(16)))));
+      } else {
+        WASM_UNREACHABLE();
+      }
+    } else {
+      WASM_UNREACHABLE();
+    }
+    replaceCurrent(
+      builder.makeBlock({builder.makeSetLocal(temp, curr->ptr), ret}));
+  }
+
+  void visitStore(Store* curr) {
+    if (curr->align == 0 || curr->align == curr->bytes) {
+      return;
+    }
+    Builder builder(*getModule());
+    if (curr->type == unreachable) {
+      replaceCurrent(builder.makeBlock(
+        {builder.makeDrop(curr->ptr), builder.makeDrop(curr->value)}));
+      return;
+    }
+    assert(curr->value->type == i32); // TODO: i64, f32, f64
+    auto tempPtr = builder.addVar(getFunction(), i32);
+    auto tempValue = builder.addVar(getFunction(), i32);
+    auto* block =
+      builder.makeBlock({builder.makeSetLocal(tempPtr, curr->ptr),
+                         builder.makeSetLocal(tempValue, curr->value)});
+    if (curr->bytes == 2) {
+      block->list.push_back(
+        builder.makeStore(1,
+                          curr->offset,
+                          1,
+                          builder.makeGetLocal(tempPtr, i32),
+                          builder.makeGetLocal(tempValue, i32),
+                          i32));
+      block->list.push_back(builder.makeStore(
+        1,
+        curr->offset + 1,
+        1,
+        builder.makeGetLocal(tempPtr, i32),
+        builder.makeBinary(ShrUInt32,
+                           builder.makeGetLocal(tempValue, i32),
+                           builder.makeConst(Literal(int32_t(8)))),
+        i32));
+    } else if (curr->bytes == 4) {
+      if (curr->align == 1) {
+        block->list.push_back(
+          builder.makeStore(1,
+                            curr->offset,
+                            1,
+                            builder.makeGetLocal(tempPtr, i32),
+                            builder.makeGetLocal(tempValue, i32),
+                            i32));
+        block->list.push_back(builder.makeStore(
+          1,
+          curr->offset + 1,
+          1,
+          builder.makeGetLocal(tempPtr, i32),
+          builder.makeBinary(ShrUInt32,
+                             builder.makeGetLocal(tempValue, i32),
+                             builder.makeConst(Literal(int32_t(8)))),
+          i32));
+        block->list.push_back(builder.makeStore(
+          1,
+          curr->offset + 2,
+          1,
+          builder.makeGetLocal(tempPtr, i32),
+          builder.makeBinary(ShrUInt32,
+                             builder.makeGetLocal(tempValue, i32),
+                             builder.makeConst(Literal(int32_t(16)))),
+          i32));
+        block->list.push_back(builder.makeStore(
+          1,
+          curr->offset + 3,
+          1,
+          builder.makeGetLocal(tempPtr, i32),
+          builder.makeBinary(ShrUInt32,
+                             builder.makeGetLocal(tempValue, i32),
+                             builder.makeConst(Literal(int32_t(24)))),
+          i32));
+      } else if (curr->align == 2) {
+        block->list.push_back(
+          builder.makeStore(2,
+                            curr->offset,
+                            2,
+                            builder.makeGetLocal(tempPtr, i32),
+                            builder.makeGetLocal(tempValue, i32),
+                            i32));
+        block->list.push_back(builder.makeStore(
+          2,
+          curr->offset + 2,
+          2,
+          builder.makeGetLocal(tempPtr, i32),
+          builder.makeBinary(ShrUInt32,
+                             builder.makeGetLocal(tempValue, i32),
+                             builder.makeConst(Literal(int32_t(16)))),
+          i32));
+      } else {
+        WASM_UNREACHABLE();
+      }
+    } else {
+      WASM_UNREACHABLE();
+    }
+    block->finalize();
+    replaceCurrent(block);
+  }
+};
+
+Pass* createAlignmentLoweringPass() { return new AlignmentLowering(); }
+
+} // namespace wasm
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 8a4b04de8..935c3bec8 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -5,6 +5,7 @@ add_custom_command(
 
 SET(passes_SOURCES
   pass.cpp
+  AlignmentLowering.cpp
   CoalesceLocals.cpp
   CodePushing.cpp
   CodeFolding.cpp
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 7cfb21dfe..e4fbc5343 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -69,6 +69,9 @@ std::string PassRegistry::getPassDescription(std::string name) {
 void PassRegistry::registerPasses() {
   registerPass(
     "dae", "removes arguments to calls in an lto-like manner", createDAEPass);
+  registerPass("alignment-lowering",
+               "lower unaligned loads and stores to smaller aligned ones",
+               createAlignmentLoweringPass);
   registerPass("dae-optimizing",
                "removes arguments to calls in an lto-like manner, and "
                "optimizes where we removed",
diff --git a/src/passes/passes.h b/src/passes/passes.h
index fc01c1cd5..e562f4a42 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -22,6 +22,7 @@ namespace wasm {
 class Pass;
 
 // All passes:
+Pass* createAlignmentLoweringPass();
 Pass* createCoalesceLocalsPass();
 Pass* createCoalesceLocalsWithLearningPass();
 Pass* createCodeFoldingPass();
diff --git a/src/wasm2js.h b/src/wasm2js.h
index 1a5050af4..f15e8f9b7 100644
--- a/src/wasm2js.h
+++ b/src/wasm2js.h
@@ -284,6 +284,7 @@ Ref Wasm2JSBuilder::processWasm(Module* wasm, Name funcName) {
   // #1480
   runner.add("flatten");
   runner.add("i64-to-i32-lowering");
+  runner.add("alignment-lowering");
   // Next, optimize that as best we can. This should not generate
   // non-JS-friendly things.
   if (options.optimizeLevel > 0) {
@@ -982,41 +983,8 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
   }
 
   Ref visitLoad(Load* curr) {
-    if (curr->align != 0 && curr->align < curr->bytes) {
-      // set the pointer to a local
-      ScopedTemp temp(i32, parent, func);
-      SetLocal set(allocator);
-      set.index = func->getLocalIndex(temp.getName());
-      set.value = curr->ptr;
-      Ref ptrSet = visit(&set, NO_RESULT);
-      GetLocal get(allocator);
-      get.index = func->getLocalIndex(temp.getName());
-      // fake loads
-      Load load = *curr;
-      load.ptr = &get;
-      load.bytes = 1; // do the worst
-      load.signed_ = false;
-      Ref rest;
-      switch (curr->type) {
-        case i32: {
-          rest = makeAsmCoercion(visit(&load, EXPRESSION_RESULT), ASM_INT);
-          for (size_t i = 1; i < curr->bytes; i++) {
-            ++load.offset;
-            Ref add =
-              makeAsmCoercion(visit(&load, EXPRESSION_RESULT), ASM_INT);
-            add = ValueBuilder::makeBinary(
-              add, LSHIFT, ValueBuilder::makeNum(8 * i));
-            rest = ValueBuilder::makeBinary(rest, OR, add);
-          }
-          break;
-        }
-        default: {
-          std::cerr << "Unhandled type in load: " << curr->type << std::endl;
-          abort();
-        }
-      }
-      return ValueBuilder::makeSeq(ptrSet, rest);
-    }
+    // Unaligned loads and stores must have been fixed up already.
+    assert(curr->align == 0 || curr->align == curr->bytes);
     // normal load
     Ref ptr = makePointer(curr->ptr, curr->offset);
     Ref ret;
@@ -1110,68 +1078,8 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
     }
     // FIXME if memory growth, store ptr cannot contain a function call
     //       also other stores to memory, check them, all makeSub's
-    if (curr->align != 0 && curr->align < curr->bytes) {
-      // set the pointer to a local
-      ScopedTemp temp(i32, parent, func);
-      SetLocal set(allocator);
-      set.index = func->getLocalIndex(temp.getName());
-      set.value = curr->ptr;
-      Ref ptrSet = visit(&set, NO_RESULT);
-      GetLocal get(allocator);
-      get.index = func->getLocalIndex(temp.getName());
-      // set the value to a local
-      ScopedTemp tempValue(curr->value->type, parent, func);
-      SetLocal setValue(allocator);
-      setValue.index = func->getLocalIndex(tempValue.getName());
-      setValue.value = curr->value;
-      Ref valueSet = visit(&setValue, NO_RESULT);
-      GetLocal getValue(allocator);
-      getValue.index = func->getLocalIndex(tempValue.getName());
-      // fake stores
-      Store store = *curr;
-      store.ptr = &get;
-      store.bytes = 1; // do the worst
-      Ref rest;
-      switch (curr->valueType) {
-        case i32: {
-          Const _255(allocator);
-          _255.value = Literal(int32_t(255));
-          _255.type = i32;
-          for (size_t i = 0; i < curr->bytes; i++) {
-            Const shift(allocator);
-            shift.value = Literal(int32_t(8 * i));
-            shift.type = i32;
-            Binary shifted(allocator);
-            shifted.op = ShrUInt32;
-            shifted.left = &getValue;
-            shifted.right = &shift;
-            shifted.type = i32;
-            Binary anded(allocator);
-            anded.op = AndInt32;
-            anded.left = i > 0 ? static_cast<Expression*>(&shifted)
-                               : static_cast<Expression*>(&getValue);
-            anded.right = &_255;
-            anded.type = i32;
-            store.value = &anded;
-            Ref part = visit(&store, NO_RESULT);
-            if (i == 0) {
-              rest = part;
-            } else {
-              rest = ValueBuilder::makeSeq(rest, part);
-            }
-            ++store.offset;
-          }
-          break;
-        }
-        default: {
-          std::cerr << "Unhandled type in store: " << curr->valueType
-                    << std::endl;
-          abort();
-        }
-      }
-      return ValueBuilder::makeSeq(ValueBuilder::makeSeq(ptrSet, valueSet),
-                                   rest);
-    }
+    // Unaligned loads and stores must have been fixed up already.
+    assert(curr->align == 0 || curr->align == curr->bytes);
     // normal store
     Ref ptr = makePointer(curr->ptr, curr->offset);
     Ref value = visit(curr->value, EXPRESSION_RESULT);
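Since the pass is registered under the name "alignment-lowering" in pass.cpp, it is not tied to wasm2js. A minimal sketch of running it programmatically, assuming an already-constructed wasm::Module (module setup elided; the function name is illustrative):

    #include "pass.h"
    #include "wasm.h"

    // Run the new pass over a module, the same way wasm2js.h now schedules
    // it on its PassRunner.
    void lowerUnalignedAccesses(wasm::Module& module) {
      wasm::PassRunner runner(&module);
      runner.add("alignment-lowering"); // name registered in pass.cpp above
      runner.run();
    }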