/*
 * Copyright 2022 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
// Finds types which are only created in assignments to immutable globals.
// For such types we can replace a struct.get with a global.get when there is
// a single possible global, or if there are two then with this pattern:
//
//  (struct.get $foo i
//    (..ref..))
//    =>
//  (select
//    (value1)
//    (value2)
//    (ref.eq
//      (..ref..)
//      (global.get $global1)))
//
// That is a valid transformation if there are only two struct.news of $foo,
// it is created in two immutable globals $global1 and $global2, the field is
// immutable, the values of field |i| in them are value1 and value2
// respectively, and $foo has no subtypes. In that situation, the reference
// must be one of those two, so we can compare the reference to the globals
// and pick the right value there. (We can also handle subtypes, if we look
// at their values as well, see below.)
//
// The benefit of this optimization is primarily in the case of constant
// values that we can heavily optimize, like function references (constant
// function refs let us inline, etc.). Function references cannot be directly
// compared, so we cannot use ConstantFieldPropagation or such with an
// extension to multiple values, as the select pattern shown above can't be
// used - it needs a comparison. But we can compare structs, so if the
// function references are in vtables, and the vtables follow the above
// pattern, then we can optimize.
//
// TODO: Only do the case with a select when shrinkLevel == 0?
//

#include <variant>

#include "ir/bits.h"
#include "ir/debuginfo.h"
#include "ir/find_all.h"
#include "ir/module-utils.h"
#include "ir/names.h"
#include "ir/possible-constant.h"
#include "ir/subtypes.h"
#include "ir/utils.h"
#include "pass.h"
#include "wasm-builder.h"
#include "wasm.h"

namespace wasm {

namespace {

struct GlobalStructInference : public Pass {
  // Only modifies struct.get operations.
  bool requiresNonNullableLocalFixups() override { return false; }

  // Maps optimizable struct types to the globals whose init is a struct.new
  // of them.
  //
  // We will remove unoptimizable types from here, so in practice, if a type
  // is optimizable it will have an entry here, and no entry if not.
  std::unordered_map<HeapType, std::vector<Name>> typeGlobals;

  void run(Module* module) override {
    if (!module->features.hasGC()) {
      return;
    }

    if (!getPassOptions().closedWorld) {
      Fatal() << "GSI requires --closed-world";
    }

    // First, find all the information we need. We need to know which struct
    // types are created in functions, because we will not be able to
    // optimize those.

    using HeapTypes = std::unordered_set<HeapType>;

    ModuleUtils::ParallelFunctionAnalysis<HeapTypes> analysis(
      *module, [&](Function* func, HeapTypes& types) {
        if (func->imported()) {
          return;
        }

        for (auto* structNew : FindAll<StructNew>(func->body).list) {
          auto type = structNew->type;
          if (type.isRef()) {
            types.insert(type.getHeapType());
          }
        }
      });

    // We cannot optimize types that appear in a struct.new in a function,
    // which we just collected and merge now.
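    // For example (a hypothetical module), the struct.new in the function
    // here makes $T unoptimizable, since a reference to $T might then point
    // to an instance that is not stored in any global:
    //
    //  (global $g (struct.new $T (i32.const 42)))
    //  (func $f
    //    (drop (struct.new $T (i32.const 7)))
    //  )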
    HeapTypes unoptimizable;
    for (auto& [func, types] : analysis.map) {
      for (auto type : types) {
        unoptimizable.insert(type);
      }
    }

    // Process the globals.
    for (auto& global : module->globals) {
      if (global->imported()) {
        continue;
      }

      // We cannot optimize a type that appears in a non-toplevel location
      // in a global init.
      for (auto* structNew : FindAll<StructNew>(global->init).list) {
        auto type = structNew->type;
        if (type.isRef() && structNew != global->init) {
          unoptimizable.insert(type.getHeapType());
        }
      }

      if (!global->init->type.isRef()) {
        continue;
      }

      auto type = global->init->type.getHeapType();

      // The global's declared type must match the init's type. If not, say
      // if we had a global declared as type |any| but that contains
      // (ref $A), then that is not something we can optimize, as ref.eq on
      // a global.get of that global will not validate. (This should not be
      // a problem after GlobalSubtyping runs, which will specialize the
      // type of the global.)
      if (global->type != global->init->type) {
        unoptimizable.insert(type);
        continue;
      }

      // We cannot optimize mutable globals.
      if (global->mutable_) {
        unoptimizable.insert(type);
        continue;
      }

      // Finally, if this is a struct.new then it is one we can optimize;
      // note it.
      if (global->init->is<StructNew>()) {
        typeGlobals[type].push_back(global->name);
      }
    }

    // A struct.get might also read from any of the subtypes. As a result,
    // an unoptimizable type makes all its supertypes unoptimizable as well.
    // TODO: this could be specific per field (and not all supers have all
    //       fields)
    // Iterate on a copy to avoid invalidation as we insert.
    auto unoptimizableCopy = unoptimizable;
    for (auto type : unoptimizableCopy) {
      while (1) {
        unoptimizable.insert(type);

        // Also erase the globals, as we will never read them anyhow. This
        // can allow us to skip unneeded work, when we check if typeGlobals
        // is empty, below.
        typeGlobals.erase(type);

        auto super = type.getDeclaredSuperType();
        if (!super) {
          break;
        }
        type = *super;
      }
    }

    // Similarly, propagate global names: if one type has [global1], then a
    // get of any supertype might access that, so propagate to them.
    auto typeGlobalsCopy = typeGlobals;
    for (auto& [type, globals] : typeGlobalsCopy) {
      auto curr = type;
      while (1) {
        auto super = curr.getDeclaredSuperType();
        if (!super) {
          break;
        }
        curr = *super;

        // As above, avoid adding pointless data for anything unoptimizable.
        if (!unoptimizable.count(curr)) {
          for (auto global : globals) {
            typeGlobals[curr].push_back(global);
          }
        }
      }
    }
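    // For example (hypothetical types), if $sub is a subtype of $super and
    // we have
    //
    //  (global $g (struct.new $sub ..))
    //
    // then a struct.get of $super may read from $g, as the reference may
    // actually point to the $sub instance, so $super's list must include
    // $g as well.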
    if (typeGlobals.empty()) {
      // We found nothing we can optimize.
      return;
    }

    // The above loop on typeGlobalsCopy is on an unsorted data structure,
    // and that can lead to nondeterminism in typeGlobals. Sort the vectors
    // there to ensure determinism.
    for (auto& [type, globals] : typeGlobals) {
      std::sort(globals.begin(), globals.end());
    }

    // We are looking for the case where we can pick between two values
    // using a single comparison. More than two values, or more than a
    // single comparison, lead to tradeoffs that may not be worth it.
    //
    // Note that the situation may involve more than two globals. For
    // example, we may have three relevant globals, but two may have the
    // same value. In that case we can compare against the third:
    //
    //  $global0: (struct.new $Type (i32.const 42))
    //  $global1: (struct.new $Type (i32.const 42))
    //  $global2: (struct.new $Type (i32.const 1337))
    //
    //  (struct.get $Type (ref))
    //    =>
    //  (select
    //    (i32.const 1337)
    //    (i32.const 42)
    //    (ref.eq (ref) $global2))
    //
    // To discover these situations, we compute and group the possible
    // values that can be read from a particular struct.get, using the
    // following data structure.
    struct Value {
      // A value is either a constant, or if not, then we point to whatever
      // expression it is.
      std::variant<PossibleConstantValues, Expression*> content;

      // The list of globals that have this Value. In the example from
      // above, the Value for 42 would list globals = [$global0, $global1].
      // TODO: SmallVector?
      std::vector<Name> globals;

      bool isConstant() const {
        return std::get_if<PossibleConstantValues>(&content);
      }

      const PossibleConstantValues& getConstant() const {
        assert(isConstant());
        return std::get<PossibleConstantValues>(content);
      }

      Expression* getExpression() const {
        assert(!isConstant());
        return std::get<Expression*>(content);
      }
    };

    // Constant expressions are easy to handle, and we can emit a select as
    // in the last example. But we can handle non-constant ones too, by
    // un-nesting the relevant global. Imagine we have this:
    //
    //  (global $g (struct.new $S
    //    (struct.new $T ..)
    //
    // We have a nested struct.new here. That is not a constant value, but
    // we can turn it into a global.get:
    //
    //  (global $g.nested (struct.new $T ..)
    //  (global $g (struct.new $S
    //    (global.get $g.nested)
    //
    // After this un-nesting we end up with a global.get of an immutable
    // global, which is constant. Note that this adds a global and may
    // increase code size slightly, but if it lets us infer constant values
    // then that may lead to devirtualization and other large benefits.
    // Later passes can also re-nest.
    //
    // We do most of our optimization work in parallel, but we cannot add
    // globals in parallel, so instead we note the places we need to un-nest
    // in this data structure and process them at the end.
    struct GlobalToUnnest {
      // The global we want to refer to a nested part of, by un-nesting it.
      // The global contains a struct.new, and we want to refer to one of
      // the operands of the struct.new directly, which we can do by moving
      // it out to its own new global.
      Name global;

      // The index of the operand in the struct.new in the global named
      // |global|.
      Index index;

      // The global.get that should refer to the new global. At the end,
      // after we create a new global and have a name for it, we update this
      // get to point to it.
      GlobalGet* get;
    };

    using GlobalsToUnnest = std::vector<GlobalToUnnest>;

    struct FunctionOptimizer : public PostWalker<FunctionOptimizer> {
    private:
      GlobalStructInference& parent;
      GlobalsToUnnest& globalsToUnnest;

    public:
      FunctionOptimizer(GlobalStructInference& parent,
                        GlobalsToUnnest& globalsToUnnest)
        : parent(parent), globalsToUnnest(globalsToUnnest) {}

      bool refinalize = false;

      void visitStructGet(StructGet* curr) {
        auto type = curr->ref->type;
        if (type == Type::unreachable) {
          return;
        }

        // We must ignore the case of a non-struct heap type, that is, a
        // bottom type (which is all that is left after we've already ruled
        // out unreachable). Such things will not be in typeGlobals, which
        // we are checking now anyhow.
        auto heapType = type.getHeapType();
        auto iter = parent.typeGlobals.find(heapType);
        if (iter == parent.typeGlobals.end()) {
          return;
        }

        // This cannot be a bottom type as we found it in the typeGlobals
        // map, which only contains types of struct.news.
        assert(heapType.isStruct());

        // The field must be immutable.
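        // (Were it mutable, a later write could make the value differ from
        // what the global's struct.new shows, so we could not trust the
        // values we are about to scan.)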
        auto fieldIndex = curr->index;
        auto& field = heapType.getStruct().fields[fieldIndex];
        if (field.mutable_ == Mutable) {
          return;
        }

        const auto& globals = iter->second;
        if (globals.size() == 0) {
          return;
        }

        auto& wasm = *getModule();
        Builder builder(wasm);

        if (globals.size() == 1) {
          // Leave it to other passes to infer the constant value of the
          // field, if there is one: just change the reference to the
          // global, which will unlock those other optimizations. Note we
          // must trap if the ref is null, so add RefAsNonNull here.
          auto global = globals[0];
          auto globalType = wasm.getGlobal(global)->type;
          if (globalType != curr->ref->type) {
            // The struct.get will now read from something of the type of
            // the global, which is different, so the field being read might
            // be refined, which could change the struct.get's type.
            refinalize = true;
          }
          // No need to worry about atomic gets here. We will still read
          // from the same memory location as before and preserve all side
          // effects (including synchronization) that were previously
          // present. The memory location is immutable anyway, so there
          // cannot be any writes to synchronize with in the first place.
          curr->ref = builder.makeSequence(
            builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)),
            builder.makeGlobalGet(global, globalType));
          return;
        }

        // TODO: SmallVector?
        std::vector<Value> values;

        // Scan the relevant struct.new operands.
        auto fieldType = field.type;
        for (Index i = 0; i < globals.size(); i++) {
          Name global = globals[i];
          auto* structNew = wasm.getGlobal(global)->init->cast<StructNew>();

          // The value that is read from this struct.new.
          Value value;

          // Find the value read from the struct and represent it as a
          // Value.
          PossibleConstantValues constant;
          if (structNew->isWithDefault()) {
            constant.note(Literal::makeZero(fieldType));
            value.content = constant;
          } else {
            Expression* operand = structNew->operands[fieldIndex];
            constant.note(operand, wasm);
            if (constant.isConstant()) {
              value.content = constant;
            } else {
              value.content = operand;
            }
          }

          // If the value is constant, it may be grouped as mentioned
          // before. See if it matches anything we've seen before.
          bool grouped = false;
          if (value.isConstant()) {
            for (auto& oldValue : values) {
              if (oldValue.isConstant() &&
                  oldValue.getConstant() == value.getConstant()) {
                // Add us to this group.
                oldValue.globals.push_back(global);
                grouped = true;
                break;
              }
            }
          }
          if (!grouped) {
            // This is a new value, so create a new group, unless we've seen
            // too many unique values. In that case, give up.
            if (values.size() == 2) {
              return;
            }
            value.globals.push_back(global);
            values.push_back(value);
          }
        }
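        // At this point, with the globals from the earlier example (with
        // values 42, 42, and 1337), we would have grouped
        //
        //  values = [ {42,   globals = [$global0, $global1]},
        //             {1337, globals = [$global2]} ]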
        // Helper for optimization: Given a Value, returns what we should
        // read for it.
        auto getReadValue = [&](const Value& value) -> Expression* {
          Expression* ret;
          if (value.isConstant()) {
            // This is known to be a constant, so simply emit an expression
            // for that constant, and handle if the field is packed.
            auto* expr = value.getConstant().makeExpression(wasm);
            ret = Bits::makePackedFieldGet(expr, field, curr->signed_, wasm);
          } else {
            // Otherwise, this is non-constant, so we are in the situation
            // where we want to un-nest the value out of the struct.new it
            // is in. Note it for later work, as we cannot add a global in
            // parallel.

            // There can only be one global in a value that is not constant,
            // which is the global we want to read from.
            assert(value.globals.size() == 1);

            // Create a global.get with a temporary name, leaving only the
            // updating of the name to later work.
            auto* get = builder.makeGlobalGet(value.globals[0],
                                              value.getExpression()->type);
            globalsToUnnest.emplace_back(
              GlobalToUnnest{value.globals[0], fieldIndex, get});
            ret = get;
          }
          // This value replaces the struct.get, so it should have the same
          // source location.
          debuginfo::copyOriginalToReplacement(curr, ret, getFunction());
          return ret;
        };

        // We have some globals (at least 2), and so must have at least one
        // value. And we have already exited if we have more than 2 values
        // (see the early return above), so that only leaves 1 and 2.
        if (values.size() == 1) {
          // The case of 1 value is simple: trap if the ref is null, and
          // otherwise return the value. We must also fence if the get was
          // seqcst. No additional work is necessary for an acquire get
          // because there cannot have been any writes to this immutable
          // field that it would synchronize with.
          Expression* replacement =
            builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref));
          if (curr->order == MemoryOrder::SeqCst) {
            replacement =
              builder.blockify(replacement, builder.makeAtomicFence());
          }
          replaceCurrent(
            builder.blockify(replacement, getReadValue(values[0])));
          return;
        }
        assert(values.size() == 2);

        // We have two values. Check that we can pick between them using a
        // single comparison. While doing so, ensure that the index we can
        // check on is 0, that is, the first value has a single global.
        if (values[0].globals.size() == 1) {
          // The checked global is already in index 0.
        } else if (values[1].globals.size() == 1) {
          // Flip so the value to check is in index 0.
          std::swap(values[0], values[1]);
        } else {
          // Both indexes have more than one option, so we'd need more than
          // one comparison. Give up.
          return;
        }

        // Excellent, we can optimize here! Emit a select.
        auto checkGlobal = values[0].globals[0];
        // Compute the left and right values before the next line, as the
        // order of their execution matters (they may note globals for
        // un-nesting).
        auto* left = getReadValue(values[0]);
        auto* right = getReadValue(values[1]);
        // Note that we must trap on null, so add a ref.as_non_null here. We
        // must also add a fence if this get is seqcst. As before, no extra
        // work is necessary for an acquire get because there cannot be a
        // write it synchronizes with.
        Expression* getGlobal = builder.makeGlobalGet(
          checkGlobal, wasm.getGlobal(checkGlobal)->type);
        if (curr->order == MemoryOrder::SeqCst) {
          getGlobal =
            builder.makeSequence(builder.makeAtomicFence(), getGlobal);
        }
        replaceCurrent(builder.makeSelect(
          builder.makeRefEq(builder.makeRefAs(RefAsNonNull, curr->ref),
                            getGlobal),
          left,
          right));
      }

      void visitFunction(Function* func) {
        if (refinalize) {
          ReFinalize().walkFunctionInModule(func, getModule());
        }
      }
    };

    // Find the optimization opportunities in parallel.
    ModuleUtils::ParallelFunctionAnalysis<GlobalsToUnnest> optimization(
      *module, [&](Function* func, GlobalsToUnnest& globalsToUnnest) {
        if (func->imported()) {
          return;
        }

        FunctionOptimizer optimizer(*this, globalsToUnnest);
        optimizer.walkFunctionInModule(func, module);
      });

    // Un-nest any globals as needed, using the deterministic order of the
    // functions in the module.
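    // For example, with the naming scheme used below:
    //
    //  (global $g (struct.new $S (struct.new $T ..)))
    //    =>
    //  (global $g.unnested.0 (struct.new $T ..))
    //  (global $g (struct.new $S (global.get $g.unnested.0)))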
    Builder builder(*module);
    auto addedGlobals = false;
    for (auto& func : module->functions) {
      // Each work item here is a global with a struct.new, from which we
      // want to read a particular index, from a particular global.get.
      for (auto& [globalName, index, get] : optimization.map[func.get()]) {
        auto* global = module->getGlobal(globalName);
        auto* structNew = global->init->cast<StructNew>();
        assert(index < structNew->operands.size());
        auto*& operand = structNew->operands[index];

        // If we already un-nested this then we don't need to repeat that
        // work.
        if (auto* nestedGet = operand->dynCast<GlobalGet>()) {
          // We already un-nested, and this global.get refers to the new
          // global. Simply copy the target.
          get->name = nestedGet->name;
          assert(get->type == nestedGet->type);
        } else {
          // Add a new global, initialized to the operand.
          auto newName = Names::getValidGlobalName(
            *module,
            global->name.toString() + ".unnested." + std::to_string(index));
          module->addGlobal(builder.makeGlobal(
            newName, get->type, operand, Builder::Immutable));
          // Replace the operand with a get of that new global, and update
          // the original get to read the same.
          operand = builder.makeGlobalGet(newName, get->type);
          get->name = newName;
          addedGlobals = true;
        }
      }
    }

    if (addedGlobals) {
      // Sort the globals so that added ones appear before their uses.
      PassRunner runner(module);
      runner.add("reorder-globals-always");
      runner.setIsNested(true);
      runner.run();
    }
  }
};

} // anonymous namespace

Pass* createGlobalStructInferencePass() { return new GlobalStructInference(); }

} // namespace wasm