From 6098d7c270a3b16c4c68dd6996597eae2a74a5f4 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 9 Nov 2015 11:24:06 -0800 Subject: restructure code, put emscripten-optimizer stuff in its own dir --- src/colors.h | 64 - src/emscripten-optimizer/colors.h | 64 + src/emscripten-optimizer/istring.h | 161 ++ src/emscripten-optimizer/optimizer-shared.cpp | 125 + src/emscripten-optimizer/optimizer.cpp | 3851 +++++++++++++++++++++++++ src/emscripten-optimizer/optimizer.h | 127 + src/emscripten-optimizer/parser.cpp | 144 + src/emscripten-optimizer/parser.h | 905 ++++++ src/emscripten-optimizer/simple_ast.cpp | 259 ++ src/emscripten-optimizer/simple_ast.h | 1542 ++++++++++ src/emscripten-optimizer/snprintf.h | 32 + src/istring.h | 161 -- src/optimizer-shared.cpp | 125 - src/optimizer.cpp | 3851 ------------------------- src/optimizer.h | 127 - src/parser.cpp | 144 - src/parser.h | 905 ------ src/simple_ast.cpp | 259 -- src/simple_ast.h | 1542 ---------- src/snprintf.h | 32 - 20 files changed, 7210 insertions(+), 7210 deletions(-) delete mode 100644 src/colors.h create mode 100644 src/emscripten-optimizer/colors.h create mode 100644 src/emscripten-optimizer/istring.h create mode 100644 src/emscripten-optimizer/optimizer-shared.cpp create mode 100644 src/emscripten-optimizer/optimizer.cpp create mode 100644 src/emscripten-optimizer/optimizer.h create mode 100644 src/emscripten-optimizer/parser.cpp create mode 100644 src/emscripten-optimizer/parser.h create mode 100644 src/emscripten-optimizer/simple_ast.cpp create mode 100644 src/emscripten-optimizer/simple_ast.h create mode 100644 src/emscripten-optimizer/snprintf.h delete mode 100644 src/istring.h delete mode 100644 src/optimizer-shared.cpp delete mode 100644 src/optimizer.cpp delete mode 100644 src/optimizer.h delete mode 100644 src/parser.cpp delete mode 100644 src/parser.h delete mode 100644 src/simple_ast.cpp delete mode 100644 src/simple_ast.h delete mode 100644 src/snprintf.h (limited to 'src') diff --git a/src/colors.h b/src/colors.h deleted file mode 100644 index 96a7baf29..000000000 --- a/src/colors.h +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include - -struct Colors { - static bool use; - - static void normal(std::ostream& stream) { - if (!use) return; -#if defined(__linux__) || defined(__apple__) - stream << "\033[0m"; -#endif - } - static void red(std::ostream& stream) { - if (!use) return; -#if defined(__linux__) || defined(__apple__) - stream << "\033[31m"; -#endif - } - static void magenta(std::ostream& stream) { - if (!use) return; -#if defined(__linux__) || defined(__apple__) - stream << "\033[35m"; -#endif - } - static void orange(std::ostream& stream) { - if (!use) return; -#if defined(__linux__) || defined(__apple__) - stream << "\033[33m"; -#endif - } - static void grey(std::ostream& stream) { - if (!use) return; -#if defined(__linux__) || defined(__apple__) - stream << "\033[37m"; -#endif - } - static void green(std::ostream& stream) { - if (!use) return; -#if defined(__linux__) || defined(__apple__) - stream << "\033[32m"; -#endif - } - static void blue(std::ostream& stream) { - if (!use) return; -#if defined(__linux__) || defined(__apple__) - stream << "\033[34m"; -#endif - } - static void bold(std::ostream& stream) { - if (!use) return; -#if defined(__linux__) || defined(__apple__) - stream << "\033[1m"; -#endif - } - - Colors() { - use = (getenv("COLORS") && getenv("COLORS")[0] == '1') || // forced - (isatty(STDOUT_FILENO) && (!getenv("COLORS") || getenv("COLORS")[0] != '0')); // implicit - } -} colors; - -bool Colors::use; - diff --git a/src/emscripten-optimizer/colors.h b/src/emscripten-optimizer/colors.h new file mode 100644 index 000000000..96a7baf29 --- /dev/null +++ b/src/emscripten-optimizer/colors.h @@ -0,0 +1,64 @@ +#include +#include +#include + +struct Colors { + static bool use; + + static void normal(std::ostream& stream) { + if (!use) return; +#if defined(__linux__) || defined(__apple__) + stream << "\033[0m"; +#endif + } + static void red(std::ostream& stream) { + if (!use) return; +#if defined(__linux__) || defined(__apple__) + stream << "\033[31m"; +#endif + } + static void magenta(std::ostream& stream) { + if (!use) return; +#if defined(__linux__) || defined(__apple__) + stream << "\033[35m"; +#endif + } + static void orange(std::ostream& stream) { + if (!use) return; +#if defined(__linux__) || defined(__apple__) + stream << "\033[33m"; +#endif + } + static void grey(std::ostream& stream) { + if (!use) return; +#if defined(__linux__) || defined(__apple__) + stream << "\033[37m"; +#endif + } + static void green(std::ostream& stream) { + if (!use) return; +#if defined(__linux__) || defined(__apple__) + stream << "\033[32m"; +#endif + } + static void blue(std::ostream& stream) { + if (!use) return; +#if defined(__linux__) || defined(__apple__) + stream << "\033[34m"; +#endif + } + static void bold(std::ostream& stream) { + if (!use) return; +#if defined(__linux__) || defined(__apple__) + stream << "\033[1m"; +#endif + } + + Colors() { + use = (getenv("COLORS") && getenv("COLORS")[0] == '1') || // forced + (isatty(STDOUT_FILENO) && (!getenv("COLORS") || getenv("COLORS")[0] != '0')); // implicit + } +} colors; + +bool Colors::use; + diff --git a/src/emscripten-optimizer/istring.h b/src/emscripten-optimizer/istring.h new file mode 100644 index 000000000..b77fec4e5 --- /dev/null +++ b/src/emscripten-optimizer/istring.h @@ -0,0 +1,161 @@ +// Interned String type, 100% interned on creation. Comparisons are always just a pointer comparison + +#ifndef __istring_h__ +#define __istring_h__ + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace cashew { + +struct IString { + const char *str; + + static size_t hash_c(const char *str) { // see http://www.cse.yorku.ca/~oz/hash.html + unsigned int hash = 5381; + int c; + while ((c = *str++)) { + hash = ((hash << 5) + hash) ^ c; + } + return (size_t)hash; + } + + class CStringHash : public std::hash { + public: + size_t operator()(const char *str) const { + return IString::hash_c(str); + } + }; + class CStringEqual : public std::equal_to { + public: + bool operator()(const char *x, const char *y) const { + return strcmp(x, y) == 0; + } + }; + + IString() : str(nullptr) {} + IString(const char *s, bool reuse=true) { // if reuse=true, then input is assumed to remain alive; not copied + assert(s); + set(s, reuse); + } + + void set(const char *s, bool reuse=true) { + typedef std::unordered_set StringSet; + static StringSet* strings = new StringSet(); + + if (reuse) { + auto result = strings->insert(s); // if already present, does nothing + str = *(result.first); + } else { + auto existing = strings->find(s); + if (existing == strings->end()) { + char *copy = (char*)malloc(strlen(s)+1); // XXX leaked + strcpy(copy, s); + s = copy; + } else { + s = *existing; + } + strings->insert(s); + str = s; + } + } + + void set(const IString &s) { + str = s.str; + } + + void clear() { + str = nullptr; + } + + bool operator==(const IString& other) const { + //assert((str == other.str) == !strcmp(str, other.str)); + return str == other.str; // fast! + } + bool operator!=(const IString& other) const { + //assert((str == other.str) == !strcmp(str, other.str)); + return str != other.str; // fast! + } + bool operator<(const IString& other) const { + return strcmp(str ? str : "", other.str ? other.str : "") < 0; + } + + char operator[](int x) const { + return str[x]; + } + + bool operator!() const { // no string, or empty string + return !str || str[0] == 0; + } + + const char *c_str() const { return str; } + bool equals(const char *other) const { return !strcmp(str, other); } + + bool is() { return str != nullptr; } + bool isNull() { return str == nullptr; } +}; + +} // namespace cashew + +// Utilities for creating hashmaps/sets over IStrings + +namespace std { + +template <> struct hash : public unary_function { + size_t operator()(const cashew::IString& str) const { + size_t hash = size_t(str.str); + return hash = ((hash << 5) + hash) ^ 5381; /* (hash * 33) ^ c */ + } +}; + +template <> struct equal_to : public binary_function { + bool operator()(const cashew::IString& x, const cashew::IString& y) const { + return x == y; + } +}; + +} // namespace std + +namespace cashew { + +// IStringSet + +class IStringSet : public std::unordered_set { +public: + IStringSet() {} + IStringSet(const char *init) { // comma-delimited list + int size = strlen(init); + char *curr = new char[size+1]; // leaked! + strcpy(curr, init); + while (1) { + char *end = strchr(curr, ' '); + if (end) *end = 0; + insert(curr); + if (!end) break; + curr = end + 1; + } + } + + bool has(const IString& str) { + return count(str) > 0; + } +}; + +class IOrderedStringSet : public std::set { +public: + bool has(const IString& str) { + return count(str) > 0; + } +}; + +} // namespace cashew + +#endif // __istring_h__ + diff --git a/src/emscripten-optimizer/optimizer-shared.cpp b/src/emscripten-optimizer/optimizer-shared.cpp new file mode 100644 index 000000000..1c36efc62 --- /dev/null +++ b/src/emscripten-optimizer/optimizer-shared.cpp @@ -0,0 +1,125 @@ + +#include "optimizer.h" + +using namespace cashew; + +IString ASM_FLOAT_ZERO; + +IString SIMD_INT8X16_CHECK("SIMD_Int8x16_check"), + SIMD_INT16X8_CHECK("SIMD_Int16x8_check"), + SIMD_INT32X4_CHECK("SIMD_Int32x4_check"), + SIMD_FLOAT32X4_CHECK("SIMD_Float32x4_check"), + SIMD_FLOAT64X2_CHECK("SIMD_Float64x2_check"); + +bool isInteger(double x) { + return fmod(x, 1) == 0; +} + +bool isInteger32(double x) { + return isInteger(x) && (x == (int32_t)x || x == (uint32_t)x); +} + +int parseInt(const char *str) { + int ret = *str - '0'; + while (*(++str)) { + ret *= 10; + ret += *str - '0'; + } + return ret; +} + +HeapInfo parseHeap(const char *name) { + HeapInfo ret; + if (name[0] != 'H' || name[1] != 'E' || name[2] != 'A' || name[3] != 'P') { + ret.valid = false; + return ret; + } + ret.valid = true; + ret.unsign = name[4] == 'U'; + ret.floaty = name[4] == 'F'; + ret.bits = parseInt(name + (ret.unsign || ret.floaty ? 5 : 4)); + ret.type = !ret.floaty ? ASM_INT : (ret.bits == 64 ? ASM_DOUBLE : ASM_FLOAT); + return ret; +} + +AsmType detectType(Ref node, AsmData *asmData, bool inVarDef) { + switch (node[0]->getCString()[0]) { + case 'n': { + if (node[0] == NUM) { + if (!isInteger(node[1]->getNumber())) return ASM_DOUBLE; + return ASM_INT; + } else if (node[0] == NAME) { + if (asmData) { + AsmType ret = asmData->getType(node[1]->getCString()); + if (ret != ASM_NONE) return ret; + } + if (!inVarDef) { + if (node[1] == INF || node[1] == NaN) return ASM_DOUBLE; + if (node[1] == TEMP_RET0) return ASM_INT; + return ASM_NONE; + } + // We are in a variable definition, where Math_fround(0) optimized into a global constant becomes f0 = Math_fround(0) + if (ASM_FLOAT_ZERO.isNull()) ASM_FLOAT_ZERO = node[1]->getIString(); + else assert(node[1] == ASM_FLOAT_ZERO); + return ASM_FLOAT; + } + break; + } + case 'u': { + if (node[0] == UNARY_PREFIX) { + switch (node[1]->getCString()[0]) { + case '+': return ASM_DOUBLE; + case '-': return detectType(node[2], asmData, inVarDef); + case '!': case '~': return ASM_INT; + } + break; + } + break; + } + case 'c': { + if (node[0] == CALL) { + if (node[1][0] == NAME) { + IString name = node[1][1]->getIString(); + if (name == MATH_FROUND) return ASM_FLOAT; + else if (name == SIMD_FLOAT32X4 || name == SIMD_FLOAT32X4_CHECK) return ASM_FLOAT32X4; + else if (name == SIMD_FLOAT64X2 || name == SIMD_FLOAT64X2_CHECK) return ASM_FLOAT64X2; + else if (name == SIMD_INT8X16 || name == SIMD_INT8X16_CHECK) return ASM_INT8X16; + else if (name == SIMD_INT16X8 || name == SIMD_INT16X8_CHECK) return ASM_INT16X8; + else if (name == SIMD_INT32X4 || name == SIMD_INT32X4_CHECK) return ASM_INT32X4; + } + return ASM_NONE; + } else if (node[0] == CONDITIONAL) { + return detectType(node[2], asmData, inVarDef); + } + break; + } + case 'b': { + if (node[0] == BINARY) { + switch (node[1]->getCString()[0]) { + case '+': case '-': + case '*': case '/': case '%': return detectType(node[2], asmData, inVarDef); + case '|': case '&': case '^': case '<': case '>': // handles <<, >>, >>=, <=, >= + case '=': case '!': { // handles ==, != + return ASM_INT; + } + } + } + break; + } + case 's': { + if (node[0] == SEQ) { + return detectType(node[2], asmData, inVarDef); + } else if (node[0] == SUB) { + assert(node[1][0] == NAME); + HeapInfo info = parseHeap(node[1][1]->getCString()); + if (info.valid) return ASM_NONE; + return info.floaty ? ASM_DOUBLE : ASM_INT; // XXX ASM_FLOAT? + } + break; + } + } + //dump("horrible", node); + //assert(0); + return ASM_NONE; +} + diff --git a/src/emscripten-optimizer/optimizer.cpp b/src/emscripten-optimizer/optimizer.cpp new file mode 100644 index 000000000..d569fb928 --- /dev/null +++ b/src/emscripten-optimizer/optimizer.cpp @@ -0,0 +1,3851 @@ +#include +#include +#include +#include +#include +#include + +#include "simple_ast.h" +#include "optimizer.h" + +#include "optimizer-shared.cpp" + +typedef std::vector StringVec; + +//================== +// Globals +//================== + +Ref extraInfo; + +//================== +// Infrastructure +//================== + +template +int indexOf(T list, V value) { + for (size_t i = 0; i < list.size(); i++) { + if (list[i] == value) return i; + } + return -1; +} + +int jsD2I(double x) { + return (int)((int64_t)x); +} + +char *strdupe(const char *str) { + char *ret = (char *)malloc(strlen(str)+1); // leaked! + strcpy(ret, str); + return ret; +} + +IString getHeapStr(int x, bool unsign) { + switch (x) { + case 8: return unsign ? HEAPU8 : HEAP8; + case 16: return unsign ? HEAPU16 : HEAP16; + case 32: return unsign ? HEAPU32 : HEAP32; + } + assert(0); + return ":("; +} + +Ref deStat(Ref node) { + if (node[0] == STAT) return node[1]; + return node; +} + +Ref getStatements(Ref node) { + if (node[0] == DEFUN) { + return node[3]; + } else if (node[0] == BLOCK) { + return node->size() > 1 ? node[1] : nullptr; + } else { + return arena.alloc(); + } +} + +// Types + +AsmType intToAsmType(int type) { + if (type >= 0 && type <= ASM_NONE) return (AsmType)type; + else { + assert(0); + return ASM_NONE; + } +} + +// forward decls +Ref makeEmpty(); +bool isEmpty(Ref node); +Ref makeAsmVarDef(const IString& v, AsmType type); +Ref makeArray(int size_hint); +Ref makeBool(bool b); +Ref makeNum(double x); +Ref makeName(IString str); +Ref makeAsmCoercion(Ref node, AsmType type); +Ref make1(IString type, Ref a); +Ref make3(IString type, Ref a, Ref b, Ref c); + +AsmData::AsmData(Ref f) { + func = f; + + // process initial params + Ref stats = func[3]; + size_t i = 0; + while (i < stats->size()) { + Ref node = stats[i]; + if (node[0] != STAT || node[1][0] != ASSIGN || node[1][2][0] != NAME) break; + node = node[1]; + Ref name = node[2][1]; + int index = func[2]->indexOf(name); + if (index < 0) break; // not an assign into a parameter, but a global + IString& str = name->getIString(); + if (locals.count(str) > 0) break; // already done that param, must be starting function body + locals[str] = Local(detectType(node[3]), true); + params.push_back(str); + stats[i] = makeEmpty(); + i++; + } + // process initial variable definitions and remove '= 0' etc parts - these + // are not actually assignments in asm.js + while (i < stats->size()) { + Ref node = stats[i]; + if (node[0] != VAR) break; + for (size_t j = 0; j < node[1]->size(); j++) { + Ref v = node[1][j]; + IString& name = v[0]->getIString(); + Ref value = v[1]; + if (locals.count(name) == 0) { + locals[name] = Local(detectType(value, nullptr, true), false); + vars.push_back(name); + v->setSize(1); // make an un-assigning var + } else { + assert(j == 0); // cannot break in the middle + goto outside; + } + } + i++; + } + outside: + // look for other var definitions and collect them + while (i < stats->size()) { + traversePre(stats[i], [&](Ref node) { + Ref type = node[0]; + if (type == VAR) { + dump("bad, seeing a var in need of fixing", func); + abort(); //, 'should be no vars to fix! ' + func[1] + ' : ' + JSON.stringify(node)); + } + }); + i++; + } + // look for final RETURN statement to get return type. + Ref retStmt = stats->back(); + if (!!retStmt && retStmt[0] == RETURN && !!retStmt[1]) { + ret = detectType(retStmt[1]); + } else { + ret = ASM_NONE; + } +} + +void AsmData::denormalize() { + Ref stats = func[3]; + // Remove var definitions, if any + for (size_t i = 0; i < stats->size(); i++) { + if (stats[i][0] == VAR) { + stats[i] = makeEmpty(); + } else { + if (!isEmpty(stats[i])) break; + } + } + // calculate variable definitions + Ref varDefs = makeArray(vars.size()); + for (auto v : vars) { + varDefs->push_back(makeAsmVarDef(v, locals[v].type)); + } + // each param needs a line; reuse emptyNodes as much as we can + size_t numParams = params.size(); + size_t emptyNodes = 0; + while (emptyNodes < stats->size()) { + if (!isEmpty(stats[emptyNodes])) break; + emptyNodes++; + } + size_t neededEmptyNodes = numParams + (varDefs->size() ? 1 : 0); // params plus one big var if there are vars + if (neededEmptyNodes > emptyNodes) { + stats->insert(0, neededEmptyNodes - emptyNodes); + } else if (neededEmptyNodes < emptyNodes) { + stats->splice(0, emptyNodes - neededEmptyNodes); + } + // add param coercions + int next = 0; + for (auto param : func[2]->getArray()) { + IString str = param->getIString(); + assert(locals.count(str) > 0); + stats[next++] = make1(STAT, make3(ASSIGN, makeBool(true), makeName(str.c_str()), makeAsmCoercion(makeName(str.c_str()), locals[str].type))); + } + if (varDefs->size()) { + stats[next] = make1(VAR, varDefs); + } + /* + if (inlines->size() > 0) { + var i = 0; + traverse(func, function(node, type) { + if (type == CALL && node[1][0] == NAME && node[1][1] == 'inlinejs') { + node[1] = inlines[i++]; // swap back in the body + } + }); + } + */ + // ensure that there's a final RETURN statement if needed. + if (ret != ASM_NONE) { + Ref retStmt = stats->back(); + if (!retStmt || retStmt[0] != RETURN) { + Ref retVal = makeNum(0); + if (ret != ASM_INT) { + retVal = makeAsmCoercion(retVal, ret); + } + stats->push_back(make1(RETURN, retVal)); + } + } + //printErr('denormalized \n\n' + astToSrc(func) + '\n\n'); +} + +// Constructions TODO: share common constructions, and assert they remain frozen + +Ref makeArray(int size_hint=0) { + return &arena.alloc()->setArray(size_hint); +} + +Ref makeBool(bool b) { + return &arena.alloc()->setBool(b); +} + +Ref makeString(const IString& s) { + return &arena.alloc()->setString(s); +} + +Ref makeEmpty() { + return ValueBuilder::makeToplevel(); +} + +Ref makeNum(double x) { + return ValueBuilder::makeDouble(x); +} + +Ref makeName(IString str) { + return ValueBuilder::makeName(str); +} + +Ref makeBlock() { + return ValueBuilder::makeBlock(); +} + +Ref make1(IString s1, Ref a) { + Ref ret(makeArray(2)); + ret->push_back(makeString(s1)); + ret->push_back(a); + return ret; +} + +Ref make2(IString s1, IString s2, Ref a) { + Ref ret(makeArray(2)); + ret->push_back(makeString(s1)); + ret->push_back(makeString(s2)); + ret->push_back(a); + return ret; +} + +Ref make2(IString s1, Ref a, Ref b) { + Ref ret(makeArray(3)); + ret->push_back(makeString(s1)); + ret->push_back(a); + ret->push_back(b); + return ret; +} + +Ref make3(IString type, IString a, Ref b, Ref c) { + Ref ret(makeArray(4)); + ret->push_back(makeString(type)); + ret->push_back(makeString(a)); + ret->push_back(b); + ret->push_back(c); + return ret; +} + +Ref make3(IString type, Ref a, Ref b, Ref c) { + Ref ret(makeArray(4)); + ret->push_back(makeString(type)); + ret->push_back(a); + ret->push_back(b); + ret->push_back(c); + return ret; +} + +Ref makeAsmVarDef(const IString& v, AsmType type) { + Ref val; + switch (type) { + case ASM_INT: val = makeNum(0); break; + case ASM_DOUBLE: val = make2(UNARY_PREFIX, PLUS, makeNum(0)); break; + case ASM_FLOAT: { + if (!ASM_FLOAT_ZERO.isNull()) { + val = makeName(ASM_FLOAT_ZERO); + } else { + val = make2(CALL, makeName(MATH_FROUND), &(makeArray(1))->push_back(makeNum(0))); + } + break; + } + case ASM_FLOAT32X4: { + val = make2(CALL, makeName(SIMD_FLOAT32X4), &(makeArray(4))->push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0))); + break; + } + case ASM_FLOAT64X2: { + val = make2(CALL, makeName(SIMD_FLOAT64X2), &(makeArray(2))->push_back(makeNum(0)).push_back(makeNum(0))); + break; + } + case ASM_INT8X16: { + val = make2(CALL, makeName(SIMD_INT8X16), &(makeArray(16))->push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0))); + break; + } + case ASM_INT16X8: { + val = make2(CALL, makeName(SIMD_INT16X8), &(makeArray(8))->push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0))); + break; + } + case ASM_INT32X4: { + val = make2(CALL, makeName(SIMD_INT32X4), &(makeArray(4))->push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0))); + break; + } + default: assert(0); + } + return make1(v, val); +} + +Ref makeAsmCoercion(Ref node, AsmType type) { + switch (type) { + case ASM_INT: return make3(BINARY, OR, node, makeNum(0)); + case ASM_DOUBLE: return make2(UNARY_PREFIX, PLUS, node); + case ASM_FLOAT: return make2(CALL, makeName(MATH_FROUND), &(makeArray(1))->push_back(node)); + case ASM_FLOAT32X4: return make2(CALL, makeName(SIMD_FLOAT32X4_CHECK), &(makeArray(1))->push_back(node)); + case ASM_FLOAT64X2: return make2(CALL, makeName(SIMD_FLOAT64X2_CHECK), &(makeArray(1))->push_back(node)); + case ASM_INT8X16: return make2(CALL, makeName(SIMD_INT8X16_CHECK), &(makeArray(1))->push_back(node)); + case ASM_INT16X8: return make2(CALL, makeName(SIMD_INT16X8_CHECK), &(makeArray(1))->push_back(node)); + case ASM_INT32X4: return make2(CALL, makeName(SIMD_INT32X4_CHECK), &(makeArray(1))->push_back(node)); + case ASM_NONE: + default: return node; // non-validating code, emit nothing XXX this is dangerous, we should only allow this when we know we are not validating + } +} + +// Checks + +bool isEmpty(Ref node) { + return (node->size() == 2 && node[0] == TOPLEVEL && node[1]->size() == 0) || + (node->size() > 0 && node[0] == BLOCK && (!node[1] || node[1]->size() == 0)); +} + +bool commable(Ref node) { // TODO: hashing + IString type = node[0]->getIString(); + if (type == ASSIGN || type == BINARY || type == UNARY_PREFIX || type == NAME || type == NUM || type == CALL || type == SEQ || type == CONDITIONAL || type == SUB) return true; + return false; +} + +bool isMathFunc(const char *name) { + static const char *Math_ = "Math_"; + static unsigned size = strlen(Math_); + return strncmp(name, Math_, size) == 0; +} + +bool isMathFunc(Ref value) { + return value->isString() && isMathFunc(value->getCString()); +} + +bool callHasSideEffects(Ref node) { // checks if the call itself (not the args) has side effects (or is not statically known) + return !(node[1][0] == NAME && isMathFunc(node[1][1])); +} + +bool hasSideEffects(Ref node) { // this is 99% incomplete! + IString type = node[0]->getIString(); + switch (type[0]) { + case 'n': + if (type == NUM || type == NAME) return false; + break; + case 's': + if (type == STRING) return false; + if (type == SUB) return hasSideEffects(node[1]) || hasSideEffects(node[2]); + break; + case 'u': + if (type == UNARY_PREFIX) return hasSideEffects(node[2]); + break; + case 'b': + if (type == BINARY) return hasSideEffects(node[2]) || hasSideEffects(node[3]); + break; + case 'c': + if (type == CALL) { + if (callHasSideEffects(node)) return true; + // This is a statically known call, with no side effects. only args can side effect us + for (auto arg : node[2]->getArray()) { + if (hasSideEffects(arg)) return true; + } + return false; + } else if (type == CONDITIONAL) return hasSideEffects(node[1]) || hasSideEffects(node[2]) || hasSideEffects(node[3]); + break; + } + return true; +} + +// checks if a node has just basic operations, nothing with side effects nor that can notice side effects, which +// implies we can move it around in the code +bool triviallySafeToMove(Ref node, AsmData& asmData) { + bool ok = true; + traversePre(node, [&](Ref node) { + Ref type = node[0]; + if (type == STAT || type == BINARY || type == UNARY_PREFIX || type == ASSIGN || type == NUM) return; + else if (type == NAME) { + if (!asmData.isLocal(node[1]->getIString())) ok = false; + } else if (type == CALL) { + if (callHasSideEffects(node)) ok = false; + } else { + ok = false; + } + }); + return ok; +} + +// Transforms + +// We often have branchings that are simplified so one end vanishes, and +// we then get +// if (!(x < 5)) +// or such. Simplifying these saves space and time. +Ref simplifyNotCompsDirect(Ref node) { + if (node[0] == UNARY_PREFIX && node[1] == L_NOT) { + // de-morgan's laws do not work on floats, due to nans >:( + if (node[2][0] == BINARY && (detectType(node[2][2]) == ASM_INT && detectType(node[2][3]) == ASM_INT)) { + Ref op = node[2][1]; + switch(op->getCString()[0]) { + case '<': { + if (op == LT) { op->setString(GE); break; } + if (op == LE) { op->setString(GT); break; } + return node; + } + case '>': { + if (op == GT) { op->setString(LE); break; } + if (op == GE) { op->setString(LT); break; } + return node; + } + case '=': { + if (op == EQ) { op->setString(NE); break; } + return node; + } + case '!': { + if (op == NE) { op->setString(EQ); break; } + return node; + } + default: return node; + } + return make3(BINARY, op, node[2][2], node[2][3]); + } else if (node[2][0] == UNARY_PREFIX && node[2][1] == L_NOT) { + return node[2][2]; + } + } + return node; +} + +Ref flipCondition(Ref cond) { + return simplifyNotCompsDirect(make2(UNARY_PREFIX, L_NOT, cond)); +} + +void safeCopy(Ref target, Ref source) { // safely copy source onto target, even if source is a subnode of target + Ref temp = source; // hold on to source + *target = *temp; +} + +void clearEmptyNodes(Ref arr) { + int skip = 0; + for (size_t i = 0; i < arr->size(); i++) { + if (skip) { + arr[i-skip] = arr[i]; + } + if (isEmpty(deStat(arr[i]))) { + skip++; + } + } + if (skip) arr->setSize(arr->size() - skip); +} +void clearUselessNodes(Ref arr) { + int skip = 0; + for (size_t i = 0; i < arr->size(); i++) { + Ref curr = arr[i]; + if (skip) { + arr[i-skip] = curr; + } + if (isEmpty(deStat(curr)) || (curr[0] == STAT && !hasSideEffects(curr[1]))) { + skip++; + } + } + if (skip) arr->setSize(arr->size() - skip); +} + +void removeAllEmptySubNodes(Ref ast) { + traversePre(ast, [](Ref node) { + if (node[0] == DEFUN) { + clearEmptyNodes(node[3]); + } else if (node[0] == BLOCK && node->size() > 1 && !!node[1]) { + clearEmptyNodes(node[1]); + } else if (node[0] == SEQ && isEmpty(node[1])) { + safeCopy(node, node[2]); + } + }); +} +void removeAllUselessSubNodes(Ref ast) { + traversePrePost(ast, [](Ref node) { + Ref type = node[0]; + if (type == DEFUN) { + clearUselessNodes(node[3]); + } else if (type == BLOCK && node->size() > 1 && !!node[1]) { + clearUselessNodes(node[1]); + } else if (type == SEQ && isEmpty(node[1])) { + safeCopy(node, node[2]); + } + }, [](Ref node) { + Ref type = node[0]; + if (type == IF) { + bool empty2 = isEmpty(node[2]), has3 = node->size() == 4 && !!node[3], empty3 = !has3 || isEmpty(node[3]); + if (!empty2 && empty3 && has3) { // empty else clauses + node->setSize(3); + } else if (empty2 && !empty3) { // empty if blocks + safeCopy(node, make2(IF, make2(UNARY_PREFIX, L_NOT, node[1]), node[3])); + } else if (empty2 && empty3) { + if (hasSideEffects(node[1])) { + safeCopy(node, make1(STAT, node[1])); + } else { + safeCopy(node, makeEmpty()); + } + } + } + }); +} + +Ref unVarify(Ref vars) { // transform var x=1, y=2 etc. into (x=1, y=2), i.e., the same assigns, but without a var definition + Ref ret = makeArray(1); + ret->push_back(makeString(STAT)); + if (vars->size() == 1) { + ret->push_back(make3(ASSIGN, makeBool(true), makeName(vars[0][0]->getIString()), vars[0][1])); + } else { + ret->push_back(makeArray(vars->size()-1)); + Ref curr = ret[1]; + for (size_t i = 0; i+1 < vars->size(); i++) { + curr->push_back(makeString(SEQ)); + curr->push_back(make3(ASSIGN, makeBool(true), makeName(vars[i][0]->getIString()), vars[i][1])); + if (i != vars->size()-2) { + curr->push_back(makeArray()); + curr = curr[2]; + } + } + curr->push_back(make3(ASSIGN, makeBool(true), makeName(vars->back()[0]->getIString()), vars->back()[1])); + } + return ret; +} + +// Calculations + +int measureCost(Ref ast) { + int size = 0; + traversePre(ast, [&size](Ref node) { + Ref type = node[0]; + if (type == NUM || type == UNARY_PREFIX) size--; + else if (type == BINARY) { + if (node[3][0] == NUM && node[3][1]->getNumber() == 0) size--; + else if (node[1] == DIV || node[1] == MOD) size += 2; + } + else if (type == CALL && !callHasSideEffects(node)) size -= 2; + else if (type == SUB) size++; + size++; + }); + return size; +} + +//================== +// Params +//================== + +bool preciseF32 = false, + receiveJSON = false, + emitJSON = false, + minifyWhitespace = false, + last = false; + +//===================== +// Optimization passes +//===================== + +#define HASES \ + bool has(const IString& str) { \ + return count(str) > 0; \ + } \ + bool has(Ref node) { \ + return node->isString() && count(node->getIString()) > 0; \ + } + +class StringSet : public cashew::IStringSet { +public: + StringSet() {} + StringSet(const char *str) : IStringSet(str) {} + + HASES + + void dump() { + err("==="); + for (auto str : *this) { + errv("%s", str.c_str()); + } + err("==="); + } +}; + +StringSet USEFUL_BINARY_OPS("<< >> | & ^"), + COMPARE_OPS("< <= > >= == == != !=="), + BITWISE("| & ^"), + SAFE_BINARY_OPS("+ -"), // division is unsafe as it creates non-ints in JS; mod is unsafe as signs matter so we can't remove |0's; mul does not nest with +,- in asm + COERCION_REQUIRING_OPS("sub unary-prefix"), // ops that in asm must be coerced right away + COERCION_REQUIRING_BINARIES("* / %"); // binary ops that in asm must be coerced + +StringSet ASSOCIATIVE_BINARIES("+ * | & ^"), + CONTROL_FLOW("do while for if switch"), + LOOP("do while for"), + NAME_OR_NUM("name num"), + CONDITION_CHECKERS("if do while switch"), + SAFE_TO_DROP_COERCION("unary-prefix name num"); + +StringSet BREAK_CAPTURERS("do while for switch"), + CONTINUE_CAPTURERS("do while for"), + FUNCTIONS_THAT_ALWAYS_THROW("abort ___resumeException ___cxa_throw ___cxa_rethrow"); + +bool isFunctionTable(const char *name) { + static const char *functionTable = "FUNCTION_TABLE"; + static unsigned size = strlen(functionTable); + return strncmp(name, functionTable, size) == 0; +} + +bool isFunctionTable(Ref value) { + return value->isString() && isFunctionTable(value->getCString()); +} + +// Internal utilities + +bool canDropCoercion(Ref node) { + if (SAFE_TO_DROP_COERCION.has(node[0])) return true; + if (node[0] == BINARY) { + switch (node[1]->getCString()[0]) { + case '>': return node[1] == RSHIFT || node[1] == TRSHIFT; + case '<': return node[1] == LSHIFT; + case '|': case '^': case '&': return true; + } + } + return false; +} + +Ref simplifyCondition(Ref node) { + node = simplifyNotCompsDirect(node); + // on integers, if (x == 0) is the same as if (x), and if (x != 0) as if (!x) + if (node[0] == BINARY && (node[1] == EQ || node[1] == NE)) { + Ref target; + if (detectType(node[2]) == ASM_INT && node[3][0] == NUM && node[3][1]->getNumber() == 0) { + target = node[2]; + } else if (detectType(node[3]) == ASM_INT && node[2][0] == NUM && node[2][1]->getNumber() == 0) { + target = node[3]; + } + if (!!target) { + if (target[0] == BINARY && (target[1] == OR || target[1] == TRSHIFT) && target[3][0] == NUM && target[3][1]->getNumber() == 0 && + canDropCoercion(target[2])) { + target = target[2]; // drop the coercion, in a condition it is ok to do if (x) + } + if (node[1] == EQ) { + return make2(UNARY_PREFIX, L_NOT, target); + } else { + return target; + } + } + } + return node; +} + +// Passes + +// Eliminator aka Expressionizer +// +// The goal of this pass is to eliminate unneeded variables (which represent one of the infinite registers in the LLVM +// model) and thus to generate complex expressions where possible, for example +// +// var x = a(10); +// var y = HEAP[20]; +// print(x+y); +// +// can be transformed into +// +// print(a(10)+HEAP[20]); +// +// The basic principle is to scan along the code in the order of parsing/execution, and keep a list of tracked +// variables that are current contenders for elimination. We must untrack when we see something that we cannot +// cross, for example, a write to memory means we must invalidate variables that depend on reading from +// memory, since if we change the order then we do not preserve the computation. +// +// We rely on some assumptions about emscripten-generated code here, which means we can do a lot more than +// a general JS optimization can. For example, we assume that SUB nodes (indexing like HEAP[..]) are +// memory accesses or FUNCTION_TABLE accesses, and in both cases that the symbol cannot be replaced although +// the contents can. So we assume FUNCTION_TABLE might have its contents changed but not be pointed to +// a different object, which allows +// +// var x = f(); +// FUNCTION_TABLE[x](); +// +// to be optimized (f could replace FUNCTION_TABLE, so in general JS eliminating x is not valid). +// +// In memSafe mode, we are more careful and assume functions can replace HEAP and FUNCTION_TABLE, which +// can happen in ALLOW_MEMORY_GROWTH mode + +StringSet ELIMINATION_SAFE_NODES("assign call if toplevel do return label switch binary unary-prefix"); // do is checked carefully, however +StringSet IGNORABLE_ELIMINATOR_SCAN_NODES("num toplevel string break continue dot"); // dot can only be STRING_TABLE.* +StringSet ABORTING_ELIMINATOR_SCAN_NODES("new object function defun for while array throw"); // we could handle some of these, TODO, but nontrivial (e.g. for while, the condition is hit multiple times after the body) +StringSet HEAP_NAMES("HEAP8 HEAP16 HEAP32 HEAPU8 HEAPU16 HEAPU32 HEAPF32 HEAPF64"); + +bool isTempDoublePtrAccess(Ref node) { // these are used in bitcasts; they are not really affecting memory, and should cause no invalidation + assert(node[0] == SUB); + return (node[2][0] == NAME && node[2][1] == TEMP_DOUBLE_PTR) || + (node[2][0] == BINARY && ((node[2][2][0] == NAME && node[2][2][1] == TEMP_DOUBLE_PTR) || + (node[2][3][0] == NAME && node[2][3][1] == TEMP_DOUBLE_PTR))); +} + +class StringIntMap : public std::unordered_map { +public: + HASES +}; + +class StringStringMap : public std::unordered_map { +public: + HASES +}; + +class StringRefMap : public std::unordered_map { +public: + HASES +}; + +class StringTypeMap : public std::unordered_map { +public: + HASES +}; + +void eliminate(Ref ast, bool memSafe) { +#ifdef PROFILING + clock_t tasmdata = 0; + clock_t tfnexamine = 0; + clock_t tvarcheck = 0; + clock_t tstmtelim = 0; + clock_t tstmtscan = 0; + clock_t tcleanvars = 0; + clock_t treconstruct = 0; +#endif + + // Find variables that have a single use, and if they can be eliminated, do so + traverseFunctions(ast, [&](Ref func) { + +#ifdef PROFILING + clock_t start = clock(); +#endif + + AsmData asmData(func); + +#ifdef PROFILING + tasmdata += clock() - start; + start = clock(); +#endif + + // First, find the potentially eliminatable functions: that have one definition and one use + + StringIntMap definitions; + StringIntMap uses; + StringIntMap namings; + StringRefMap values; + StringIntMap varsToRemove; // variables being removed, that we can eliminate all 'var x;' of (this refers to VAR nodes we should remove) + // 1 means we should remove it, 2 means we successfully removed it + StringSet varsToTryToRemove; // variables that have 0 uses, but have side effects - when we scan we can try to remove them + + // examine body and note locals + traversePre(func, [&](Ref node) { + Ref type = node[0]; + if (type == NAME) { + IString& name = node[1]->getIString(); + uses[name]++; + namings[name]++; + } else if (type == ASSIGN) { + Ref target = node[2]; + if (target[0] == NAME) { + IString& name = target[1]->getIString(); + // values is only used if definitions is 1 + if (definitions[name]++ == 0) { + values[name] = node[3]; + } + assert(node[1]->isBool(true)); // not +=, -= etc., just = + uses[name]--; // because the name node will show up by itself in the previous case + } + } + }); + +#ifdef PROFILING + tfnexamine += clock() - start; + start = clock(); +#endif + + StringSet potentials; // local variables with 1 definition and 1 use + StringSet sideEffectFree; // whether a local variable has no side effects in its definition. Only relevant when there are no uses + + auto unprocessVariable = [&](IString name) { + potentials.erase(name); + varsToRemove.erase(name); + sideEffectFree.erase(name); + varsToTryToRemove.erase(name); + }; + std::function processVariable = [&](IString name) { + if (definitions[name] == 1 && uses[name] == 1) { + potentials.insert(name); + } else if (uses[name] == 0 && definitions[name] <= 1) { // no uses, no def or 1 def (cannot operate on phis, and the llvm optimizer will remove unneeded phis anyhow) (no definition means it is a function parameter, or a local with just |var x;| but no defining assignment) + bool sideEffects = false; + auto val = values.find(name); + Ref value; + if (val != values.end()) { + value = val->second; + // TODO: merge with other side effect code + // First, pattern-match + // (HEAP32[((tempDoublePtr)>>2)]=((HEAP32[(($_sroa_0_0__idx1)>>2)])|0),HEAP32[(((tempDoublePtr)+(4))>>2)]=((HEAP32[((($_sroa_0_0__idx1)+(4))>>2)])|0),(+(HEAPF64[(tempDoublePtr)>>3]))) + // which has no side effects and is the special form of converting double to i64. + if (!(value[0] == SEQ && value[1][0] == ASSIGN && value[1][2][0] == SUB && value[1][2][2][0] == BINARY && value[1][2][2][1] == RSHIFT && + value[1][2][2][2][0] == NAME && value[1][2][2][2][1] == TEMP_DOUBLE_PTR)) { + // If not that, then traverse and scan normally. + sideEffects = hasSideEffects(value); + } + } + if (!sideEffects) { + varsToRemove[name] = !definitions[name] ? 2 : 1; // remove it normally + sideEffectFree.insert(name); + // Each time we remove a variable with 0 uses, if its value has no + // side effects and vanishes too, then we can remove a use from variables + // appearing in it, and possibly eliminate again + if (!!value) { + traversePre(value, [&](Ref node) { + if (node[0] == NAME) { + IString name = node[1]->getIString(); + node[1]->setString(EMPTY); // we can remove this - it will never be shown, and should not be left to confuse us as we traverse + if (asmData.isLocal(name)) { + uses[name]--; // cannot be infinite recursion since we descend an energy function + assert(uses[name] >= 0); + unprocessVariable(name); + processVariable(name); + } + } else if (node[0] == CALL) { + // no side effects, so this must be a Math.* call or such. We can just ignore it and all children + node[0]->setString(NAME); + node[1]->setString(EMPTY); + } + }); + } + } else { + varsToTryToRemove.insert(name); // try to remove it later during scanning + } + } + }; + for (auto name : asmData.locals) { + processVariable(name.first); + } + +#ifdef PROFILING + tvarcheck += clock() - start; + start = clock(); +#endif + + //printErr('defs: ' + JSON.stringify(definitions)); + //printErr('uses: ' + JSON.stringify(uses)); + //printErr('values: ' + JSON.stringify(values)); + //printErr('locals: ' + JSON.stringify(locals)); + //printErr('varsToRemove: ' + JSON.stringify(varsToRemove)); + //printErr('varsToTryToRemove: ' + JSON.stringify(varsToTryToRemove)); + values.clear(); + //printErr('potentials: ' + JSON.stringify(potentials)); + // We can now proceed through the function. In each list of statements, we try to eliminate + struct Tracking { + bool usesGlobals, usesMemory, hasDeps; + Ref defNode; + bool doesCall; + }; + class Tracked : public std::unordered_map { + public: + HASES + }; + Tracked tracked; + #define dumpTracked() { errv("tracking %d", tracked.size()); for (auto t : tracked) errv("... %s", t.first.c_str()); } + // Although a set would be more appropriate, it would also be slower + std::unordered_map depMap; + + bool globalsInvalidated = false; // do not repeat invalidations, until we track something new + bool memoryInvalidated = false; + bool callsInvalidated = false; + auto track = [&](IString name, Ref value, Ref defNode) { // add a potential that has just been defined to the tracked list, we hope to eliminate it + Tracking& track = tracked[name]; + track.usesGlobals = false; + track.usesMemory = false; + track.hasDeps = false; + track.defNode = defNode; + track.doesCall = false; + bool ignoreName = false; // one-time ignorings of names, as first op in sub and call + traversePre(value, [&](Ref node) { + Ref type = node[0]; + if (type == NAME) { + if (!ignoreName) { + IString depName = node[1]->getIString(); + if (!asmData.isLocal(depName)) { + track.usesGlobals = true; + } + if (!potentials.has(depName)) { // deps do not matter for potentials - they are defined once, so no complexity + depMap[depName].push_back(name); + track.hasDeps = true; + } + } else { + ignoreName = false; + } + } else if (type == SUB) { + track.usesMemory = true; + ignoreName = true; + } else if (type == CALL) { + track.usesGlobals = true; + track.usesMemory = true; + track.doesCall = true; + ignoreName = true; + } else { + ignoreName = false; + } + }); + if (track.usesGlobals) globalsInvalidated = false; + if (track.usesMemory) memoryInvalidated = false; + if (track.doesCall) callsInvalidated = false; + }; + + // TODO: invalidate using a sequence number for each type (if you were tracked before the last invalidation, you are cancelled). remove for.in loops + #define INVALIDATE(what, check) \ + auto invalidate##what = [&]() { \ + std::vector temp; \ + for (auto t : tracked) { \ + IString name = t.first; \ + Tracking& info = tracked[name]; \ + if (check) { \ + temp.push_back(name); \ + } \ + } \ + for (size_t i = 0; i < temp.size(); i++) { \ + tracked.erase(temp[i]); \ + } \ + }; + INVALIDATE(Globals, info.usesGlobals); + INVALIDATE(Memory, info.usesMemory); + INVALIDATE(Calls, info.doesCall); + + auto invalidateByDep = [&](IString dep) { + for (auto name : depMap[dep]) { + tracked.erase(name); + } + depMap.erase(dep); + }; + + std::function doEliminate; + + // Generate the sequence of execution. This determines what is executed before what, so we know what can be reordered. Using + // that, performs invalidations and eliminations + auto scan = [&](Ref node) { + bool abort = false; + bool allowTracking = true; // false inside an if; also prevents recursing in an if + std::function traverseInOrder = [&](Ref node, bool ignoreSub) { + if (abort) return; + Ref type = node[0]; + if (type == ASSIGN) { + Ref target = node[2]; + Ref value = node[3]; + bool nameTarget = target[0] == NAME; + // If this is an assign to a name, handle it below rather than + // traversing and treating as a read + if (!nameTarget) { + traverseInOrder(target, true); // evaluate left + } + traverseInOrder(value, false); // evaluate right + // do the actual assignment + if (nameTarget) { + IString name = target[1]->getIString(); + if (potentials.has(name) && allowTracking) { + track(name, node[3], node); + } else if (varsToTryToRemove.has(name)) { + // replace it in-place + safeCopy(node, value); + varsToRemove[name] = 2; + } else { + // expensive check for invalidating specific tracked vars. This list is generally quite short though, because of + // how we just eliminate in short spans and abort when control flow happens TODO: history numbers instead + invalidateByDep(name); // can happen more than once per dep.. + if (!asmData.isLocal(name) && !globalsInvalidated) { + invalidateGlobals(); + globalsInvalidated = true; + } + // if we can track this name (that we assign into), and it has 0 uses and we want to remove its VAR + // definition - then remove it right now, there is no later chance + if (allowTracking && varsToRemove.has(name) && uses[name] == 0) { + track(name, node[3], node); + doEliminate(name, node); + } + } + } else if (target[0] == SUB) { + if (isTempDoublePtrAccess(target)) { + if (!globalsInvalidated) { + invalidateGlobals(); + globalsInvalidated = true; + } + } else if (!memoryInvalidated) { + invalidateMemory(); + memoryInvalidated = true; + } + } + } else if (type == SUB) { + // Only keep track of the global array names in memsafe mode i.e. + // when they may change underneath us due to resizing + if (node[1][0] != NAME || memSafe) { + traverseInOrder(node[1], false); // evaluate inner + } + traverseInOrder(node[2], false); // evaluate outer + // ignoreSub means we are a write (happening later), not a read + if (!ignoreSub && !isTempDoublePtrAccess(node)) { + // do the memory access + if (!callsInvalidated) { + invalidateCalls(); + callsInvalidated = true; + } + } + } else if (type == BINARY) { + bool flipped = false; + if (ASSOCIATIVE_BINARIES.has(node[1]) && !NAME_OR_NUM.has(node[2][0]) && NAME_OR_NUM.has(node[3][0])) { // TODO recurse here? + // associatives like + and * can be reordered in the simple case of one of the sides being a name, since we assume they are all just numbers + Ref temp = node[2]; + node[2] = node[3]; + node[3] = temp; + flipped = true; + } + traverseInOrder(node[2], false); + traverseInOrder(node[3], false); + if (flipped && NAME_OR_NUM.has(node[2][0])) { // dunno if we optimized, but safe to flip back - and keeps the code closer to the original and more readable + Ref temp = node[2]; + node[2] = node[3]; + node[3] = temp; + } + } else if (type == NAME) { + IString name = node[1]->getIString(); + if (tracked.has(name)) { + doEliminate(name, node); + } else if (!asmData.isLocal(name) && !callsInvalidated && (memSafe || !HEAP_NAMES.has(name))) { // ignore HEAP8 etc when not memory safe, these are ok to + // access, e.g. SIMD_Int32x4_load(HEAP8, ...) + invalidateCalls(); + callsInvalidated = true; + } + } else if (type == UNARY_PREFIX || type == UNARY_POSTFIX) { + traverseInOrder(node[2], false); + } else if (IGNORABLE_ELIMINATOR_SCAN_NODES.has(type)) { + } else if (type == CALL) { + // Named functions never change and are therefore safe to not track + if (node[1][0] != NAME) { + traverseInOrder(node[1], false); + } + Ref args = node[2]; + for (size_t i = 0; i < args->size(); i++) { + traverseInOrder(args[i], false); + } + if (callHasSideEffects(node)) { + // these two invalidations will also invalidate calls + if (!globalsInvalidated) { + invalidateGlobals(); + globalsInvalidated = true; + } + if (!memoryInvalidated) { + invalidateMemory(); + memoryInvalidated = true; + } + } + } else if (type == IF) { + if (allowTracking) { + traverseInOrder(node[1], false); // can eliminate into condition, but nowhere else + if (!callsInvalidated) { // invalidate calls, since we cannot eliminate them into an if that may not execute! + invalidateCalls(); + callsInvalidated = true; + } + allowTracking = false; + traverseInOrder(node[2], false); // 2 and 3 could be 'parallel', really.. + if (!!node[3]) traverseInOrder(node[3], false); + allowTracking = true; + } else { + tracked.clear(); + } + } else if (type == BLOCK) { + Ref stats = getStatements(node); + if (!!stats) { + for (size_t i = 0; i < stats->size(); i++) { + traverseInOrder(stats[i], false); + } + } + } else if (type == STAT) { + traverseInOrder(node[1], false); + } else if (type == LABEL) { + traverseInOrder(node[2], false); + } else if (type == SEQ) { + traverseInOrder(node[1], false); + traverseInOrder(node[2], false); + } else if (type == DO) { + if (node[1][0] == NUM && node[1][1]->getNumber() == 0) { // one-time loop + traverseInOrder(node[2], false); + } else { + tracked.clear(); + } + } else if (type == RETURN) { + if (!!node[1]) traverseInOrder(node[1], false); + } else if (type == CONDITIONAL) { + if (!callsInvalidated) { // invalidate calls, since we cannot eliminate them into a branch of an LLVM select/JS conditional that does not execute + invalidateCalls(); + callsInvalidated = true; + } + traverseInOrder(node[1], false); + traverseInOrder(node[2], false); + traverseInOrder(node[3], false); + } else if (type == SWITCH) { + traverseInOrder(node[1], false); + Tracked originalTracked = tracked; + Ref cases = node[2]; + for (size_t i = 0; i < cases->size(); i++) { + Ref c = cases[i]; + assert(c[0]->isNull() || c[0][0] == NUM || (c[0][0] == UNARY_PREFIX && c[0][2][0] == NUM)); + Ref stats = c[1]; + for (size_t j = 0; j < stats->size(); j++) { + traverseInOrder(stats[j], false); + } + // We cannot track from one switch case into another if there are external dependencies, undo all new trackings + // Otherwise we can track, e.g. a var used in a case before assignment in another case is UB in asm.js, so no need for the assignment + // TODO: general framework here, use in if-else as well + std::vector toDelete; + for (auto t : tracked) { + if (!originalTracked.has(t.first)) { + Tracking& info = tracked[t.first]; + if (info.usesGlobals || info.usesMemory || info.hasDeps) { + toDelete.push_back(t.first); + } + } + } + for (auto t : toDelete) { + tracked.erase(t); + } + } + tracked.clear(); // do not track from inside the switch to outside + } else { + assert(ABORTING_ELIMINATOR_SCAN_NODES.has(type)); + tracked.clear(); + abort = true; + } + }; + traverseInOrder(node, false); + }; + //var eliminationLimit = 0; // used to debugging purposes + doEliminate = [&](IString name, Ref node) { + //if (eliminationLimit == 0) return; + //eliminationLimit--; + //printErr('elim!!!!! ' + name); + // yes, eliminate! + varsToRemove[name] = 2; // both assign and var definitions can have other vars we must clean up + assert(tracked.has(name)); + Tracking& info = tracked[name]; + Ref defNode = info.defNode; + assert(!!defNode); + if (!sideEffectFree.has(name)) { + assert(defNode[0] != VAR); + // assign + Ref value = defNode[3]; + // wipe out the assign + safeCopy(defNode, makeEmpty()); + // replace this node in-place + safeCopy(node, value); + } else { + // This has no side effects and no uses, empty it out in-place + safeCopy(node, makeEmpty()); + } + tracked.erase(name); + }; + traversePre(func, [&](Ref block) { + // Look for statements, including while-switch pattern + Ref stats = getStatements(block); + if (!stats && (block[0] == WHILE && block[2][0] == SWITCH)) { + stats = &(makeArray(1)->push_back(block[2])); + } + if (!stats) return; + tracked.clear(); + for (size_t i = 0; i < stats->size(); i++) { + Ref node = deStat(stats[i]); + Ref type = node[0]; + if (type == RETURN && i+1 < stats->size()) { + stats->setSize(i+1); // remove any code after a return + } + // Check for things that affect elimination + if (ELIMINATION_SAFE_NODES.has(type)) { +#ifdef PROFILING + tstmtelim += clock() - start; + start = clock(); +#endif + scan(node); +#ifdef PROFILING + tstmtscan += clock() - start; + start = clock(); +#endif + } else if (type == VAR) { + continue; // asm normalisation has reduced 'var' to just the names + } else { + tracked.clear(); // not a var or assign, break all potential elimination so far + } + } + }); + +#ifdef PROFILING + tstmtelim += clock() - start; + start = clock(); +#endif + + StringIntMap seenUses; + StringStringMap helperReplacements; // for looper-helper optimization + + // clean up vars, and loop variable elimination + traversePrePost(func, [&](Ref node) { + // pre + Ref type = node[0]; + /*if (type == VAR) { + node[1] = node[1].filter(function(pair) { return !varsToRemove[pair[0]] }); + if (node[1]->size() == 0) { + // wipe out an empty |var;| + node[0] = TOPLEVEL; + node[1] = []; + } + } else */ + if (type == ASSIGN && node[1]->isBool(true) && node[2][0] == NAME && node[3][0] == NAME && node[2][1] == node[3][1]) { + // elimination led to X = X, which we can just remove + safeCopy(node, makeEmpty()); + } + }, [&](Ref node) { + // post + Ref type = node[0]; + if (type == NAME) { + IString name = node[1]->getIString(); + if (helperReplacements.has(name)) { + node[1]->setString(helperReplacements[name]); + return; // no need to track this anymore, we can't loop-optimize more than once + } + // track how many uses we saw. we need to know when a variable is no longer used (hence we run this in the post) + seenUses[name]++; + } else if (type == WHILE) { + // try to remove loop helper variables specifically + Ref stats = node[2][1]; + Ref last = stats->back(); + if (!!last && last[0] == IF && last[2][0] == BLOCK && !!last[3] && last[3][0] == BLOCK) { + Ref ifTrue = last[2]; + Ref ifFalse = last[3]; + clearEmptyNodes(ifTrue[1]); + clearEmptyNodes(ifFalse[1]); + bool flip = false; + if (ifFalse[1]->size() > 0 && !!ifFalse[1][0] && !!ifFalse[1]->back() && ifFalse[1]->back()[0] == BREAK) { // canonicalize break in the if-true + Ref temp = ifFalse; + ifFalse = ifTrue; + ifTrue = temp; + flip = true; + } + if (ifTrue[1]->size() > 0 && !!ifTrue[1][0] && !!ifTrue[1]->back() && ifTrue[1]->back()[0] == BREAK) { + Ref assigns = ifFalse[1]; + clearEmptyNodes(assigns); + std::vector loopers, helpers; + for (size_t i = 0; i < assigns->size(); i++) { + if (assigns[i][0] == STAT && assigns[i][1][0] == ASSIGN) { + Ref assign = assigns[i][1]; + if (assign[1]->isBool(true) && assign[2][0] == NAME && assign[3][0] == NAME) { + IString looper = assign[2][1]->getIString(); + IString helper = assign[3][1]->getIString(); + if (definitions[helper] == 1 && seenUses[looper] == namings[looper] && + !helperReplacements.has(helper) && !helperReplacements.has(looper)) { + loopers.push_back(looper); + helpers.push_back(helper); + } + } + } + } + // remove loop vars that are used in the rest of the else + for (size_t i = 0; i < assigns->size(); i++) { + if (assigns[i][0] == STAT && assigns[i][1][0] == ASSIGN) { + Ref assign = assigns[i][1]; + if (!(assign[1]->isBool(true) && assign[2][0] == NAME && assign[3][0] == NAME) || indexOf(loopers, assign[2][1]->getIString()) < 0) { + // this is not one of the loop assigns + traversePre(assign, [&](Ref node) { + if (node[0] == NAME) { + int index = indexOf(loopers, node[1]->getIString()); + if (index < 0) index = indexOf(helpers, node[1]->getIString()); + if (index >= 0) { + loopers.erase(loopers.begin() + index); + helpers.erase(helpers.begin() + index); + } + } + }); + } + } + } + // remove loop vars that are used in the if + traversePre(ifTrue, [&](Ref node) { + if (node[0] == NAME) { + int index = indexOf(loopers, node[1]->getIString()); + if (index < 0) index = indexOf(helpers, node[1]->getIString()); + if (index >= 0) { + loopers.erase(loopers.begin() + index); + helpers.erase(helpers.begin() + index); + } + } + }); + if (loopers.size() == 0) return; + for (size_t l = 0; l < loopers.size(); l++) { + IString looper = loopers[l]; + IString helper = helpers[l]; + // the remaining issue is whether loopers are used after the assignment to helper and before the last line (where we assign to it) + int found = -1; + for (int i = (int)stats->size()-2; i >= 0; i--) { + Ref curr = stats[i]; + if (curr[0] == STAT && curr[1][0] == ASSIGN) { + Ref currAssign = curr[1]; + if (currAssign[1]->isBool(true) && currAssign[2][0] == NAME) { + IString to = currAssign[2][1]->getIString(); + if (to == helper) { + found = i; + break; + } + } + } + } + if (found < 0) return; + // if a loop variable is used after we assigned to the helper, we must save its value and use that. + // (note that this can happen due to elimination, if we eliminate an expression containing the + // loop var far down, past the assignment!) + // first, see if the looper and helpers overlap. Note that we check for this looper, compared to + // *ALL* the helpers. Helpers will be replaced by loopers as we eliminate them, potentially + // causing conflicts, so any helper is a concern. + int firstLooperUsage = -1; + int lastLooperUsage = -1; + int firstHelperUsage = -1; + for (int i = found+1; i < (int)stats->size(); i++) { + Ref curr = i < (int)stats->size()-1 ? stats[i] : last[1]; // on the last line, just look in the condition + traversePre(curr, [&](Ref node) { + if (node[0] == NAME) { + if (node[1] == looper) { + if (firstLooperUsage < 0) firstLooperUsage = i; + lastLooperUsage = i; + } else if (indexOf(helpers, node[1]->getIString()) >= 0) { + if (firstHelperUsage < 0) firstHelperUsage = i; + } + } + }); + } + if (firstLooperUsage >= 0) { + // the looper is used, we cannot simply merge the two variables + if ((firstHelperUsage < 0 || firstHelperUsage > lastLooperUsage) && lastLooperUsage+1 < (int)stats->size() && triviallySafeToMove(stats[found], asmData) && + seenUses[helper] == namings[helper]) { + // the helper is not used, or it is used after the last use of the looper, so they do not overlap, + // and the last looper usage is not on the last line (where we could not append after it), and the + // helper is not used outside of the loop. + // just move the looper definition to after the looper's last use + stats->insert(lastLooperUsage+1, stats[found]); + stats->splice(found, 1); + } else { + // they overlap, we can still proceed with the loop optimization, but we must introduce a + // loop temp helper variable + IString temp(strdupe((std::string(looper.c_str()) + "$looptemp").c_str())); + assert(!asmData.isLocal(temp)); + for (int i = firstLooperUsage; i <= lastLooperUsage; i++) { + Ref curr = i < (int)stats->size()-1 ? stats[i] : last[1]; // on the last line, just look in the condition + + std::function looperToLooptemp = [&](Ref node) { + if (node[0] == NAME) { + if (node[1] == looper) { + node[1]->setString(temp); + } + } else if (node[0] == ASSIGN && node[2][0] == NAME) { + // do not traverse the assignment target, phi assignments to the loop variable must remain + traversePrePostConditional(node[3], looperToLooptemp, [](Ref node){}); + return false; + } + return true; + }; + traversePrePostConditional(curr, looperToLooptemp, [](Ref node){}); + } + asmData.addVar(temp, asmData.getType(looper)); + stats->insert(found, make1(STAT, make3(ASSIGN, makeBool(true), makeName(temp), makeName(looper)))); + } + } + } + for (size_t l = 0; l < helpers.size(); l++) { + for (size_t k = 0; k < helpers.size(); k++) { + if (l != k && helpers[l] == helpers[k]) return; // it is complicated to handle a shared helper, abort + } + } + // hurrah! this is safe to do + for (size_t l = 0; l < loopers.size(); l++) { + IString looper = loopers[l]; + IString helper = helpers[l]; + varsToRemove[helper] = 2; + traversePre(node, [&](Ref node) { // replace all appearances of helper with looper + if (node[0] == NAME && node[1] == helper) node[1]->setString(looper); + }); + helperReplacements[helper] = looper; // replace all future appearances of helper with looper + helperReplacements[looper] = looper; // avoid any further attempts to optimize looper in this manner (seenUses is wrong anyhow, too) + } + // simplify the if. we remove the if branch, leaving only the else + if (flip) { + last[1] = simplifyNotCompsDirect(make2(UNARY_PREFIX, L_NOT, last[1])); + Ref temp = last[2]; + last[2] = last[3]; + last[3] = temp; + } + if (loopers.size() == assigns->size()) { + last->pop_back(); + } else { + Ref elseStats = getStatements(last[3]); + for (size_t i = 0; i < elseStats->size(); i++) { + Ref stat = deStat(elseStats[i]); + if (stat[0] == ASSIGN && stat[2][0] == NAME) { + if (indexOf(loopers, stat[2][1]->getIString()) >= 0) { + elseStats[i] = makeEmpty(); + } + } + } + } + } + } + } + }); + +#ifdef PROFILING + tcleanvars += clock() - start; + start = clock(); +#endif + + for (auto v : varsToRemove) { + if (v.second == 2 && asmData.isVar(v.first)) asmData.deleteVar(v.first); + } + + asmData.denormalize(); + +#ifdef PROFILING + treconstruct += clock() - start; + start = clock(); +#endif + + }); + + removeAllEmptySubNodes(ast); + +#ifdef PROFILING + errv(" EL stages: a:%li fe:%li vc:%li se:%li (ss:%li) cv:%li r:%li", + tasmdata, tfnexamine, tvarcheck, tstmtelim, tstmtscan, tcleanvars, treconstruct); +#endif +} + +void eliminateMemSafe(Ref ast) { + eliminate(ast, true); +} + +void simplifyExpressions(Ref ast) { + // Simplify common expressions used to perform integer conversion operations + // in cases where no conversion is needed. + auto simplifyIntegerConversions = [](Ref ast) { + traversePre(ast, [](Ref node) { + Ref type = node[0]; + if (type == BINARY && node[1] == RSHIFT && node[3][0] == NUM && + node[2][0] == BINARY && node[2][1] == LSHIFT && node[2][3][0] == NUM && node[3][1]->getNumber() == node[2][3][1]->getNumber()) { + // Transform (x&A)<>B to X&A. + Ref innerNode = node[2][2]; + double shifts = node[3][1]->getNumber(); + if (innerNode[0] == BINARY && innerNode[1] == AND && innerNode[3][0] == NUM) { + double mask = innerNode[3][1]->getNumber(); + if (isInteger32(mask) && isInteger32(shifts) && ((jsD2I(mask) << jsD2I(shifts)) >> jsD2I(shifts)) == jsD2I(mask)) { + safeCopy(node, innerNode); + return; + } + } + } else if (type == BINARY && BITWISE.has(node[1])) { + for (int i = 2; i <= 3; i++) { + Ref subNode = node[i]; + if (subNode[0] == BINARY && subNode[1] == AND && subNode[3][0] == NUM && subNode[3][1]->getNumber() == 1) { + // Rewrite (X < Y) & 1 to X < Y , when it is going into a bitwise operator. We could + // remove even more (just replace &1 with |0, then subsequent passes could remove the |0) + // but v8 issue #2513 means the code would then run very slowly in chrome. + Ref input = subNode[2]; + if (input[0] == BINARY && COMPARE_OPS.has(input[1])) { + safeCopy(node[i], input); + } + } + } + } + }); + }; + + // When there is a bunch of math like (((8+5)|0)+12)|0, only the external |0 is needed, one correction is enough. + // At each node, ((X|0)+Y)|0 can be transformed into (X+Y): The inner corrections are not needed + // TODO: Is the same is true for 0xff, 0xffff? + // Likewise, if we have |0 inside a block that will be >>'d, then the |0 is unnecessary because some + // 'useful' mathops already |0 anyhow. + + auto simplifyOps = [](Ref ast) { + auto removeMultipleOrZero = [&ast] { + bool rerun = true; + while (rerun) { + rerun = false; + std::vector stack; + std::function process = [&stack, &rerun, &process, &ast](Ref node) { + Ref type = node[0]; + if (type == BINARY && node[1] == OR) { + if (node[2][0] == NUM && node[3][0] == NUM) { + node[2][1]->setNumber(jsD2I(node[2][1]->getNumber()) | jsD2I(node[3][1]->getNumber())); + stack.push_back(0); + safeCopy(node, node[2]); + return; + } + bool go = false; + if (node[2][0] == NUM && node[2][1]->getNumber() == 0) { + // canonicalize order + Ref temp = node[3]; + node[3] = node[2]; + node[2] = temp; + go = true; + } else if (node[3][0] == NUM && node[3][1]->getNumber() == 0) { + go = true; + } + if (!go) { + stack.push_back(1); + return; + } + // We might be able to remove this correction + for (int i = stack.size()-1; i >= 0; i--) { + if (stack[i] >= 1) { + if (stack.back() < 2 && node[2][0] == CALL) break; // we can only remove multiple |0s on these + if (stack.back() < 1 && (COERCION_REQUIRING_OPS.has(node[2][0]) || + (node[2][0] == BINARY && COERCION_REQUIRING_BINARIES.has(node[2][1])))) break; // we can remove |0 or >>2 + // we will replace ourselves with the non-zero side. Recursively process that node. + Ref result = node[2][0] == NUM && node[2][1]->getNumber() == 0 ? node[3] : node[2], other; + // replace node in-place + safeCopy(node, result); + rerun = true; + process(result); + return; + } else if (stack[i] == -1) { + break; // Too bad, we can't + } + } + stack.push_back(2); // From here on up, no need for this kind of correction, it's done at the top + // (Add this at the end, so it is only added if we did not remove it) + } else if (type == BINARY && USEFUL_BINARY_OPS.has(node[1])) { + stack.push_back(1); + } else if ((type == BINARY && SAFE_BINARY_OPS.has(node[1])) || type == NUM || type == NAME) { + stack.push_back(0); // This node is safe in that it does not interfere with this optimization + } else if (type == UNARY_PREFIX && node[1] == B_NOT) { + stack.push_back(1); + } else { + stack.push_back(-1); // This node is dangerous! Give up if you see this before you see '1' + } + }; + + traversePrePost(ast, process, [&stack](Ref node) { + assert(!stack.empty()); + stack.pop_back(); + }); + } + }; + + removeMultipleOrZero(); + + // & and heap-related optimizations + + bool hasTempDoublePtr = false, rerunOrZeroPass = false; + + traversePrePostConditional(ast, [](Ref node) { + // Detect trees which should not + // be simplified. + if (node[0] == SUB && node[1][0] == NAME && isFunctionTable(node[1][1])) { + return false; // do not traverse subchildren here, we should not collapse 55 & 126. + } + return true; + }, [&hasTempDoublePtr, &rerunOrZeroPass](Ref node) { + // Simplifications are done now so + // that we simplify a node's operands before the node itself. This allows + // optimizations to cascade. + Ref type = node[0]; + if (type == NAME) { + if (node[1] == TEMP_DOUBLE_PTR) hasTempDoublePtr = true; + } else if (type == BINARY && node[1] == AND && node[3][0] == NUM) { + if (node[2][0] == NUM) { + safeCopy(node, makeNum(jsD2I(node[2][1]->getNumber()) & jsD2I(node[3][1]->getNumber()))); + return; + } + Ref input = node[2]; + double amount = node[3][1]->getNumber(); + if (input[0] == BINARY && input[1] == AND && input[3][0] == NUM) { + // Collapse X & 255 & 1 + node[3][1]->setNumber(jsD2I(amount) & jsD2I(input[3][1]->getNumber())); + node[2] = input[2]; + } else if (input[0] == SUB && input[1][0] == NAME) { + // HEAP8[..] & 255 => HEAPU8[..] + HeapInfo hi = parseHeap(input[1][1]->getCString()); + if (hi.valid) { + if (isInteger32(amount) && amount == powl(2, hi.bits)-1) { + if (!hi.unsign) { + input[1][1]->setString(getHeapStr(hi.bits, true)); // make unsigned + } + // we cannot return HEAPU8 without a coercion, but at least we do HEAP8 & 255 => HEAPU8 | 0 + node[1]->setString(OR); + node[3][1]->setNumber(0); + return; + } + } + } else if (input[0] == BINARY && input[1] == RSHIFT && + input[2][0] == BINARY && input[2][1] == LSHIFT && + input[2][3][0] == NUM && input[3][0] == NUM && + input[2][3][1]->getInteger() == input[3][1]->getInteger() && + (~(0xFFFFFFFFu >> input[3][1]->getInteger()) & jsD2I(amount)) == 0) { + // x << 24 >> 24 & 255 => x & 255 + return safeCopy(node, make3(BINARY, AND, input[2][2], node[3])); + } + } else if (type == BINARY && node[1] == XOR) { + // LLVM represents bitwise not as xor with -1. Translate it back to an actual bitwise not. + if (node[3][0] == UNARY_PREFIX && node[3][1] == MINUS && node[3][2][0] == NUM && + node[3][2][1]->getNumber() == 1 && + !(node[2][0] == UNARY_PREFIX && node[2][1] == B_NOT)) { // avoid creating ~~~ which is confusing for asm given the role of ~~ + safeCopy(node, make2(UNARY_PREFIX, B_NOT, node[2])); + return; + } + } else if (type == BINARY && node[1] == RSHIFT && node[3][0] == NUM && + node[2][0] == BINARY && node[2][1] == LSHIFT && node[2][3][0] == NUM && + node[2][2][0] == SUB && node[2][2][1][0] == NAME) { + // collapse HEAPU?8[..] << 24 >> 24 etc. into HEAP8[..] | 0 + double amount = node[3][1]->getNumber(); + if (amount == node[2][3][1]->getNumber()) { + HeapInfo hi = parseHeap(node[2][2][1][1]->getCString()); + if (hi.valid && hi.bits == 32 - amount) { + node[2][2][1][1]->setString(getHeapStr(hi.bits, false)); + node[1]->setString(OR); + node[2] = node[2][2]; + node[3][1]->setNumber(0); + rerunOrZeroPass = true; + return; + } + } + } else if (type == ASSIGN) { + // optimizations for assigning into HEAP32 specifically + if (node[1]->isBool(true) && node[2][0] == SUB && node[2][1][0] == NAME) { + if (node[2][1][1] == HEAP32) { + // HEAP32[..] = x | 0 does not need the | 0 (unless it is a mandatory |0 of a call) + if (node[3][0] == BINARY && node[3][1] == OR) { + if (node[3][2][0] == NUM && node[3][2][1]->getNumber() == 0 && node[3][3][0] != CALL) { + node[3] = node[3][3]; + } else if (node[3][3][0] == NUM && node[3][3][1]->getNumber() == 0 && node[3][2][0] != CALL) { + node[3] = node[3][2]; + } + } + } else if (node[2][1][1] == HEAP8) { + // HEAP8[..] = x & 0xff does not need the & 0xff + if (node[3][0] == BINARY && node[3][1] == AND && node[3][3][0] == NUM && node[3][3][1]->getNumber() == 0xff) { + node[3] = node[3][2]; + } + } else if (node[2][1][1] == HEAP16) { + // HEAP16[..] = x & 0xffff does not need the & 0xffff + if (node[3][0] == BINARY && node[3][1] == AND && node[3][3][0] == NUM && node[3][3][1]->getNumber() == 0xffff) { + node[3] = node[3][2]; + } + } + } + Ref value = node[3]; + if (value[0] == BINARY && value[1] == OR) { + // canonicalize order of |0 to end + if (value[2][0] == NUM && value[2][1]->getNumber() == 0) { + Ref temp = value[2]; + value[2] = value[3]; + value[3] = temp; + } + // if a seq ends in an |0, remove an external |0 + // note that it is only safe to do this in assigns, like we are doing here (return (x, y|0); is not valid) + if (value[2][0] == SEQ && value[2][2][0] == BINARY && USEFUL_BINARY_OPS.has(value[2][2][1])) { + node[3] = value[2]; + } + } + } else if (type == BINARY && node[1] == RSHIFT && node[2][0] == NUM && node[3][0] == NUM) { + // optimize num >> num, in asm we need this since we do not optimize shifts in asm.js + node[0]->setString(NUM); + node[1]->setNumber(jsD2I(node[2][1]->getNumber()) >> jsD2I(node[3][1]->getNumber())); + node->setSize(2); + return; + } else if (type == BINARY && node[1] == PLUS) { + // The most common mathop is addition, e.g. in getelementptr done repeatedly. We can join all of those, + // by doing (num+num) ==> newnum. + if (node[2][0] == NUM && node[3][0] == NUM) { + node[2][1]->setNumber(jsD2I(node[2][1]->getNumber()) + jsD2I(node[3][1]->getNumber())); + safeCopy(node, node[2]); + return; + } + } + }); + + if (rerunOrZeroPass) removeMultipleOrZero(); + + if (hasTempDoublePtr) { + AsmData asmData(ast); + traversePre(ast, [](Ref node) { + Ref type = node[0]; + if (type == ASSIGN) { + if (node[1]->isBool(true) && node[2][0] == SUB && node[2][1][0] == NAME && node[2][1][1] == HEAP32) { + // remove bitcasts that are now obviously pointless, e.g. + // HEAP32[$45 >> 2] = HEAPF32[tempDoublePtr >> 2] = ($14 < $28 ? $14 : $28) - $42, HEAP32[tempDoublePtr >> 2] | 0; + Ref value = node[3]; + if (value[0] == SEQ && value[1][0] == ASSIGN && value[1][2][0] == SUB && value[1][2][1][0] == NAME && value[1][2][1][1] == HEAPF32 && + value[1][2][2][0] == BINARY && value[1][2][2][2][0] == NAME && value[1][2][2][2][1] == TEMP_DOUBLE_PTR) { + // transform to HEAPF32[$45 >> 2] = ($14 < $28 ? $14 : $28) - $42; + node[2][1][1]->setString(HEAPF32); + node[3] = value[1][3]; + } + } + } else if (type == SEQ) { + // (HEAP32[tempDoublePtr >> 2] = HEAP32[$37 >> 2], +HEAPF32[tempDoublePtr >> 2]) + // ==> + // +HEAPF32[$37 >> 2] + if (node[0] == SEQ && node[1][0] == ASSIGN && node[1][2][0] == SUB && node[1][2][1][0] == NAME && + (node[1][2][1][1] == HEAP32 || node[1][2][1][1] == HEAPF32) && + node[1][2][2][0] == BINARY && node[1][2][2][2][0] == NAME && node[1][2][2][2][1] == TEMP_DOUBLE_PTR && + node[1][3][0] == SUB && node[1][3][1][0] == NAME && (node[1][3][1][1] == HEAP32 || node[1][3][1][1] == HEAPF32) && + node[2][0] != SEQ) { // avoid (x, y, z) which can be used for tempDoublePtr on doubles for alignment fixes + if (node[1][2][1][1] == HEAP32) { + node[1][3][1][1]->setString(HEAPF32); + safeCopy(node, makeAsmCoercion(node[1][3], detectType(node[2]))); + return; + } else { + node[1][3][1][1]->setString(HEAP32); + safeCopy(node, make3(BINARY, OR, node[1][3], makeNum(0))); + return; + } + } + } + }); + + // finally, wipe out remaining ones by finding cases where all assignments to X are bitcasts, and all uses are writes to + // the other heap type, then eliminate the bitcast + struct BitcastData { + int define_HEAP32, define_HEAPF32, use_HEAP32, use_HEAPF32, namings; + bool ok; + std::vector defines, uses; + + BitcastData() : define_HEAP32(0), define_HEAPF32(0), use_HEAP32(0), use_HEAPF32(0), namings(0), ok(false) {} + }; + std::unordered_map bitcastVars; + traversePre(ast, [&bitcastVars](Ref node) { + if (node[0] == ASSIGN && node[1]->isBool(true) && node[2][0] == NAME) { + Ref value = node[3]; + if (value[0] == SEQ && value[1][0] == ASSIGN && value[1][2][0] == SUB && value[1][2][1][0] == NAME && + (value[1][2][1][1] == HEAP32 || value[1][2][1][1] == HEAPF32) && + value[1][2][2][0] == BINARY && value[1][2][2][2][0] == NAME && value[1][2][2][2][1] == TEMP_DOUBLE_PTR) { + IString name = node[2][1]->getIString(); + IString heap = value[1][2][1][1]->getIString(); + if (heap == HEAP32) { + bitcastVars[name].define_HEAP32++; + } else { + assert(heap == HEAPF32); + bitcastVars[name].define_HEAPF32++; + } + bitcastVars[name].defines.push_back(node); + bitcastVars[name].ok = true; + } + } + }); + traversePre(ast, [&bitcastVars](Ref node) { + Ref type = node[0]; + if (type == NAME && bitcastVars[node[1]->getCString()].ok) { + bitcastVars[node[1]->getCString()].namings++; + } else if (type == ASSIGN && node[1]->isBool(true)) { + Ref value = node[3]; + if (value[0] == NAME) { + IString name = value[1]->getIString(); + if (bitcastVars[name].ok) { + Ref target = node[2]; + if (target[0] == SUB && target[1][0] == NAME && (target[1][1] == HEAP32 || target[1][1] == HEAPF32)) { + if (target[1][1] == HEAP32) { + bitcastVars[name].use_HEAP32++; + } else { + bitcastVars[name].use_HEAPF32++; + } + bitcastVars[name].uses.push_back(node); + } + } + } + } + }); + for (auto iter : bitcastVars) { + const IString& v = iter.first; + BitcastData& info = iter.second; + // good variables define only one type, use only one type, have definitions and uses, and define as a different type than they use + if (info.define_HEAP32*info.define_HEAPF32 == 0 && info.use_HEAP32*info.use_HEAPF32 == 0 && + info.define_HEAP32+info.define_HEAPF32 > 0 && info.use_HEAP32+info.use_HEAPF32 > 0 && + info.define_HEAP32*info.use_HEAP32 == 0 && info.define_HEAPF32*info.use_HEAPF32 == 0 && + asmData.isLocal(v.c_str()) && info.namings == info.define_HEAP32+info.define_HEAPF32+info.use_HEAP32+info.use_HEAPF32) { + IString& correct = info.use_HEAP32 ? HEAPF32 : HEAP32; + for (auto define : info.defines) { + define[3] = define[3][1][3]; + if (correct == HEAP32) { + define[3] = make3(BINARY, OR, define[3], makeNum(0)); + } else { + assert(correct == HEAPF32); + define[3] = makeAsmCoercion(define[3], preciseF32 ? ASM_FLOAT : ASM_DOUBLE); + } + // do we want a simplifybitops on the new values here? + } + for (auto use : info.uses) { + use[2][1][1]->setString(correct.c_str()); + } + AsmType correctType; + switch(asmData.getType(v.c_str())) { + case ASM_INT: correctType = preciseF32 ? ASM_FLOAT : ASM_DOUBLE; break; + case ASM_FLOAT: case ASM_DOUBLE: correctType = ASM_INT; break; + default: assert(0); + } + asmData.setType(v.c_str(), correctType); + } + } + asmData.denormalize(); + } + }; + + std::function emitsBoolean = [&emitsBoolean](Ref node) { + Ref type = node[0]; + if (type == NUM) { + return node[1]->getNumber() == 0 || node[1]->getNumber() == 1; + } + if (type == BINARY) return COMPARE_OPS.has(node[1]); + if (type == UNARY_PREFIX) return node[1] == L_NOT; + if (type == CONDITIONAL) return emitsBoolean(node[2]) && emitsBoolean(node[3]); + return false; + }; + + // expensive | expensive can be turned into expensive ? 1 : expensive, and + // expensive | cheap can be turned into cheap ? 1 : expensive, + // so that we can avoid the expensive computation, if it has no side effects. + auto conditionalize = [&emitsBoolean](Ref ast) { + traversePre(ast, [&emitsBoolean](Ref node) { + const int MIN_COST = 7; + if (node[0] == BINARY && (node[1] == OR || node[1] == AND) && node[3][0] != NUM && node[2][0] != NUM) { + // logical operator on two non-numerical values + Ref left = node[2]; + Ref right = node[3]; + if (!emitsBoolean(left) || !emitsBoolean(right)) return; + bool leftEffects = hasSideEffects(left); + bool rightEffects = hasSideEffects(right); + if (leftEffects && rightEffects) return; // both must execute + // canonicalize with side effects, if any, happening on the left + if (rightEffects) { + if (measureCost(left) < MIN_COST) return; // avoidable code is too cheap + Ref temp = left; + left = right; + right = temp; + } else if (leftEffects) { + if (measureCost(right) < MIN_COST) return; // avoidable code is too cheap + } else { + // no side effects, reorder based on cost estimation + int leftCost = measureCost(left); + int rightCost = measureCost(right); + if (std::max(leftCost, rightCost) < MIN_COST) return; // avoidable code is too cheap + // canonicalize with expensive code on the right + if (leftCost > rightCost) { + Ref temp = left; + left = right; + right = temp; + } + } + // worth it, perform conditionalization + Ref ret; + if (node[1] == OR) { + ret = make3(CONDITIONAL, left, makeNum(1), right); + } else { // & + ret = make3(CONDITIONAL, left, right, makeNum(0)); + } + if (left[0] == UNARY_PREFIX && left[1] == L_NOT) { + ret[1] = flipCondition(left); + Ref temp = ret[2]; + ret[2] = ret[3]; + ret[3] = temp; + } + safeCopy(node, ret); + return; + } + }); + }; + + traverseFunctions(ast, [&](Ref func) { + simplifyIntegerConversions(func); + simplifyOps(func); + traversePre(func, [](Ref node) { + Ref ret = simplifyNotCompsDirect(node); + if (ret.get() != node.get()) { // if we received a different pointer in return, then we need to copy the new value + safeCopy(node, ret); + } + }); + conditionalize(func); + }); +} + +void simplifyIfs(Ref ast) { + traverseFunctions(ast, [](Ref func) { + bool simplifiedAnElse = false; + + traversePre(func, [&simplifiedAnElse](Ref node) { + // simplify if (x) { if (y) { .. } } to if (x ? y : 0) { .. } + if (node[0] == IF) { + Ref body = node[2]; + // recurse to handle chains + while (body[0] == BLOCK) { + Ref stats = body[1]; + if (stats->size() == 0) break; + Ref other = stats->back(); + if (other[0] != IF) { + // our if block does not end with an if. perhaps if have an else we can flip + if (node->size() > 3 && !!node[3] && node[3][0] == BLOCK) { + stats = node[3][1]; + if (stats->size() == 0) break; + other = stats->back(); + if (other[0] == IF) { + // flip node + node[1] = flipCondition(node[1]); + node[2] = node[3]; + node[3] = body; + body = node[2]; + } else break; + } else break; + } + // we can handle elses, but must be fully identical + if (!!node[3] || !!other[3]) { + if (!node[3]) break; + if (!node[3]->deepCompare(other[3])) { + // the elses are different, but perhaps if we flipped a condition we can do better + if (node[3]->deepCompare(other[2])) { + // flip other. note that other may not have had an else! add one if so; we will eliminate such things later + if (!other[3]) other[3] = makeBlock(); + other[1] = flipCondition(other[1]); + Ref temp = other[2]; + other[2] = other[3]; + other[3] = temp; + } else break; + } + } + if (stats->size() > 1) { + // try to commaify - turn everything between the ifs into a comma operator inside the second if + bool ok = true; + for (size_t i = 0; i+1 < stats->size(); i++) { + Ref curr = deStat(stats[i]); + if (!commable(curr)) ok = false; + } + if (!ok) break; + for (int i = stats->size()-2; i >= 0; i--) { + Ref curr = deStat(stats[i]); + other[1] = make2(SEQ, curr, other[1]); + } + Ref temp = makeArray(1); + temp->push_back(other); + stats = body[1] = temp; + } + if (stats->size() != 1) break; + if (!!node[3]) simplifiedAnElse = true; + node[1] = make3(CONDITIONAL, node[1], other[1], makeNum(0)); + body = node[2] = other[2]; + } + } + }); + + if (simplifiedAnElse) { + // there may be fusing opportunities + + // we can only fuse if we remove all uses of the label. if there are + // other ones - if the label check can be reached from elsewhere - + // we must leave it + bool abort = false; + + std::unordered_map labelAssigns; + + traversePre(func, [&labelAssigns, &abort](Ref node) { + if (node[0] == ASSIGN && node[2][0] == NAME && node[2][1] == LABEL) { + if (node[3][0] == NUM) { + int value = node[3][1]->getInteger(); + labelAssigns[value] = labelAssigns[value] + 1; + } else { + // label is assigned a dynamic value (like from indirectbr), we cannot do anything + abort = true; + } + } + }); + if (abort) return; + + std::unordered_map labelChecks; + + traversePre(func, [&labelChecks, &abort](Ref node) { + if (node[0] == BINARY && node[1] == EQ && node[2][0] == BINARY && node[2][1] == OR && + node[2][2][0] == NAME && node[2][2][1] == LABEL) { + if (node[3][0] == NUM) { + int value = node[3][1]->getInteger(); + labelChecks[value] = labelChecks[value] + 1; + } else { + // label is checked vs a dynamic value (like from indirectbr), we cannot do anything + abort = true; + } + } + }); + if (abort) return; + + int inLoop = 0; // when in a loop, we do not emit label = 0; in the relooper as there is no need + traversePrePost(func, [&inLoop, &labelAssigns, &labelChecks](Ref node) { + if (node[0] == WHILE) inLoop++; + Ref stats = getStatements(node); + if (!!stats && stats->size() > 0) { + for (int i = 0; i < (int)stats->size()-1; i++) { + Ref pre = stats[i]; + Ref post = stats[i+1]; + if (pre[0] == IF && pre->size() > 3 && !!pre[3] && post[0] == IF && (post->size() <= 3 || !post[3])) { + Ref postCond = post[1]; + if (postCond[0] == BINARY && postCond[1] == EQ && + postCond[2][0] == BINARY && postCond[2][1] == OR && + postCond[2][2][0] == NAME && postCond[2][2][1] == LABEL && + postCond[2][3][0] == NUM && postCond[2][3][1]->getNumber() == 0 && + postCond[3][0] == NUM) { + int postValue = postCond[3][1]->getInteger(); + Ref preElse = pre[3]; + if (labelAssigns[postValue] == 1 && labelChecks[postValue] == 1 && preElse[0] == BLOCK && preElse->size() >= 2 && preElse[1]->size() == 1) { + Ref preStat = preElse[1][0]; + if (preStat[0] == STAT && preStat[1][0] == ASSIGN && + preStat[1][1]->isBool(true) && preStat[1][2][0] == NAME && preStat[1][2][1] == LABEL && + preStat[1][3][0] == NUM && preStat[1][3][1]->getNumber() == postValue) { + // Conditions match, just need to make sure the post clears label + if (post[2][0] == BLOCK && post[2]->size() >= 2 && post[2][1]->size() > 0) { + Ref postStat = post[2][1][0]; + bool haveClear = + postStat[0] == STAT && postStat[1][0] == ASSIGN && + postStat[1][1]->isBool(true) && postStat[1][2][0] == NAME && postStat[1][2][1] == LABEL && + postStat[1][3][0] == NUM && postStat[1][3][1]->getNumber() == 0; + if (!inLoop || haveClear) { + // Everything lines up, do it + pre[3] = post[2]; + if (haveClear) pre[3][1]->splice(0, 1); // remove the label clearing + stats->splice(i+1, 1); // remove the post entirely + } + } + } + } + } + } + } + } + }, [&inLoop](Ref node) { + if (node[0] == WHILE) inLoop--; + }); + assert(inLoop == 0); + } + }); +} + +void optimizeFrounds(Ref ast) { + // collapse fround(fround(..)), which can happen due to elimination + // also emit f0 instead of fround(0) (except in returns) + int inReturn = 0; + traversePrePost(ast, [&](Ref node) { + if (node[0] == RETURN) { + inReturn++; + } + }, [&](Ref node) { + if (node[0] == RETURN) { + inReturn--; + } + if (node[0] == CALL && node[1][0] == NAME && node[1][1] == MATH_FROUND) { + Ref arg = node[2][0]; + if (arg[0] == NUM) { + if (!inReturn && arg[1]->getInteger() == 0) { + safeCopy(node, makeName(F0)); + } + } else if (arg[0] == CALL && arg[1][0] == NAME && arg[1][1] == MATH_FROUND) { + safeCopy(node, arg); + } + } + }); +} + +// Very simple 'registerization', coalescing of variables into a smaller number. + +const char* getRegPrefix(AsmType type) { + switch (type) { + case ASM_INT: return "i"; break; + case ASM_DOUBLE: return "d"; break; + case ASM_FLOAT: return "f"; break; + case ASM_FLOAT32X4: return "F4"; break; + case ASM_FLOAT64X2: return "F2"; break; + case ASM_INT8X16: return "I16"; break; + case ASM_INT16X8: return "I8"; break; + case ASM_INT32X4: return "I4"; break; + case ASM_NONE: return "Z"; break; + default: assert(0); // type doesn't have a name yet + } + return nullptr; +} + +IString getRegName(AsmType type, int num) { + const char* str = getRegPrefix(type); + const int size = 256; + char temp[size]; + int written = sprintf(temp, "%s%d", str, num); + assert(written < size); + temp[written] = 0; + IString ret; + ret.set(temp, false); + return ret; +} + +void registerize(Ref ast) { + traverseFunctions(ast, [](Ref fun) { + AsmData asmData(fun); + // Add parameters as a first (fake) var (with assignment), so they get taken into consideration + // note: params are special, they can never share a register between them (see later) + Ref fake; + if (!!fun[2] && fun[2]->size()) { + Ref assign = makeNum(0); + // TODO: will be an isEmpty here, can reuse it. + fun[3]->insert(0, make1(VAR, fun[2]->map([&assign](Ref param) { + return &(makeArray(2)->push_back(param).push_back(assign)); + }))); + } + // Replace all var definitions with assignments; we will add var definitions at the top after we registerize + StringSet allVars; + traversePre(fun, [&](Ref node) { + Ref type = node[0]; + if (type == VAR) { + Ref vars = node[1]->filter([](Ref varr) { return varr->size() > 1; }); + if (vars->size() >= 1) { + safeCopy(node, unVarify(vars)); + } else { + safeCopy(node, makeEmpty()); + } + } else if (type == NAME) { + allVars.insert(node[1]->getIString()); + } + }); + removeAllUselessSubNodes(fun); // vacuum? + StringTypeMap regTypes; // reg name -> type + auto getNewRegName = [&](int num, IString name) { + AsmType type = asmData.getType(name); + IString ret = getRegName(type, num); + assert(!allVars.has(ret) || asmData.isLocal(ret)); // register must not shadow non-local name + regTypes[ret] = type; + return ret; + }; + // Find the # of uses of each variable. + // While doing so, check if all a variable's uses are dominated in a simple + // way by a simple assign, if so, then we can assign its register to it + // just for its definition to its last use, and not to the entire toplevel loop, + // we call such variables "optimizable" + StringIntMap varUses; + int level = 1; + std::unordered_map levelDominations; // level => set of dominated variables XXX vector? + StringIntMap varLevels; + StringSet possibles; + StringSet unoptimizables; + auto purgeLevel = [&]() { + // Invalidate all dominating on this level, further users make it unoptimizable + for (auto name : levelDominations[level]) { + varLevels[name] = 0; + } + levelDominations[level].clear(); + level--; + }; + std::function possibilifier = [&](Ref node) { + Ref type = node[0]; + if (type == NAME) { + IString name = node[1]->getIString(); + if (asmData.isLocal(name)) { + varUses[name]++; + if (possibles.has(name) && !varLevels[name]) unoptimizables.insert(name); // used outside of simple domination + } + } else if (type == ASSIGN && node[1]->isBool(true)) { + if (!!node[2] && node[2][0] == NAME) { + IString name = node[2][1]->getIString(); + // if local and not yet used, this might be optimizable if we dominate + // all other uses + if (asmData.isLocal(name) && !varUses[name] && !varLevels[name]) { + possibles.insert(name); + varLevels[name] = level; + levelDominations[level].insert(name); + } + } + } else if (CONTROL_FLOW.has(type)) { + // recurse children, in the context of a loop + if (type == WHILE || type == DO) { + traversePrePostConditional(node[1], possibilifier, [](Ref node){}); + level++; + traversePrePostConditional(node[2], possibilifier, [](Ref node){}); + purgeLevel(); + } else if (type == FOR) { + traversePrePostConditional(node[1], possibilifier, [](Ref node){}); + for (int i = 2; i <= 4; i++) { + level++; + traversePrePostConditional(node[i], possibilifier, [](Ref node){}); + purgeLevel(); + } + } else if (type == IF) { + traversePrePostConditional(node[1], possibilifier, [](Ref node){}); + level++; + traversePrePostConditional(node[2], possibilifier, [](Ref node){}); + purgeLevel(); + if (node->size() > 3 && !!node[3]) { + level++; + traversePrePostConditional(node[3], possibilifier, [](Ref node){}); + purgeLevel(); + } + } else if (type == SWITCH) { + traversePrePostConditional(node[1], possibilifier, [](Ref node){}); + Ref cases = node[2]; + for (size_t i = 0; i < cases->size(); i++) { + level++; + traversePrePostConditional(cases[i][1], possibilifier, [](Ref node){}); + purgeLevel(); + } + } else assert(0);; + return false; // prevent recursion into children, which we already did + } + return true; + }; + traversePrePostConditional(fun, possibilifier, [](Ref node){}); + + StringSet optimizables; + for (auto possible : possibles) { + if (!unoptimizables.has(possible)) optimizables.insert(possible); + } + + // Go through the function's code, assigning 'registers'. + // The only tricky bit is to keep variables locked on a register through loops, + // since they can potentially be returned to. Optimizable variables lock onto + // loops that they enter, unoptimizable variables lock in a conservative way + // into the topmost loop. + // Note that we cannot lock onto a variable in a loop if it was used and free'd + // before! (then they could overwrite us in the early part of the loop). For now + // we just use a fresh register to make sure we avoid this, but it could be + // optimized to check for safe registers (free, and not used in this loop level). + StringStringMap varRegs; // maps variables to the register they will use all their life + std::vector freeRegsClasses; + freeRegsClasses.resize(ASM_NONE); + int nextReg = 1; + StringVec fullNames; + fullNames.push_back(EMPTY); // names start at 1 + std::vector loopRegs; // for each loop nesting level, the list of bound variables + int loops = 0; // 0 is toplevel, 1 is first loop, etc + StringSet activeOptimizables; + StringIntMap optimizableLoops; + StringSet paramRegs; // true if the register is used by a parameter (and so needs no def at start of function; also cannot + // be shared with another param, each needs its own) + auto decUse = [&](IString name) { + if (!varUses[name]) return false; // no uses left, or not a relevant variable + if (optimizables.has(name)) activeOptimizables.insert(name); + IString reg = varRegs[name]; + assert(asmData.isLocal(name)); + StringVec& freeRegs = freeRegsClasses[asmData.getType(name)]; + if (!reg) { + // acquire register + if (optimizables.has(name) && freeRegs.size() > 0 && + !(asmData.isParam(name) && paramRegs.has(freeRegs.back()))) { // do not share registers between parameters + reg = freeRegs.back(); + freeRegs.pop_back(); + } else { + assert(fullNames.size() == nextReg); + reg = getNewRegName(nextReg++, name); + fullNames.push_back(reg); + if (asmData.isParam(name)) paramRegs.insert(reg); + } + varRegs[name] = reg; + } + varUses[name]--; + assert(varUses[name] >= 0); + if (varUses[name] == 0) { + if (optimizables.has(name)) activeOptimizables.erase(name); + // If we are not in a loop, or we are optimizable and not bound to a loop + // (we might have been in one but left it), we can free the register now. + if (loops == 0 || (optimizables.has(name) && !optimizableLoops.has(name))) { + // free register + freeRegs.push_back(reg); + } else { + // when the relevant loop is exited, we will free the register + int relevantLoop = optimizables.has(name) ? (optimizableLoops[name] ? optimizableLoops[name] : 1) : 1; + if ((int)loopRegs.size() <= relevantLoop+1) loopRegs.resize(relevantLoop+1); + loopRegs[relevantLoop].push_back(reg); + } + } + return true; + }; + traversePrePost(fun, [&](Ref node) { // XXX we rely on traversal order being the same as execution order here + Ref type = node[0]; + if (type == NAME) { + IString name = node[1]->getIString(); + if (decUse(name)) { + node[1]->setString(varRegs[name]); + } + } else if (LOOP.has(type)) { + loops++; + // Active optimizables lock onto this loop, if not locked onto one that encloses this one + for (auto name : activeOptimizables) { + if (!optimizableLoops[name]) { + optimizableLoops[name] = loops; + } + } + } + }, [&](Ref node) { + Ref type = node[0]; + if (LOOP.has(type)) { + // Free registers that were locked to this loop + if ((int)loopRegs.size() > loops && loopRegs[loops].size() > 0) { + for (auto loopReg : loopRegs[loops]) { + freeRegsClasses[regTypes[loopReg]].push_back(loopReg); + } + loopRegs[loops].clear(); + } + loops--; + } + }); + if (!!fun[2] && fun[2]->size()) { + fun[2]->setSize(0); // clear params, we will fill with registers + fun[3]->splice(0, 1); // remove fake initial var + } + + asmData.locals.clear(); + asmData.params.clear(); + asmData.vars.clear(); + for (int i = 1; i < nextReg; i++) { + IString reg = fullNames[i]; + AsmType type = regTypes[reg]; + if (!paramRegs.has(reg)) { + asmData.addVar(reg, type); + } else { + asmData.addParam(reg, type); + fun[2]->push_back(makeString(reg)); + } + } + asmData.denormalize(); + }); +} + +// Assign variables to 'registers', coalescing them onto a smaller number of shared +// variables. +// +// This does the same job as 'registerize' above, but burns a lot more cycles trying +// to reduce the total number of register variables. Key points about the operation: +// +// * we decompose the AST into a flow graph and perform a full liveness +// analysis, to determine which variables are live at each point. +// +// * variables that are live concurrently are assigned to different registers. +// +// * variables that are linked via 'x=y' style statements are assigned the same +// register if possible, so that the redundant assignment can be removed. +// (e.g. assignments used to pass state around through loops). +// +// * any code that cannot be reached through the flow-graph is removed. +// (e.g. redundant break statements like 'break L123; break;'). +// +// * any assignments that we can prove are not subsequently used are removed. +// (e.g. unnecessary assignments to the 'label' variable). +// +void registerizeHarder(Ref ast) { +#ifdef PROFILING + clock_t tasmdata = 0; + clock_t tflowgraph = 0; + clock_t tlabelfix = 0; + clock_t tbackflow = 0; + clock_t tjuncvaruniqassign = 0; + clock_t tjuncvarsort = 0; + clock_t tregassign = 0; + clock_t tblockproc = 0; + clock_t treconstruct = 0; +#endif + + traverseFunctions(ast, [&](Ref fun) { + +#ifdef PROFILING + clock_t start = clock(); +#endif + + // Do not try to process non-validating methods, like the heap replacer + bool abort = false; + traversePre(fun, [&abort](Ref node) { + if (node[0] == NEW) abort = true; + }); + if (abort) return; + + AsmData asmData(fun); + +#ifdef PROFILING + tasmdata += clock() - start; + start = clock(); +#endif + + // Utilities for allocating register variables. + // We need distinct register pools for each type of variable. + + typedef std::map IntStringMap; + std::vector allRegsByType; + allRegsByType.resize(ASM_NONE+1); + int nextReg = 1; + + auto createReg = [&](IString forName) { + // Create a new register of type suitable for the given variable name. + AsmType type = asmData.getType(forName); + IntStringMap& allRegs = allRegsByType[type]; + int reg = nextReg++; + allRegs[reg] = getRegName(type, reg); + return reg; + }; + + // Traverse the tree in execution order and synthesize a basic flow-graph. + // It's convenient to build a kind of "dual" graph where the nodes identify + // the junctions between blocks at which control-flow may branch, and each + // basic block is an edge connecting two such junctions. + // For each junction we store: + // * set of blocks that originate at the junction + // * set of blocks that terminate at the junction + // For each block we store: + // * a single entry junction + // * a single exit junction + // * a 'use' and 'kill' set of names for the block + // * full sequence of NAME and ASSIGN nodes in the block + // * whether each such node appears as part of a larger expression + // (and therefore cannot be safely eliminated) + // * set of labels that can be used to jump to this block + + struct Junction { + int id; + std::set inblocks, outblocks; + IOrderedStringSet live; + Junction(int id_) : id(id_) {} + }; + struct Node { + }; + struct Block { + int id, entry, exit; + std::set labels; + std::vector nodes; + std::vector isexpr; + StringIntMap use; + StringSet kill; + StringStringMap link; + StringIntMap lastUseLoc; + StringIntMap firstDeadLoc; + StringIntMap firstKillLoc; + StringIntMap lastKillLoc; + + Block() : id(-1), entry(-1), exit(-1) {} + }; + struct ContinueBreak { + int co, br; + ContinueBreak() : co(-1), br(-1) {} + ContinueBreak(int co_, int br_) : co(co_), br(br_) {} + }; + typedef std::unordered_map LabelState; + + std::vector junctions; + std::vector blocks; + int currEntryJunction = -1; + Block* nextBasicBlock = nullptr; + int isInExpr = 0; + std::vector activeLabels; + activeLabels.resize(1); + IString nextLoopLabel; + + const int ENTRY_JUNCTION = 0; + const int EXIT_JUNCTION = 1; + const int ENTRY_BLOCK = 0; + + auto addJunction = [&]() { + // Create a new junction, without inserting it into the graph. + // This is useful for e.g. pre-allocating an exit node. + int id = junctions.size(); + junctions.push_back(Junction(id)); + return id; + }; + + std::function joinJunction; + + auto markJunction = [&](int id) { + // Mark current traversal location as a junction. + // This makes a new basic block exiting at this position. + if (id < 0) { + id = addJunction(); + } + joinJunction(id, true); + return id; + }; + + auto setJunction = [&](int id, bool force) { + // Set the next entry junction to the given id. + // This can be used to enter at a previously-declared point. + // You can't return to a junction with no incoming blocks + // unless the 'force' parameter is specified. + assert(nextBasicBlock->nodes.size() == 0); // refusing to abandon an in-progress basic block + if (force || junctions[id].inblocks.size() > 0) { + currEntryJunction = id; + } else { + currEntryJunction = -1; + } + }; + + joinJunction = [&](int id, bool force) { + // Complete the pending basic block by exiting at this position. + // This can be used to exit at a previously-declared point. + if (currEntryJunction >= 0) { + assert(nextBasicBlock); + nextBasicBlock->id = blocks.size(); + nextBasicBlock->entry = currEntryJunction; + nextBasicBlock->exit = id; + junctions[currEntryJunction].outblocks.insert(nextBasicBlock->id); + junctions[id].inblocks.insert(nextBasicBlock->id); + blocks.push_back(nextBasicBlock); + } + nextBasicBlock = new Block(); + setJunction(id, force); + return id; + }; + + auto pushActiveLabels = [&](int onContinue, int onBreak) { + // Push the target junctions for continuing/breaking a loop. + // This should be called before traversing into a loop. + assert(activeLabels.size() > 0); + LabelState& prevLabels = activeLabels.back(); + LabelState newLabels = prevLabels; + newLabels[EMPTY] = ContinueBreak(onContinue, onBreak); + if (!!nextLoopLabel) { + newLabels[nextLoopLabel] = ContinueBreak(onContinue, onBreak); + nextLoopLabel = EMPTY; + } + // An unlabelled CONTINUE should jump to innermost loop, + // ignoring any nested SWITCH statements. + if (onContinue < 0 && prevLabels.count(EMPTY) > 0) { + newLabels[EMPTY].co = prevLabels[EMPTY].co; + } + activeLabels.push_back(newLabels); + }; + + auto popActiveLabels = [&]() { + // Pop the target junctions for continuing/breaking a loop. + // This should be called after traversing into a loop. + activeLabels.pop_back(); + }; + + auto markNonLocalJump = [&](IString type, IString label) { + // Complete a block via RETURN, BREAK or CONTINUE. + // This joins the targetted junction and then sets the current junction to null. + // Any code traversed before we get back to an existing junction is dead code. + if (type == RETURN) { + joinJunction(EXIT_JUNCTION, false); + } else { + assert(activeLabels.size() > 0); + assert(activeLabels.back().count(label) > 0); // 'jump to unknown label'); + auto targets = activeLabels.back()[label]; + if (type == CONTINUE) { + joinJunction(targets.co, false); + } else if (type == BREAK) { + joinJunction(targets.br, false); + } else { + assert(0); // 'unknown jump node type'); + } + } + currEntryJunction = -1; + }; + + auto addUseNode = [&](Ref node) { + // Mark a use of the given name node in the current basic block. + assert(node[0] == NAME); // 'not a use node'); + IString name = node[1]->getIString(); + if (asmData.isLocal(name)) { + nextBasicBlock->nodes.push_back(node); + nextBasicBlock->isexpr.push_back(isInExpr != 0); + if (nextBasicBlock->kill.count(name) == 0) { + nextBasicBlock->use[name] = 1; + } + } + }; + + auto addKillNode = [&](Ref node) { + // Mark an assignment to the given name node in the current basic block. + assert(node[0] == ASSIGN); //, 'not a kill node'); + assert(node[1]->isBool(true)); // 'not a kill node'); + assert(node[2][0] == NAME); //, 'not a kill node'); + IString name = node[2][1]->getIString(); + if (asmData.isLocal(name)) { + nextBasicBlock->nodes.push_back(node); + nextBasicBlock->isexpr.push_back(isInExpr != 0); + nextBasicBlock->kill.insert(name); + } + }; + + std::function lookThroughCasts = [&](Ref node) { + // Look through value-preserving casts, like "x | 0" => "x" + if (node[0] == BINARY && node[1] == OR) { + if (node[3][0] == NUM && node[3][1]->getNumber() == 0) { + return lookThroughCasts(node[2]); + } + } + return node; + }; + + auto addBlockLabel = [&](Ref node) { + assert(nextBasicBlock->nodes.size() == 0); // 'cant add label to an in-progress basic block') + if (node[0] == NUM) { + nextBasicBlock->labels.insert(node[1]->getInteger()); + } + }; + + auto isTrueNode = [&](Ref node) { + // Check if the given node is statically truthy. + return (node[0] == NUM && node[1]->getNumber() != 0); + }; + + auto isFalseNode = [&](Ref node) { + // Check if the given node is statically falsy. + return (node[0] == NUM && node[1]->getNumber() == 0); + }; + + std::function buildFlowGraph = [&](Ref node) { + // Recursive function to build up the flow-graph. + // It walks the tree in execution order, calling the above state-management + // functions at appropriate points in the traversal. + Ref type = node[0]; + + // Any code traversed without an active entry junction must be dead, + // as the resulting block could never be entered. Let's remove it. + if (currEntryJunction < 0 && junctions.size() > 0) { + safeCopy(node, makeEmpty()); + return; + } + + // Traverse each node type according to its particular control-flow semantics. + // TODO: switchify this + if (type == DEFUN) { + int jEntry = markJunction(-1); + assert(jEntry == ENTRY_JUNCTION); + int jExit = addJunction(); + assert(jExit == EXIT_JUNCTION); + for (size_t i = 0; i < node[3]->size(); i++) { + buildFlowGraph(node[3][i]); + } + joinJunction(jExit, false); + } else if (type == IF) { + isInExpr++; + buildFlowGraph(node[1]); + isInExpr--; + int jEnter = markJunction(-1); + int jExit = addJunction(); + if (!!node[2]) { + // Detect and mark "if (label == N) { }". + if (node[1][0] == BINARY && node[1][1] == EQ) { + Ref lhs = lookThroughCasts(node[1][2]); + if (lhs[0] == NAME && lhs[1] == LABEL) { + addBlockLabel(lookThroughCasts(node[1][3])); + } + } + buildFlowGraph(node[2]); + } + joinJunction(jExit, false); + setJunction(jEnter, false); + if (node->size() > 3 && !!node[3]) { + buildFlowGraph(node[3]); + } + joinJunction(jExit, false); + } else if (type == CONDITIONAL) { + isInExpr++; + // If the conditional has no side-effects, we can treat it as a single + // block, which might open up opportunities to remove it entirely. + if (!hasSideEffects(node)) { + buildFlowGraph(node[1]); + if (!!node[2]) { + buildFlowGraph(node[2]); + } + if (!!node[3]) { + buildFlowGraph(node[3]); + } + } else { + buildFlowGraph(node[1]); + int jEnter = markJunction(-1); + int jExit = addJunction(); + if (!!node[2]) { + buildFlowGraph(node[2]); + } + joinJunction(jExit, false); + setJunction(jEnter, false); + if (!!node[3]) { + buildFlowGraph(node[3]); + } + joinJunction(jExit, false); + } + isInExpr--; + } else if (type == WHILE) { + // Special-case "while (1) {}" to use fewer junctions, + // since emscripten generates a lot of these. + if (isTrueNode(node[1])) { + int jLoop = markJunction(-1); + int jExit = addJunction(); + pushActiveLabels(jLoop, jExit); + buildFlowGraph(node[2]); + popActiveLabels(); + joinJunction(jLoop, false); + setJunction(jExit, false); + } else { + int jCond = markJunction(-1); + int jLoop = addJunction(); + int jExit = addJunction(); + isInExpr++; + buildFlowGraph(node[1]); + isInExpr--; + joinJunction(jLoop, false); + pushActiveLabels(jCond, jExit); + buildFlowGraph(node[2]); + popActiveLabels(); + joinJunction(jCond, false); + // An empty basic-block linking condition exit to loop exit. + setJunction(jLoop, false); + joinJunction(jExit, false); + } + } else if (type == DO) { + // Special-case "do {} while (1)" and "do {} while (0)" to use + // fewer junctions, since emscripten generates a lot of these. + if (isFalseNode(node[1])) { + int jExit = addJunction(); + pushActiveLabels(jExit, jExit); + buildFlowGraph(node[2]); + popActiveLabels(); + joinJunction(jExit, false); + } else if (isTrueNode(node[1])) { + int jLoop = markJunction(-1); + int jExit = addJunction(); + pushActiveLabels(jLoop, jExit); + buildFlowGraph(node[2]); + popActiveLabels(); + joinJunction(jLoop, false); + setJunction(jExit, false); + } else { + int jLoop = markJunction(-1); + int jCond = addJunction(); + int jCondExit = addJunction(); + int jExit = addJunction(); + pushActiveLabels(jCond, jExit); + buildFlowGraph(node[2]); + popActiveLabels(); + joinJunction(jCond, false); + isInExpr++; + buildFlowGraph(node[1]); + isInExpr--; + joinJunction(jCondExit, false); + joinJunction(jLoop, false); + setJunction(jCondExit, false); + joinJunction(jExit, false); + } + } else if (type == FOR) { + int jTest = addJunction(); + int jBody = addJunction(); + int jStep = addJunction(); + int jExit = addJunction(); + buildFlowGraph(node[1]); + joinJunction(jTest, false); + isInExpr++; + buildFlowGraph(node[2]); + isInExpr--; + joinJunction(jBody, false); + pushActiveLabels(jStep, jExit); + buildFlowGraph(node[4]); + popActiveLabels(); + joinJunction(jStep, false); + buildFlowGraph(node[3]); + joinJunction(jTest, false); + setJunction(jBody, false); + joinJunction(jExit, false); + } else if (type == LABEL) { + assert(BREAK_CAPTURERS.has(node[2][0])); // 'label on non-loop, non-switch statement') + nextLoopLabel = node[1]->getIString(); + buildFlowGraph(node[2]); + } else if (type == SWITCH) { + // Emscripten generates switch statements of a very limited + // form: all case clauses are numeric literals, and all + // case bodies end with a (maybe implicit) break. So it's + // basically equivalent to a multi-way IF statement. + isInExpr++; + buildFlowGraph(node[1]); + isInExpr--; + Ref condition = lookThroughCasts(node[1]); + int jCheckExit = markJunction(-1); + int jExit = addJunction(); + pushActiveLabels(-1, jExit); + bool hasDefault = false; + for (size_t i = 0; i < node[2]->size(); i++) { + setJunction(jCheckExit, false); + // All case clauses are either 'default' or a numeric literal. + if (!node[2][i][0]) { + hasDefault = true; + } else { + // Detect switches dispatching to labelled blocks. + if (condition[0] == NAME && condition[1] == LABEL) { + addBlockLabel(lookThroughCasts(node[2][i][0])); + } + } + for (size_t j = 0; j < node[2][i][1]->size(); j++) { + buildFlowGraph(node[2][i][1][j]); + } + // Control flow will never actually reach the end of the case body. + // If there's live code here, assume it jumps to case exit. + if (currEntryJunction >= 0 && nextBasicBlock->nodes.size() > 0) { + if (!!node[2][i][0]) { + markNonLocalJump(RETURN, EMPTY); + } else { + joinJunction(jExit, false); + } + } + } + // If there was no default case, we also need an empty block + // linking straight from the test evaluation to the exit. + if (!hasDefault) { + setJunction(jCheckExit, false); + } + joinJunction(jExit, false); + popActiveLabels(); + } else if (type == RETURN) { + if (!!node[1]) { + isInExpr++; + buildFlowGraph(node[1]); + isInExpr--; + } + markNonLocalJump(type->getIString(), EMPTY); + } else if (type == BREAK || type == CONTINUE) { + markNonLocalJump(type->getIString(), !!node[1] ? node[1]->getIString() : EMPTY); + } else if (type == ASSIGN) { + isInExpr++; + buildFlowGraph(node[3]); + isInExpr--; + if (node[1]->isBool(true) && node[2][0] == NAME) { + addKillNode(node); + } else { + buildFlowGraph(node[2]); + } + } else if (type == NAME) { + addUseNode(node); + } else if (type == BLOCK || type == TOPLEVEL) { + if (!!node[1]) { + for (size_t i = 0; i < node[1]->size(); i++) { + buildFlowGraph(node[1][i]); + } + } + } else if (type == STAT) { + buildFlowGraph(node[1]); + } else if (type == UNARY_PREFIX || type == UNARY_POSTFIX) { + isInExpr++; + buildFlowGraph(node[2]); + isInExpr--; + } else if (type == BINARY) { + isInExpr++; + buildFlowGraph(node[2]); + buildFlowGraph(node[3]); + isInExpr--; + } else if (type == CALL) { + isInExpr++; + buildFlowGraph(node[1]); + if (!!node[2]) { + for (size_t i = 0; i < node[2]->size(); i++) { + buildFlowGraph(node[2][i]); + } + } + isInExpr--; + // If the call is statically known to throw, + // treat it as a jump to function exit. + if (!isInExpr && node[1][0] == NAME) { + if (FUNCTIONS_THAT_ALWAYS_THROW.has(node[1][1])) { + markNonLocalJump(RETURN, EMPTY); + } + } + } else if (type == SEQ || type == SUB) { + isInExpr++; + buildFlowGraph(node[1]); + buildFlowGraph(node[2]); + isInExpr--; + } else if (type == DOT || type == THROW) { + isInExpr++; + buildFlowGraph(node[1]); + isInExpr--; + } else if (type == NUM || type == STRING || type == VAR) { + // nada + } else { + assert(0); // 'unsupported node type: ' + type); + } + }; + + buildFlowGraph(fun); + +#ifdef PROFILING + tflowgraph += clock() - start; + start = clock(); +#endif + + assert(junctions[ENTRY_JUNCTION].inblocks.size() == 0); // 'function entry must have no incoming blocks'); + assert(junctions[EXIT_JUNCTION].outblocks.size() == 0); // 'function exit must have no outgoing blocks'); + assert(blocks[ENTRY_BLOCK]->entry == ENTRY_JUNCTION); //, 'block zero must be the initial block'); + + // Fix up implicit jumps done by assigning to the LABEL variable. + // If a block ends with an assignment to LABEL and there's another block + // with that value of LABEL as precondition, we tweak the flow graph so + // that the former jumps straight to the later. + + std::map labelledBlocks; + typedef std::pair Jump; + std::vector labelledJumps; + + for (size_t i = 0; i < blocks.size(); i++) { + Block* block = blocks[i]; + // Does it have any labels as preconditions to its entry? + for (auto labelVal : block->labels) { + // If there are multiple blocks with the same label, all bets are off. + // This seems to happen sometimes for short blocks that end with a return. + // TODO: it should be safe to merge the duplicates if they're identical. + if (labelledBlocks.count(labelVal) > 0) { + labelledBlocks.clear(); + labelledJumps.clear(); + goto AFTER_FINDLABELLEDBLOCKS; + } + labelledBlocks[labelVal] = block; + } + // Does it assign a specific label value at exit? + if (block->kill.has(LABEL)) { + Ref finalNode = block->nodes.back(); + if (finalNode[0] == ASSIGN && finalNode[2][1] == LABEL) { + // If labels are computed dynamically then all bets are off. + // This can happen due to indirect branching in llvm output. + if (finalNode[3][0] != NUM) { + labelledBlocks.clear(); + labelledJumps.clear(); + goto AFTER_FINDLABELLEDBLOCKS; + } + labelledJumps.push_back(Jump(finalNode[3][1], block)); + } else { + // If label is assigned a non-zero value elsewhere in the block + // then all bets are off. This can happen e.g. due to outlining + // saving/restoring label to the stack. + for (size_t j = 0; j < block->nodes.size() - 1; j++) { + if (block->nodes[j][0] == ASSIGN && block->nodes[j][2][1] == LABEL) { + if (block->nodes[j][3][0] != NUM || block->nodes[j][3][1]->getNumber() != 0) { + labelledBlocks.clear(); + labelledJumps.clear(); + goto AFTER_FINDLABELLEDBLOCKS; + } + } + } + } + } + } + + AFTER_FINDLABELLEDBLOCKS: + + for (auto labelVal : labelledBlocks) { + Block* block = labelVal.second; + // Disconnect it from the graph, and create a + // new junction for jumps targetting this label. + junctions[block->entry].outblocks.erase(block->id); + block->entry = addJunction(); + junctions[block->entry].outblocks.insert(block->id); + // Add a fake use of LABEL to keep it alive in predecessor. + block->use[LABEL] = 1; + block->nodes.insert(block->nodes.begin(), makeName(LABEL)); + block->isexpr.insert(block->isexpr.begin(), 1); + } + for (size_t i = 0; i < labelledJumps.size(); i++) { + auto labelVal = labelledJumps[i].first; + auto block = labelledJumps[i].second; + Block* targetBlock = labelledBlocks[labelVal->getInteger()]; + if (targetBlock) { + // Redirect its exit to entry of the target block. + junctions[block->exit].inblocks.erase(block->id); + block->exit = targetBlock->entry; + junctions[block->exit].inblocks.insert(block->id); + } + } + +#ifdef PROFILING + tlabelfix += clock() - start; + start = clock(); +#endif + + // Do a backwards data-flow analysis to determine the set of live + // variables at each junction, and to use this information to eliminate + // any unused assignments. + // We run two nested phases. The inner phase builds the live set for each + // junction. The outer phase uses this to try to eliminate redundant + // stores in each basic block, which might in turn affect liveness info. + + auto analyzeJunction = [&](Junction& junc) { + // Update the live set for this junction. + IOrderedStringSet live; + for (auto b : junc.outblocks) { + Block* block = blocks[b]; + IOrderedStringSet& liveSucc = junctions[block->exit].live; + for (auto name : liveSucc) { + if (!block->kill.has(name)) { + live.insert(name); + } + } + for (auto name : block->use) { + live.insert(name.first); + } + } + junc.live = live; + }; + + auto analyzeBlock = [&](Block* block) { + // Update information about the behaviour of the block. + // This includes the standard 'use' and 'kill' information, + // plus a 'link' set naming values that flow through from entry + // to exit, possibly changing names via simple 'x=y' assignments. + // As we go, we eliminate assignments if the variable is not + // subsequently used. + auto live = junctions[block->exit].live; + StringIntMap use; + StringSet kill; + StringStringMap link; + StringIntMap lastUseLoc; + StringIntMap firstDeadLoc; + StringIntMap firstKillLoc; + StringIntMap lastKillLoc; + for (auto name : live) { + link[name] = name; + lastUseLoc[name] = block->nodes.size(); + firstDeadLoc[name] = block->nodes.size(); + } + for (int j = block->nodes.size() - 1; j >= 0 ; j--) { + Ref node = block->nodes[j]; + if (node[0] == NAME) { + IString name = node[1]->getIString(); + live.insert(name); + use[name] = j; + if (lastUseLoc.count(name) == 0) { + lastUseLoc[name] = j; + firstDeadLoc[name] = j; + } + } else { + IString name = node[2][1]->getIString(); + // We only keep assignments if they will be subsequently used. + if (live.has(name)) { + kill.insert(name); + use.erase(name); + live.erase(name); + firstDeadLoc[name] = j; + firstKillLoc[name] = j; + if (lastUseLoc.count(name) == 0) { + lastUseLoc[name] = j; + } + if (lastKillLoc.count(name) == 0) { + lastKillLoc[name] = j; + } + // If it's an "x=y" and "y" is not live, then we can create a + // flow-through link from "y" to "x". If not then there's no + // flow-through link for "x". + if (link.has(name)) { + IString oldLink = link[name]; + link.erase(name); + if (node[3][0] == NAME) { + if (asmData.isLocal(node[3][1]->getIString())) { + link[node[3][1]->getIString()] = oldLink; + } + } + } + } else { + // The result of this assignment is never used, so delete it. + // We may need to keep the RHS for its value or its side-effects. + auto removeUnusedNodes = [&](int j, int n) { + for (auto pair : lastUseLoc) { + pair.second -= n; + } + for (auto pair : firstKillLoc) { + pair.second -= n; + } + for (auto pair : lastKillLoc) { + pair.second -= n; + } + for (auto pair : firstDeadLoc) { + pair.second -= n; + } + block->nodes.erase(block->nodes.begin() + j, block->nodes.begin() + j + n); + block->isexpr.erase(block->isexpr.begin() + j, block->isexpr.begin() + j + n); + }; + if (block->isexpr[j] || hasSideEffects(node[3])) { + safeCopy(node, node[3]); + removeUnusedNodes(j, 1); + } else { + int numUsesInExpr = 0; + traversePre(node[3], [&](Ref node) { + if (node[0] == NAME && asmData.isLocal(node[1]->getIString())) { + numUsesInExpr++; + } + }); + safeCopy(node, makeEmpty()); + j = j - numUsesInExpr; + removeUnusedNodes(j, 1 + numUsesInExpr); + } + } + } + } + // XXX efficiency + block->use = use; + block->kill = kill; + block->link = link; + block->lastUseLoc = lastUseLoc; + block->firstDeadLoc = firstDeadLoc; + block->firstKillLoc = firstKillLoc; + block->lastKillLoc = lastKillLoc; + }; + + // Ordered map to work in approximate reverse order of junction appearance + std::set jWorkSet; + std::set bWorkSet; + + // Be sure to visit every junction at least once. + // This avoids missing some vars because we disconnected them + // when processing the labelled jumps. + for (size_t i = EXIT_JUNCTION; i < junctions.size(); i++) { + jWorkSet.insert(i); + for (auto b : junctions[i].inblocks) { + bWorkSet.insert(b); + } + } + // Exit junction never has any live variable changes to propagate + jWorkSet.erase(EXIT_JUNCTION); + + do { + // Iterate on just the junctions until we get stable live sets. + // The first run of this loop will grow the live sets to their maximal size. + // Subsequent runs will shrink them based on eliminated in-block uses. + while (jWorkSet.size() > 0) { + auto last = jWorkSet.end(); + --last; + Junction& junc = junctions[*last]; + jWorkSet.erase(last); + IOrderedStringSet oldLive = junc.live; // copy it here, to check for changes later + analyzeJunction(junc); + if (oldLive != junc.live) { + // Live set changed, updated predecessor blocks and junctions. + for (auto b : junc.inblocks) { + bWorkSet.insert(b); + jWorkSet.insert(blocks[b]->entry); + } + } + } + // Now update the blocks based on the calculated live sets. + while (bWorkSet.size() > 0) { + auto last = bWorkSet.end(); + --last; + Block* block = blocks[*last]; + bWorkSet.erase(last); + auto oldUse = block->use; + analyzeBlock(block); + if (oldUse != block->use) { + // The use set changed, re-process the entry junction. + jWorkSet.insert(block->entry); + } + } + } while (jWorkSet.size() > 0); + +#ifdef PROFILING + tbackflow += clock() - start; + start = clock(); +#endif + + // Insist that all function parameters are alive at function entry. + // This ensures they will be assigned independent registers, even + // if they happen to be unused. + + for (auto name : asmData.params) { + junctions[ENTRY_JUNCTION].live.insert(name); + } + + // For variables that are live at one or more junctions, we assign them + // a consistent register for the entire scope of the function. Find pairs + // of variable that cannot use the same register (the "conflicts") as well + // as pairs of variables that we'd like to have share the same register + // (the "links"). + + struct JuncVar { + std::vector conf; + IOrderedStringSet link; + std::unordered_set excl; + int reg; + bool used; + JuncVar() : reg(-1), used(false) {} + }; + size_t numLocals = asmData.locals.size(); + std::unordered_map nameToNum; + std::vector numToName; + nameToNum.reserve(numLocals); + numToName.reserve(numLocals); + for (auto kv : asmData.locals) { + nameToNum[kv.first] = numToName.size(); + numToName.push_back(kv.first); + } + + std::vector juncVars(numLocals); + for (Junction& junc : junctions) { + for (IString name : junc.live) { + JuncVar& jVar = juncVars[nameToNum[name]]; + jVar.used = true; + jVar.conf.assign(numLocals, false); + } + } + std::map> possibleBlockConflictsMap; + std::vector>> possibleBlockConflicts; + std::unordered_map> possibleBlockLinks; + possibleBlockConflicts.reserve(numLocals); + possibleBlockLinks.reserve(numLocals); + + for (Junction& junc : junctions) { + // Pre-compute the possible conflicts and links for each block rather + // than checking potentially impossible options for each var + possibleBlockConflictsMap.clear(); + possibleBlockConflicts.clear(); + possibleBlockLinks.clear(); + for (auto b : junc.outblocks) { + Block* block = blocks[b]; + Junction& jSucc = junctions[block->exit]; + for (auto name : jSucc.live) { + possibleBlockConflictsMap[name].push_back(block); + } + for (auto name_linkname : block->link) { + if (name_linkname.first != name_linkname.second) { + possibleBlockLinks[name_linkname.first].push_back(block); + } + } + } + // Find the live variables in this block, mark them as unnecessary to + // check for conflicts (we mark all live vars as conflicting later) + std::vector liveJVarNums; + liveJVarNums.reserve(junc.live.size()); + for (auto name : junc.live) { + size_t jVarNum = nameToNum[name]; + liveJVarNums.push_back(jVarNum); + possibleBlockConflictsMap.erase(name); + } + // Extract just the variables we might want to check for conflicts + for (auto kv : possibleBlockConflictsMap) { + possibleBlockConflicts.push_back(std::make_pair(nameToNum[kv.first], kv.second)); + } + + for (size_t jVarNum : liveJVarNums) { + JuncVar& jvar = juncVars[jVarNum]; + IString name = numToName[jVarNum]; + // It conflicts with all other names live at this junction. + for (size_t liveJVarNum : liveJVarNums) { + jvar.conf[liveJVarNum] = true; + } + jvar.conf[jVarNum] = false; // except for itself, of course + + // It conflicts with any output vars of successor blocks, + // if they're assigned before it goes dead in that block. + for (auto jvarnum_blocks : possibleBlockConflicts) { + size_t otherJVarNum = jvarnum_blocks.first; + IString otherName = numToName[otherJVarNum]; + for (auto block : jvarnum_blocks.second) { + if (block->lastKillLoc[otherName] < block->firstDeadLoc[name]) { + jvar.conf[otherJVarNum] = true; + juncVars[otherJVarNum].conf[jVarNum] = true; + break; + } + } + } + + // It links with any linkages in the outgoing blocks. + for (auto block: possibleBlockLinks[name]) { + IString linkName = block->link[name]; + jvar.link.insert(linkName); + juncVars[nameToNum[linkName]].link.insert(name); + } + } + } + +#ifdef PROFILING + tjuncvaruniqassign += clock() - start; + start = clock(); +#endif + + // Attempt to sort the junction variables to heuristically reduce conflicts. + // Simple starting point: handle the most-conflicted variables first. + // This seems to work pretty well. + + std::vector sortedJVarNums; + sortedJVarNums.reserve(juncVars.size()); + std::vector jVarConfCounts(numLocals); + for (size_t jVarNum = 0; jVarNum < juncVars.size(); jVarNum++) { + JuncVar& jVar = juncVars[jVarNum]; + if (!jVar.used) continue; + jVarConfCounts[jVarNum] = std::count(jVar.conf.begin(), jVar.conf.end(), true); + sortedJVarNums.push_back(jVarNum); + } + std::sort(sortedJVarNums.begin(), sortedJVarNums.end(), [&](const size_t vi1, const size_t vi2) { + // sort by # of conflicts + if (jVarConfCounts[vi1] < jVarConfCounts[vi2]) return true; + if (jVarConfCounts[vi1] == jVarConfCounts[vi2]) return numToName[vi1] < numToName[vi2]; + return false; + }); + +#ifdef PROFILING + tjuncvarsort += clock() - start; + start = clock(); +#endif + + // We can now assign a register to each junction variable. + // Process them in order, trying available registers until we find + // one that works, and propagating the choice to linked/conflicted + // variables as we go. + + std::function tryAssignRegister = [&](IString name, int reg) { + // Try to assign the given register to the given variable, + // and propagate that choice throughout the graph. + // Returns true if successful, false if there was a conflict. + JuncVar& jv = juncVars[nameToNum[name]]; + if (jv.reg > 0) { + return jv.reg == reg; + } + if (jv.excl.count(reg) > 0) { + return false; + } + jv.reg = reg; + // Exclude use of this register at all conflicting variables. + for (size_t confNameNum = 0; confNameNum < jv.conf.size(); confNameNum++) { + if (jv.conf[confNameNum]) { + juncVars[confNameNum].excl.insert(reg); + } + } + // Try to propagate it into linked variables. + // It's not an error if we can't. + for (auto linkName : jv.link) { + tryAssignRegister(linkName, reg); + } + return true; + }; + for (size_t jVarNum : sortedJVarNums) { + // It may already be assigned due to linked-variable propagation. + if (juncVars[jVarNum].reg > 0) { + continue; + } + IString name = numToName[jVarNum]; + // Try to use existing registers first. + auto& allRegs = allRegsByType[asmData.getType(name)]; + bool moar = false; + for (auto reg : allRegs) { + if (tryAssignRegister(name, reg.first)) { + moar = true; + break; + } + } + if (moar) continue; + // They're all taken, create a new one. + tryAssignRegister(name, createReg(name)); + } + +#ifdef PROFILING + tregassign += clock() - start; + start = clock(); +#endif + + // Each basic block can now be processed in turn. + // There may be internal-use-only variables that still need a register + // assigned, but they can be treated just for this block. We know + // that all inter-block variables are in a good state thanks to + // junction variable consistency. + + for (size_t i = 0; i < blocks.size(); i++) { + Block* block = blocks[i]; + if (block->nodes.size() == 0) continue; + Junction& jEnter = junctions[block->entry]; + Junction& jExit = junctions[block->exit]; + // Mark the point at which each input reg becomes dead. + // Variables alive before this point must not be assigned + // to that register. + StringSet inputVars; + std::unordered_map inputDeadLoc; + std::unordered_map inputVarsByReg; + for (auto name : jExit.live) { + if (!block->kill.has(name)) { + inputVars.insert(name); + int reg = juncVars[nameToNum[name]].reg; + assert(reg > 0); // 'input variable doesnt have a register'); + inputDeadLoc[reg] = block->firstDeadLoc[name]; + inputVarsByReg[reg] = name; + } + } + for (auto pair : block->use) { + IString name = pair.first; + if (!inputVars.has(name)) { + inputVars.insert(name); + int reg = juncVars[nameToNum[name]].reg; + assert(reg > 0); // 'input variable doesnt have a register'); + inputDeadLoc[reg] = block->firstDeadLoc[name]; + inputVarsByReg[reg] = name; + } + } + // TODO assert(setSize(setSub(inputVars, jEnter.live)) == 0); + // Scan through backwards, allocating registers on demand. + // Be careful to avoid conflicts with the input registers. + // We consume free registers in last-used order, which helps to + // eliminate "x=y" assignments that are the last use of "y". + StringIntMap assignedRegs; + auto freeRegsByTypePre = allRegsByType; // XXX copy + // Begin with all live vars assigned per the exit junction. + for (auto name : jExit.live) { + int reg = juncVars[nameToNum[name]].reg; + assert(reg > 0); // 'output variable doesnt have a register'); + assignedRegs[name] = reg; + freeRegsByTypePre[asmData.getType(name)].erase(reg); // XXX assert? + } + std::vector> freeRegsByType; + freeRegsByType.resize(freeRegsByTypePre.size()); + for (size_t j = 0; j < freeRegsByTypePre.size(); j++) { + for (auto pair : freeRegsByTypePre[j]) { + freeRegsByType[j].push_back(pair.first); + } + } + // Scan through the nodes in sequence, modifying each node in-place + // and grabbing/freeing registers as needed. + std::vector> maybeRemoveNodes; + for (int j = block->nodes.size() - 1; j >= 0; j--) { + Ref node = block->nodes[j]; + IString name = (node[0] == ASSIGN ? node[2][1] : node[1])->getIString(); + IntStringMap& allRegs = allRegsByType[asmData.getType(name)]; + std::vector& freeRegs = freeRegsByType[asmData.getType(name)]; + int reg = assignedRegs[name]; // XXX may insert a zero + if (node[0] == NAME) { + // A use. Grab a register if it doesn't have one. + if (reg <= 0) { + if (inputVars.has(name) && j <= block->firstDeadLoc[name]) { + // Assignment to an input variable, must use pre-assigned reg. + reg = juncVars[nameToNum[name]].reg; + assignedRegs[name] = reg; + for (int k = freeRegs.size() - 1; k >= 0; k--) { + if (freeRegs[k] == reg) { + freeRegs.erase(freeRegs.begin() + k); + break; + } + } + } else { + // Try to use one of the existing free registers. + // It must not conflict with an input register. + for (int k = freeRegs.size() - 1; k >= 0; k--) { + reg = freeRegs[k]; + // Check for conflict with input registers. + if (inputDeadLoc.count(reg) > 0) { + if (block->firstKillLoc[name] <= inputDeadLoc[reg]) { + if (name != inputVarsByReg[reg]) { + continue; + } + } + } + // Found one! + assignedRegs[name] = reg; + assert(reg > 0); + freeRegs.erase(freeRegs.begin() + k); + break; + } + // If we didn't find a suitable register, create a new one. + if (assignedRegs[name] <= 0) { + reg = createReg(name); + assignedRegs[name] = reg; + } + } + } + node[1]->setString(allRegs[reg]); + } else { + // A kill. This frees the assigned register. + assert(reg > 0); //, 'live variable doesnt have a reg?') + node[2][1]->setString(allRegs[reg]); + freeRegs.push_back(reg); + assignedRegs.erase(name); + if (node[3][0] == NAME && asmData.isLocal(node[3][1]->getIString())) { + maybeRemoveNodes.push_back(std::pair(j, node)); + } + } + } + // If we managed to create any "x=x" assignments, remove them. + for (size_t j = 0; j < maybeRemoveNodes.size(); j++) { + Ref node = maybeRemoveNodes[j].second; + if (node[2][1] == node[3][1]) { + if (block->isexpr[maybeRemoveNodes[j].first]) { + safeCopy(node, node[2]); + } else { + safeCopy(node, makeEmpty()); + } + } + } + } + +#ifdef PROFILING + tblockproc += clock() - start; + start = clock(); +#endif + + // Assign registers to function params based on entry junction + + StringSet paramRegs; + if (!!fun[2]) { + for (size_t i = 0; i < fun[2]->size(); i++) { + auto& allRegs = allRegsByType[asmData.getType(fun[2][i]->getIString())]; + fun[2][i]->setString(allRegs[juncVars[nameToNum[fun[2][i]->getIString()]].reg]); + paramRegs.insert(fun[2][i]->getIString()); + } + } + + // That's it! + // Re-construct the function with appropriate variable definitions. + + asmData.locals.clear(); + asmData.params.clear(); + asmData.vars.clear(); + for (int i = 1; i < nextReg; i++) { + for (size_t type = 0; type < allRegsByType.size(); type++) { + if (allRegsByType[type].count(i) > 0) { + IString reg = allRegsByType[type][i]; + if (!paramRegs.has(reg)) { + asmData.addVar(reg, intToAsmType(type)); + } else { + asmData.addParam(reg, intToAsmType(type)); + } + break; + } + } + } + asmData.denormalize(); + + removeAllUselessSubNodes(fun); // XXX vacuum? vacuum(fun); + +#ifdef PROFILING + treconstruct += clock() - start; + start = clock(); +#endif + + }); +#ifdef PROFILING + errv(" RH stages: a:%li fl:%li lf:%li bf:%li jvua:%li jvs:%li jra:%li bp:%li r:%li", + tasmdata, tflowgraph, tlabelfix, tbackflow, tjuncvaruniqassign, tjuncvarsort, tregassign, tblockproc, treconstruct); +#endif +} +// end registerizeHarder + +// minified names generation +StringSet RESERVED("do if in for new try var env let"); +const char *VALID_MIN_INITS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$"; +const char *VALID_MIN_LATERS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$0123456789"; + +StringVec minifiedNames; +std::vector minifiedState; + +void ensureMinifiedNames(int n) { // make sure the nth index in minifiedNames exists. done 100% deterministically + static int VALID_MIN_INITS_LEN = strlen(VALID_MIN_INITS); + static int VALID_MIN_LATERS_LEN = strlen(VALID_MIN_LATERS); + + while ((int)minifiedNames.size() < n+1) { + // generate the current name + std::string name; + name += VALID_MIN_INITS[minifiedState[0]]; + for (size_t i = 1; i < minifiedState.size(); i++) { + name += VALID_MIN_LATERS[minifiedState[i]]; + } + IString str(strdupe(name.c_str())); // leaked! + if (!RESERVED.has(str)) minifiedNames.push_back(str); + // increment the state + size_t i = 0; + while (1) { + minifiedState[i]++; + if (minifiedState[i] < (i == 0 ? VALID_MIN_INITS_LEN : VALID_MIN_LATERS_LEN)) break; + // overflow + minifiedState[i] = 0; + i++; + if (i == minifiedState.size()) minifiedState.push_back(-1); // will become 0 after increment in next loop head + } + } +} + +void minifyLocals(Ref ast) { + assert(!!extraInfo); + IString GLOBALS("globals"); + assert(extraInfo->has(GLOBALS)); + Ref globals = extraInfo[GLOBALS]; + + if (minifiedState.size() == 0) minifiedState.push_back(0); + + traverseFunctions(ast, [&globals](Ref fun) { + // Analyse the asmjs to figure out local variable names, + // but operate on the original source tree so that we don't + // miss any global names in e.g. variable initializers. + AsmData asmData(fun); + asmData.denormalize(); // TODO: we can avoid modifying at all here - we just need a list of local vars+params + + StringStringMap newNames; + StringSet usedNames; + + // Find all the globals that we need to minify using + // pre-assigned names. Don't actually minify them yet + // as that might interfere with local variable names. + traversePre(fun, [&](Ref node) { + if (node[0] == NAME) { + IString name = node[1]->getIString(); + if (!asmData.isLocal(name)) { + if (globals->has(name)) { + IString minified = globals[name]->getIString(); + assert(!!minified); + newNames[name] = minified; + usedNames.insert(minified); + } + } + } + }); + + // The first time we encounter a local name, we assign it a + // minified name that's not currently in use. Allocating on + // demand means they're processed in a predictable order, + // which is very handy for testing/debugging purposes. + int nextMinifiedName = 0; + auto getNextMinifiedName = [&]() { + IString minified; + while (1) { + ensureMinifiedNames(nextMinifiedName); + minified = minifiedNames[nextMinifiedName++]; + // TODO: we can probably remove !isLocalName here + if (!usedNames.has(minified) && !asmData.isLocal(minified)) { + return minified; + } + } + }; + + // We can also minify loop labels, using a separate namespace + // to the variable declarations. + StringStringMap newLabels; + int nextMinifiedLabel = 0; + auto getNextMinifiedLabel = [&]() { + ensureMinifiedNames(nextMinifiedLabel); + return minifiedNames[nextMinifiedLabel++]; + }; + + // Traverse and minify all names. + if (globals->has(fun[1]->getIString())) { + fun[1]->setString(globals[fun[1]->getIString()]->getIString()); + assert(!!fun[1]); + } + if (!!fun[2]) { + for (size_t i = 0; i < fun[2]->size(); i++) { + IString minified = getNextMinifiedName(); + newNames[fun[2][i]->getIString()] = minified; + fun[2][i]->setString(minified); + } + } + traversePre(fun[3], [&](Ref node) { + Ref type = node[0]; + if (type == NAME) { + IString name = node[1]->getIString(); + IString minified = newNames[name]; + if (!!minified) { + node[1]->setString(minified); + } else if (asmData.isLocal(name)) { + minified = getNextMinifiedName(); + newNames[name] = minified; + node[1]->setString(minified); + } + } else if (type == VAR) { + for (size_t i = 0; i < node[1]->size(); i++) { + Ref defn = node[1][i]; + IString name = defn[0]->getIString(); + if (!(newNames.has(name))) { + newNames[name] = getNextMinifiedName(); + } + defn[0]->setString(newNames[name]); + } + } else if (type == LABEL) { + IString name = node[1]->getIString(); + if (!newLabels.has(name)) { + newLabels[name] = getNextMinifiedLabel(); + } + node[1]->setString(newLabels[name]); + } else if (type == BREAK || type == CONTINUE) { + if (node->size() > 1 && !!node[1]) { + node[1]->setString(newLabels[node[1]->getIString()]); + } + } + }); + }); +} + +void asmLastOpts(Ref ast) { + std::vector statsStack; + traverseFunctions(ast, [&](Ref fun) { + traversePrePost(fun, [&](Ref node) { + Ref type = node[0]; + Ref stats = getStatements(node); + if (!!stats) statsStack.push_back(stats); + if (CONDITION_CHECKERS.has(type)) { + node[1] = simplifyCondition(node[1]); + } + if (type == WHILE && node[1][0] == NUM && node[1][1]->getNumber() == 1 && node[2][0] == BLOCK && node[2]->size() == 2) { + // This is at the end of the pipeline, we can assume all other optimizations are done, and we modify loops + // into shapes that might confuse other passes + + // while (1) { .. if (..) { break } } ==> do { .. } while(..) + Ref stats = node[2][1]; + Ref last = stats->back(); + if (!!last && last[0] == IF && (last->size() < 4 || !last[3]) && last[2][0] == BLOCK && !!last[2][1][0]) { + Ref lastStats = last[2][1]; + int lastNum = lastStats->size(); + Ref lastLast = lastStats[lastNum-1]; + if (!(lastLast[0] == BREAK && !lastLast[1])) return;// if not a simple break, dangerous + for (int i = 0; i < lastNum; i++) { + if (lastStats[i][0] != STAT && lastStats[i][0] != BREAK) return; // something dangerous + } + // ok, a bunch of statements ending in a break + bool abort = false; + int stack = 0; + int breaks = 0; + traversePrePost(stats, [&](Ref node) { + Ref type = node[0]; + if (type == CONTINUE) { + if (stack == 0 || !!node[1]) { // abort if labeled (we do not analyze labels here yet), or a continue directly on us + abort = true; + } + } else if (type == BREAK) { + if (stack == 0 || !!node[1]) { // relevant if labeled (we do not analyze labels here yet), or a break directly on us + breaks++; + } + } else if (LOOP.has(type)) { + stack++; + } + }, [&](Ref node) { + if (LOOP.has(node[0])) { + stack--; + } + }); + if (abort) return; + assert(breaks > 0); + if (lastStats->size() > 1 && breaks != 1) return; // if we have code aside from the break, we can only move it out if there is just one break + if (statsStack.size() < 1) return; // no chance we have this stats on hand + // start to optimize + if (lastStats->size() > 1) { + Ref parent = statsStack.back(); + int me = parent->indexOf(node); + if (me < 0) return; // not always directly on a stats, could be in a label for example + parent->insert(me+1, lastStats->size()-1); + for (size_t i = 0; i+1 < lastStats->size(); i++) { + parent[me+1+i] = lastStats[i]; + } + } + Ref conditionToBreak = last[1]; + stats->pop_back(); + node[0]->setString(DO); + node[1] = simplifyNotCompsDirect(make2(UNARY_PREFIX, L_NOT, conditionToBreak)); + } + } else if (type == BINARY) { + if (node[1] == AND) { + if (node[3][0] == UNARY_PREFIX && node[3][1] == MINUS && node[3][2][0] == NUM && node[3][2][1]->getNumber() == 1) { + // Change &-1 into |0, at this point the hint is no longer needed + node[1]->setString(OR); + node[3] = node[3][2]; + node[3][1]->setNumber(0); + } + } else if (node[1] == MINUS && node[3][0] == UNARY_PREFIX) { + // avoid X - (-Y) because some minifiers buggily emit X--Y which is invalid as -- can be a unary. Transform to + // X + Y + if (node[3][1] == MINUS) { // integer + node[1]->setString(PLUS); + node[3] = node[3][2]; + } else if (node[3][1] == PLUS) { // float + if (node[3][2][0] == UNARY_PREFIX && node[3][2][1] == MINUS) { + node[1]->setString(PLUS); + node[3][2] = node[3][2][2]; + } + } + } + } + }, [&](Ref node) { + if (statsStack.size() > 0) { + Ref stats = getStatements(node); + if (!!stats) statsStack.pop_back(); + } + }); + // convert { singleton } into singleton + traversePre(fun, [](Ref node) { + if (node[0] == BLOCK && !!getStatements(node) && node[1]->size() == 1) { + safeCopy(node, node[1][0]); + } + }); + // convert L: do { .. } while(0) into L: { .. } + traversePre(fun, [](Ref node) { + if (node[0] == LABEL && node[1]->isString() /* careful of var label = 5 */ && + node[2][0] == DO && node[2][1][0] == NUM && node[2][1][1]->getNumber() == 0 && node[2][2][0] == BLOCK) { + // there shouldn't be any continues on this, not direct break or continue + IString label = node[1]->getIString(); + bool abort = false; + int breakCaptured = 0, continueCaptured = 0; + traversePrePost(node[2][2], [&](Ref node) { + if (node[0] == CONTINUE) { + if (!node[1] && !continueCaptured) { + abort = true; + } else if (node[1]->isString() && node[1]->getIString() == label) { + abort = true; + } + } + if (node[0] == BREAK && !node[1] && !breakCaptured) { + abort = true; + } + if (BREAK_CAPTURERS.has(node[0])) { + breakCaptured++; + } + if (CONTINUE_CAPTURERS.has(node[0])) { + continueCaptured++; + } + }, [&](Ref node) { + if (BREAK_CAPTURERS.has(node[0])) { + breakCaptured--; + } + if (CONTINUE_CAPTURERS.has(node[0])) { + continueCaptured--; + } + }); + if (abort) return; + safeCopy(node[2], node[2][2]); + } + }); + }); +} + +// Contrary to the name this does not eliminate actual dead functions, only +// those marked as such with DEAD_FUNCTIONS +void eliminateDeadFuncs(Ref ast) { + assert(!!extraInfo); + IString DEAD_FUNCTIONS("dead_functions"); + IString ABORT("abort"); + assert(extraInfo->has(DEAD_FUNCTIONS)); + StringSet deadFunctions; + for (size_t i = 0; i < extraInfo[DEAD_FUNCTIONS]->size(); i++) { + deadFunctions.insert(extraInfo[DEAD_FUNCTIONS][i]->getIString()); + } + traverseFunctions(ast, [&](Ref fun) { + if (!deadFunctions.has(fun[1].get()->getIString())) { + return; + } + AsmData asmData(fun); + fun[3]->setSize(1); + fun[3][0] = make1(STAT, make2(CALL, makeName(ABORT), &(makeArray(1))->push_back(makeNum(-1)))); + asmData.vars.clear(); + asmData.denormalize(); + }); +} + diff --git a/src/emscripten-optimizer/optimizer.h b/src/emscripten-optimizer/optimizer.h new file mode 100644 index 000000000..5edcaad87 --- /dev/null +++ b/src/emscripten-optimizer/optimizer.h @@ -0,0 +1,127 @@ + +#ifndef __optimizer_h__ +#define __optimizer_h__ + +#include "simple_ast.h" + +extern bool preciseF32, + receiveJSON, + emitJSON, + minifyWhitespace, + last; + +extern cashew::Ref extraInfo; + +void eliminateDeadFuncs(cashew::Ref ast); +void eliminate(cashew::Ref ast, bool memSafe=false); +void eliminateMemSafe(cashew::Ref ast); +void simplifyExpressions(cashew::Ref ast); +void optimizeFrounds(cashew::Ref ast); +void simplifyIfs(cashew::Ref ast); +void registerize(cashew::Ref ast); +void registerizeHarder(cashew::Ref ast); +void minifyLocals(cashew::Ref ast); +void asmLastOpts(cashew::Ref ast); + +// + +enum AsmType { + ASM_INT = 0, + ASM_DOUBLE, + ASM_FLOAT, + ASM_FLOAT32X4, + ASM_FLOAT64X2, + ASM_INT8X16, + ASM_INT16X8, + ASM_INT32X4, + ASM_NONE // number of types +}; + +struct AsmData; + +AsmType detectType(cashew::Ref node, AsmData *asmData=nullptr, bool inVarDef=false); + +struct AsmData { + struct Local { + Local() {} + Local(AsmType type, bool param) : type(type), param(param) {} + AsmType type; + bool param; // false if a var + }; + typedef std::unordered_map Locals; + + Locals locals; + std::vector params; // in order + std::vector vars; // in order + AsmType ret; + + cashew::Ref func; + + AsmType getType(const cashew::IString& name) { + auto ret = locals.find(name); + if (ret != locals.end()) return ret->second.type; + return ASM_NONE; + } + void setType(const cashew::IString& name, AsmType type) { + locals[name].type = type; + } + + bool isLocal(const cashew::IString& name) { + return locals.count(name) > 0; + } + bool isParam(const cashew::IString& name) { + return isLocal(name) && locals[name].param; + } + bool isVar(const cashew::IString& name) { + return isLocal(name) && !locals[name].param; + } + + AsmData() {} // if you want to fill in the data yourself + AsmData(cashew::Ref f); // if you want to read data from f, and modify it as you go (parallel to denormalize) + + void denormalize(); + + void addParam(cashew::IString name, AsmType type) { + locals[name] = Local(type, true); + params.push_back(name); + } + void addVar(cashew::IString name, AsmType type) { + locals[name] = Local(type, false); + vars.push_back(name); + } + + void deleteVar(cashew::IString name) { + locals.erase(name); + for (size_t i = 0; i < vars.size(); i++) { + if (vars[i] == name) { + vars.erase(vars.begin() + i); + break; + } + } + } +}; + +bool isInteger(double x); + +bool isInteger32(double x); + +extern cashew::IString ASM_FLOAT_ZERO; + +extern cashew::IString SIMD_INT8X16_CHECK, + SIMD_INT16X8_CHECK, + SIMD_INT32X4_CHECK, + SIMD_FLOAT32X4_CHECK, + SIMD_FLOAT64X2_CHECK; + +int parseInt(const char *str); + +struct HeapInfo { + bool valid, unsign, floaty; + int bits; + AsmType type; +}; + +HeapInfo parseHeap(const char *name); + +#endif // __optimizer_h__ + diff --git a/src/emscripten-optimizer/parser.cpp b/src/emscripten-optimizer/parser.cpp new file mode 100644 index 000000000..06e455b0b --- /dev/null +++ b/src/emscripten-optimizer/parser.cpp @@ -0,0 +1,144 @@ + +#include "parser.h" + +namespace cashew { + +// common strings + +IString TOPLEVEL("toplevel"), + DEFUN("defun"), + BLOCK("block"), + STAT("stat"), + ASSIGN("assign"), + NAME("name"), + VAR("var"), + CONST("const"), + CONDITIONAL("conditional"), + BINARY("binary"), + RETURN("return"), + IF("if"), + ELSE("else"), + WHILE("while"), + DO("do"), + FOR("for"), + SEQ("seq"), + SUB("sub"), + CALL("call"), + NUM("num"), + LABEL("label"), + BREAK("break"), + CONTINUE("continue"), + SWITCH("switch"), + STRING("string"), + INF("inf"), + NaN("nan"), + TEMP_RET0("tempRet0"), + UNARY_PREFIX("unary-prefix"), + UNARY_POSTFIX("unary-postfix"), + MATH_FROUND("Math_fround"), + SIMD_FLOAT32X4("SIMD_Float32x4"), + SIMD_FLOAT64X2("SIMD_Float64x2"), + SIMD_INT8X16("SIMD_Int8x16"), + SIMD_INT16X8("SIMD_Int16x8"), + SIMD_INT32X4("SIMD_Int32x4"), + PLUS("+"), + MINUS("-"), + OR("|"), + AND("&"), + XOR("^"), + L_NOT("!"), + B_NOT("~"), + LT("<"), + GE(">="), + LE("<="), + GT(">"), + EQ("=="), + NE("!="), + DIV("/"), + MOD("%"), + MUL("*"), + RSHIFT(">>"), + LSHIFT("<<"), + TRSHIFT(">>>"), + TEMP_DOUBLE_PTR("tempDoublePtr"), + HEAP8("HEAP8"), + HEAP16("HEAP16"), + HEAP32("HEAP32"), + HEAPF32("HEAPF32"), + HEAPU8("HEAPU8"), + HEAPU16("HEAPU16"), + HEAPU32("HEAPU32"), + HEAPF64("HEAPF64"), + F0("f0"), + EMPTY(""), + FUNCTION("function"), + OPEN_PAREN("("), + OPEN_BRACE("["), + OPEN_CURLY("{"), + CLOSE_CURLY("}"), + COMMA(","), + QUESTION("?"), + COLON(":"), + CASE("case"), + DEFAULT("default"), + DOT("dot"), + PERIOD("."), + NEW("new"), + ARRAY("array"), + OBJECT("object"), + THROW("throw"), + SET("="); + +IStringSet keywords("var const function if else do while for break continue return switch case default throw try catch finally true false null new"); + +const char *OPERATOR_INITS = "+-*/%<>&^|~=!,?:.", + *SEPARATORS = "([;{}"; + +int MAX_OPERATOR_SIZE = 3; + +std::vector operatorClasses; + +static std::vector> precedences; // op, type => prec + +struct Init { + Init() { + // operators, rtl, type + operatorClasses.push_back(OperatorClass(".", false, OperatorClass::Binary)); + operatorClasses.push_back(OperatorClass("! ~ + -", true, OperatorClass::Prefix)); + operatorClasses.push_back(OperatorClass("* / %", false, OperatorClass::Binary)); + operatorClasses.push_back(OperatorClass("+ -", false, OperatorClass::Binary)); + operatorClasses.push_back(OperatorClass("<< >> >>>", false, OperatorClass::Binary)); + operatorClasses.push_back(OperatorClass("< <= > >=", false, OperatorClass::Binary)); + operatorClasses.push_back(OperatorClass("== !=", false, OperatorClass::Binary)); + operatorClasses.push_back(OperatorClass("&", false, OperatorClass::Binary)); + operatorClasses.push_back(OperatorClass("^", false, OperatorClass::Binary)); + operatorClasses.push_back(OperatorClass("|", false, OperatorClass::Binary)); + operatorClasses.push_back(OperatorClass("? :", true, OperatorClass::Tertiary)); + operatorClasses.push_back(OperatorClass("=", true, OperatorClass::Binary)); + operatorClasses.push_back(OperatorClass(",", true, OperatorClass::Binary)); + + precedences.resize(OperatorClass::Tertiary + 1); + + for (size_t prec = 0; prec < operatorClasses.size(); prec++) { + for (auto curr : operatorClasses[prec].ops) { + precedences[operatorClasses[prec].type][curr] = prec; + } + } + } +}; + +Init init; + +int OperatorClass::getPrecedence(Type type, IString op) { + return precedences[type][op]; +} + +bool OperatorClass::getRtl(int prec) { + return operatorClasses[prec].rtl; +} + +bool isIdentInit(char x) { return (x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z') || x == '_' || x == '$'; } +bool isIdentPart(char x) { return isIdentInit(x) || (x >= '0' && x <= '9'); } + +} // namespace cashew + diff --git a/src/emscripten-optimizer/parser.h b/src/emscripten-optimizer/parser.h new file mode 100644 index 000000000..8ca07c6ed --- /dev/null +++ b/src/emscripten-optimizer/parser.h @@ -0,0 +1,905 @@ +// Pure parsing. Calls methods on a Builder (template argument) to actually construct the AST +// +// XXX All parsing methods assume they take ownership of the input string. This lets them reuse +// parts of it. You will segfault if the input string cannot be reused and written to. + +#ifndef __parser_h__ +#define __parser_h__ + +#include +#include +#include + +#include + +#include "istring.h" + +namespace cashew { + +// common strings + +extern IString TOPLEVEL, + DEFUN, + BLOCK, + STAT, + ASSIGN, + NAME, + VAR, + CONST, + CONDITIONAL, + BINARY, + RETURN, + IF, + ELSE, + WHILE, + DO, + FOR, + SEQ, + SUB, + CALL, + NUM, + LABEL, + BREAK, + CONTINUE, + SWITCH, + STRING, + INF, + NaN, + TEMP_RET0, + UNARY_PREFIX, + UNARY_POSTFIX, + MATH_FROUND, + SIMD_FLOAT32X4, + SIMD_FLOAT64X2, + SIMD_INT8X16, + SIMD_INT16X8, + SIMD_INT32X4, + PLUS, + MINUS, + OR, + AND, + XOR, + L_NOT, + B_NOT, + LT, + GE, + LE, + GT, + EQ, + NE, + DIV, + MOD, + MUL, + RSHIFT, + LSHIFT, + TRSHIFT, + TEMP_DOUBLE_PTR, + HEAP8, + HEAP16, + HEAP32, + HEAPF32, + HEAPU8, + HEAPU16, + HEAPU32, + HEAPF64, + F0, + EMPTY, + FUNCTION, + OPEN_PAREN, + OPEN_BRACE, + OPEN_CURLY, + CLOSE_CURLY, + COMMA, + QUESTION, + COLON, + CASE, + DEFAULT, + DOT, + PERIOD, + NEW, + ARRAY, + OBJECT, + THROW, + SET; + +extern IStringSet keywords; + +extern const char *OPERATOR_INITS, *SEPARATORS; + +extern int MAX_OPERATOR_SIZE, LOWEST_PREC; + +struct OperatorClass { + enum Type { + Binary = 0, + Prefix = 1, + Postfix = 2, + Tertiary = 3 + }; + + IStringSet ops; + bool rtl; + Type type; + + OperatorClass(const char* o, bool r, Type t) : ops(o), rtl(r), type(t) {} + + static int getPrecedence(Type type, IString op); + static bool getRtl(int prec); +}; + +extern std::vector operatorClasses; + +extern bool isIdentInit(char x); +extern bool isIdentPart(char x); + +// parser + +template +class Parser { + + static bool isSpace(char x) { return x == 32 || x == 9 || x == 10 || x == 13; } /* space, tab, linefeed/newline, or return */ + static void skipSpace(char*& curr) { + while (*curr) { + if (isSpace(*curr)) { + curr++; + continue; + } + if (curr[0] == '/' && curr[1] == '/') { + curr += 2; + while (*curr && *curr != '\n') curr++; + if (*curr) curr++; + continue; + } + if (curr[0] == '/' && curr[1] == '*') { + curr += 2; + while (*curr && (curr[0] != '*' || curr[1] != '/')) curr++; + curr += 2; + continue; + } + return; + } + } + + static bool isDigit(char x) { return x >= '0' && x <= '9'; } + + static bool hasChar(const char* list, char x) { while (*list) if (*list++ == x) return true; return false; } + + static bool is32Bit(double x) { + return x == (int)x || x == (unsigned int)x; + } + + // An atomic fragment of something. Stops at a natural boundary. + enum FragType { + KEYWORD = 0, + OPERATOR = 1, + IDENT = 2, + STRING = 3, // without quotes + INT = 4, + DOUBLE = 5, + SEPARATOR = 6 + }; + + struct Frag { +#ifndef _MSC_VER // MSVC does not allow unrestricted unions: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2544.pdf + union { +#endif + IString str; + double num; +#ifndef _MSC_VER + }; +#endif + int size; + FragType type; + + bool isNumber() const { + return type == INT || type == DOUBLE; + } + + explicit Frag(char* src) { + char *start = src; + if (isIdentInit(*src)) { + // read an identifier or a keyword + src++; + while (isIdentPart(*src)) { + src++; + } + if (*src == 0) { + str.set(start); + } else { + char temp = *src; + *src = 0; + str.set(start, false); + *src = temp; + } + type = keywords.has(str) ? KEYWORD : IDENT; + } else if (isDigit(*src) || (src[0] == '.' && isDigit(src[1]))) { + if (src[0] == '0' && (src[1] == 'x' || src[1] == 'X')) { + // Explicitly parse hex numbers of form "0x...", because strtod + // supports hex number strings only in C++11, and Visual Studio 2013 does + // not yet support that functionality. + src += 2; + num = 0; + while (1) { + if (*src >= '0' && *src <= '9') { num *= 16; num += *src - '0'; } + else if (*src >= 'a' && *src <= 'f') { num *= 16; num += *src - 'a' + 10; } + else if (*src >= 'A' && *src <= 'F') { num *= 16; num += *src - 'F' + 10; } + else break; + src++; + } + } else { + num = strtod(start, &src); + } + // asm.js must have a '.' for double values. however, we also tolerate + // uglify's tendency to emit without a '.' (and fix it later with a +). + // for valid asm.js input, the '.' should be enough, and for uglify + // in the emscripten optimizer pipeline, we use simple_ast where INT/DOUBLE + // is quite the same at this point anyhow + type = (std::find(start, src, '.') == src && is32Bit(num)) ? INT : DOUBLE; + assert(src > start); + } else if (hasChar(OPERATOR_INITS, *src)) { + switch (*src) { + case '!': str = src[1] == '=' ? NE : L_NOT; break; + case '%': str = MOD; break; + case '&': str = AND; break; + case '*': str = MUL; break; + case '+': str = PLUS; break; + case ',': str = COMMA; break; + case '-': str = MINUS; break; + case '.': str = PERIOD; break; + case '/': str = DIV; break; + case ':': str = COLON; break; + case '<': str = src[1] == '<' ? LSHIFT : (src[1] == '=' ? LE : LT); break; + case '=': str = src[1] == '=' ? EQ : SET; break; + case '>': str = src[1] == '>' ? (src[2] == '>' ? TRSHIFT : RSHIFT) : (src[1] == '=' ? GE : GT); break; + case '?': str = QUESTION; break; + case '^': str = XOR; break; + case '|': str = OR; break; + case '~': str = B_NOT; break; + default: abort(); + } + size = strlen(str.str); +#ifndef NDEBUG + char temp = start[size]; + start[size] = 0; + assert(strcmp(str.str, start) == 0); + start[size] = temp; +#endif + type = OPERATOR; + return; + } else if (hasChar(SEPARATORS, *src)) { + type = SEPARATOR; + char temp = src[1]; + src[1] = 0; + str.set(src, false); + src[1] = temp; + src++; + } else if (*src == '"' || *src == '\'') { + char *end = strchr(src+1, *src); + *end = 0; + str.set(src+1); + src = end+1; + type = STRING; + } else { + dump("frag parsing", src); + abort(); + } + size = src - start; + } + }; + + struct ExpressionElement { + bool isNode; +#ifndef _MSC_VER // MSVC does not allow unrestricted unions: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2544.pdf + union { +#endif + NodeRef node; + IString op; +#ifndef _MSC_VER + }; +#endif + ExpressionElement(NodeRef n) : isNode(true), node(n) {} + ExpressionElement(IString o) : isNode(false), op(o) {} + + NodeRef getNode() { + assert(isNode); + return node; + } + IString getOp() { + assert(!isNode); + return op; + } + }; + + // This is a list of the current stack of node-operator-node-operator-etc. + // this works by each parseExpression call appending to the vector; then recursing out, and the toplevel sorts it all + typedef std::vector ExpressionParts; + std::vector expressionPartsStack; + + // Parses an element in a list of such elements, e.g. list of statements in a block, or list of parameters in a call + NodeRef parseElement(char*& src, const char* seps=";") { + //dump("parseElement", src); + skipSpace(src); + Frag frag(src); + src += frag.size; + switch (frag.type) { + case KEYWORD: { + return parseAfterKeyword(frag, src, seps); + } + case IDENT: { + return parseAfterIdent(frag, src, seps); + } + case STRING: + case INT: + case DOUBLE: { + return parseExpression(parseFrag(frag), src, seps); + } + case SEPARATOR: { + if (frag.str == OPEN_PAREN) return parseExpression(parseAfterParen(src), src, seps); + if (frag.str == OPEN_BRACE) return parseExpression(parseAfterBrace(src), src, seps); + if (frag.str == OPEN_CURLY) return parseExpression(parseAfterCurly(src), src, seps); + abort(); + } + case OPERATOR: { + return parseExpression(frag.str, src, seps); + } + default: /* dump("parseElement", src); printf("bad frag type: %d\n", frag.type); */ abort(); + } + return nullptr; + } + + NodeRef parseFrag(Frag& frag) { + switch (frag.type) { + case IDENT: return Builder::makeName(frag.str); + case STRING: return Builder::makeString(frag.str); + case INT: return Builder::makeInt(uint32_t(frag.num)); + case DOUBLE: return Builder::makeDouble(frag.num); + default: abort(); + } + return nullptr; + } + + NodeRef parseAfterKeyword(Frag& frag, char*& src, const char* seps) { + skipSpace(src); + if (frag.str == FUNCTION) return parseFunction(src, seps); + else if (frag.str == VAR) return parseVar(src, seps, false); + else if (frag.str == CONST) return parseVar(src, seps, true); + else if (frag.str == RETURN) return parseReturn(src, seps); + else if (frag.str == IF) return parseIf(src, seps); + else if (frag.str == DO) return parseDo(src, seps); + else if (frag.str == WHILE) return parseWhile(src, seps); + else if (frag.str == BREAK) return parseBreak(src, seps); + else if (frag.str == CONTINUE) return parseContinue(src, seps); + else if (frag.str == SWITCH) return parseSwitch(src, seps); + else if (frag.str == NEW) return parseNew(src, seps); + dump(frag.str.str, src); + abort(); + return nullptr; + } + + NodeRef parseFunction(char*& src, const char* seps) { + Frag name(src); + if (name.type == IDENT) { + src += name.size; + } else { + assert(name.type == SEPARATOR && name.str[0] == '('); + name.str = IString(); + } + NodeRef ret = Builder::makeFunction(name.str); + skipSpace(src); + assert(*src == '('); + src++; + while (1) { + skipSpace(src); + if (*src == ')') break; + Frag arg(src); + assert(arg.type == IDENT); + src += arg.size; + Builder::appendArgumentToFunction(ret, arg.str); + skipSpace(src); + if (*src == ')') break; + if (*src == ',') { + src++; + continue; + } + abort(); + } + src++; + Builder::setBlockContent(ret, parseBracketedBlock(src)); + // TODO: parse expression? + return ret; + } + + NodeRef parseVar(char*& src, const char* seps, bool is_const) { + NodeRef ret = Builder::makeVar(is_const); + while (1) { + skipSpace(src); + if (*src == ';') break; + Frag name(src); + assert(name.type == IDENT); + NodeRef value; + src += name.size; + skipSpace(src); + if (*src == '=') { + src++; + skipSpace(src); + value = parseElement(src, ";,"); + } + Builder::appendToVar(ret, name.str, value); + skipSpace(src); + if (*src == ';') break; + if (*src == ',') { + src++; + continue; + } + abort(); + } + src++; + return ret; + } + + NodeRef parseReturn(char*& src, const char* seps) { + skipSpace(src); + NodeRef value = !hasChar(seps, *src) ? parseElement(src, seps) : nullptr; + skipSpace(src); + assert(hasChar(seps, *src)); + if (*src == ';') src++; + return Builder::makeReturn(value); + } + + NodeRef parseIf(char*& src, const char* seps) { + NodeRef condition = parseParenned(src); + NodeRef ifTrue = parseMaybeBracketed(src, seps); + skipSpace(src); + NodeRef ifFalse; + if (!hasChar(seps, *src)) { + Frag next(src); + if (next.type == KEYWORD && next.str == ELSE) { + src += next.size; + ifFalse = parseMaybeBracketed(src, seps); + } + } + return Builder::makeIf(condition, ifTrue, ifFalse); + } + + NodeRef parseDo(char*& src, const char* seps) { + NodeRef body = parseMaybeBracketed(src, seps); + skipSpace(src); + Frag next(src); + assert(next.type == KEYWORD && next.str == WHILE); + src += next.size; + NodeRef condition = parseParenned(src); + return Builder::makeDo(body, condition); + } + + NodeRef parseWhile(char*& src, const char* seps) { + NodeRef condition = parseParenned(src); + NodeRef body = parseMaybeBracketed(src, seps); + return Builder::makeWhile(condition, body); + } + + NodeRef parseBreak(char*& src, const char* seps) { + skipSpace(src); + Frag next(src); + if (next.type == IDENT) src += next.size; + return Builder::makeBreak(next.type == IDENT ? next.str : IString()); + } + + NodeRef parseContinue(char*& src, const char* seps) { + skipSpace(src); + Frag next(src); + if (next.type == IDENT) src += next.size; + return Builder::makeContinue(next.type == IDENT ? next.str : IString()); + } + + NodeRef parseSwitch(char*& src, const char* seps) { + NodeRef ret = Builder::makeSwitch(parseParenned(src)); + skipSpace(src); + assert(*src == '{'); + src++; + while (1) { + // find all cases and possibly a default + skipSpace(src); + if (*src == '}') break; + Frag next(src); + if (next.type == KEYWORD) { + if (next.str == CASE) { + src += next.size; + skipSpace(src); + NodeRef arg; + Frag value(src); + if (value.isNumber()) { + arg = parseFrag(value); + src += value.size; + } else { + assert(value.type == OPERATOR); + assert(value.str == MINUS); + src += value.size; + skipSpace(src); + Frag value2(src); + assert(value2.isNumber()); + arg = Builder::makePrefix(MINUS, parseFrag(value2)); + src += value2.size; + } + Builder::appendCaseToSwitch(ret, arg); + skipSpace(src); + assert(*src == ':'); + src++; + continue; + } else if (next.str == DEFAULT) { + src += next.size; + Builder::appendDefaultToSwitch(ret); + skipSpace(src); + assert(*src == ':'); + src++; + continue; + } + // otherwise, may be some keyword that happens to start a block (e.g. case 1: _return_ 5) + } + // not case X: or default: or }, so must be some code + skipSpace(src); + bool explicitBlock = *src == '{'; + NodeRef subBlock = explicitBlock ? parseBracketedBlock(src) : parseBlock(src, ";}", CASE, DEFAULT); + Builder::appendCodeToSwitch(ret, subBlock, explicitBlock); + } + skipSpace(src); + assert(*src == '}'); + src++; + return ret; + } + + NodeRef parseNew(char*& src, const char* seps) { + return Builder::makeNew(parseElement(src, seps)); + } + + NodeRef parseAfterIdent(Frag& frag, char*& src, const char* seps) { + skipSpace(src); + if (*src == '(') return parseExpression(parseCall(parseFrag(frag), src), src, seps); + if (*src == '[') return parseExpression(parseIndexing(parseFrag(frag), src), src, seps); + if (*src == ':' && expressionPartsStack.back().size() == 0) { + src++; + skipSpace(src); + NodeRef inner; + if (*src == '{') { // context lets us know this is not an object, but a block + inner = parseBracketedBlock(src); + } else { + inner = parseElement(src, seps); + } + return Builder::makeLabel(frag.str, inner); + } + if (*src == '.') return parseExpression(parseDotting(parseFrag(frag), src), src, seps); + return parseExpression(parseFrag(frag), src, seps); + } + + NodeRef parseCall(NodeRef target, char*& src) { + expressionPartsStack.resize(expressionPartsStack.size()+1); + assert(*src == '('); + src++; + NodeRef ret = Builder::makeCall(target); + while (1) { + skipSpace(src); + if (*src == ')') break; + Builder::appendToCall(ret, parseElement(src, ",)")); + skipSpace(src); + if (*src == ')') break; + if (*src == ',') { + src++; + continue; + } + abort(); + } + src++; + assert(expressionPartsStack.back().size() == 0); + expressionPartsStack.pop_back(); + return ret; + } + + NodeRef parseIndexing(NodeRef target, char*& src) { + expressionPartsStack.resize(expressionPartsStack.size()+1); + assert(*src == '['); + src++; + NodeRef ret = Builder::makeIndexing(target, parseElement(src, "]")); + skipSpace(src); + assert(*src == ']'); + src++; + assert(expressionPartsStack.back().size() == 0); + expressionPartsStack.pop_back(); + return ret; + } + + NodeRef parseDotting(NodeRef target, char*& src) { + assert(*src == '.'); + src++; + Frag key(src); + assert(key.type == IDENT); + src += key.size; + return Builder::makeDot(target, key.str); + } + + NodeRef parseAfterParen(char*& src) { + expressionPartsStack.resize(expressionPartsStack.size()+1); + skipSpace(src); + NodeRef ret = parseElement(src, ")"); + skipSpace(src); + assert(*src == ')'); + src++; + assert(expressionPartsStack.back().size() == 0); + expressionPartsStack.pop_back(); + return ret; + } + + NodeRef parseAfterBrace(char*& src) { + expressionPartsStack.resize(expressionPartsStack.size()+1); + NodeRef ret = Builder::makeArray(); + while (1) { + skipSpace(src); + assert(*src); + if (*src == ']') break; + NodeRef element = parseElement(src, ",]"); + Builder::appendToArray(ret, element); + skipSpace(src); + if (*src == ']') break; + if (*src == ',') { + src++; + continue; + } + abort(); + } + src++; + return ret; + } + + NodeRef parseAfterCurly(char*& src) { + expressionPartsStack.resize(expressionPartsStack.size()+1); + NodeRef ret = Builder::makeObject(); + while (1) { + skipSpace(src); + assert(*src); + if (*src == '}') break; + Frag key(src); + assert(key.type == IDENT || key.type == STRING); + src += key.size; + skipSpace(src); + assert(*src == ':'); + src++; + NodeRef value = parseElement(src, ",}"); + Builder::appendToObject(ret, key.str, value); + skipSpace(src); + if (*src == '}') break; + if (*src == ',') { + src++; + continue; + } + abort(); + } + src++; + return ret; + } + + void dumpParts(ExpressionParts& parts, int i) { + printf("expressionparts: %d (at %d)\n", parts.size(), i); + printf("| "); + for (int i = 0; i < parts.size(); i++) { + if (parts[i].isNode) { + parts[i].getNode()->stringify(std::cout); + printf(" "); + } else { + printf(" _%s_ ", parts[i].getOp().str); + } + } + printf("|\n"); + } + + NodeRef makeBinary(NodeRef left, IString op, NodeRef right) { + if (op == PERIOD) { + return Builder::makeDot(left, right); + } else { + return Builder::makeBinary(left, op ,right); + } + } + + NodeRef parseExpression(ExpressionElement initial, char*&src, const char* seps) { + //dump("parseExpression", src); + ExpressionParts& parts = expressionPartsStack.back(); + skipSpace(src); + if (*src == 0 || hasChar(seps, *src)) { + if (parts.size() > 0) { + parts.push_back(initial); // cherry on top of the cake + } + return initial.getNode(); + } + bool top = parts.size() == 0; + if (initial.isNode) { + Frag next(src); + if (next.type == OPERATOR) { + parts.push_back(initial); + src += next.size; + parts.push_back(next.str); + } else { + if (*src == '(') { + initial = parseCall(initial.getNode(), src); + } else if (*src == '[') { + initial = parseIndexing(initial.getNode(), src); + } else { + dump("bad parseExpression state", src); + abort(); + } + return parseExpression(initial, src, seps); + } + } else { + parts.push_back(initial); + } + NodeRef last = parseElement(src, seps); + if (!top) return last; + { + ExpressionParts& parts = expressionPartsStack.back(); // |parts| may have been invalidated by that call + // we are the toplevel. sort it all out + // collapse right to left, highest priority first + //dumpParts(parts, 0); + for (auto& ops : operatorClasses) { + if (ops.rtl) { + // right to left + for (int i = parts.size()-1; i >= 0; i--) { + if (parts[i].isNode) continue; + IString op = parts[i].getOp(); + if (!ops.ops.has(op)) continue; + if (ops.type == OperatorClass::Binary && i > 0 && i < (int)parts.size()-1) { + parts[i] = makeBinary(parts[i-1].getNode(), op, parts[i+1].getNode()); + parts.erase(parts.begin() + i + 1); + parts.erase(parts.begin() + i - 1); + } else if (ops.type == OperatorClass::Prefix && i < (int)parts.size()-1) { + if (i > 0 && parts[i-1].isNode) continue; // cannot apply prefix operator if it would join two nodes + parts[i] = Builder::makePrefix(op, parts[i+1].getNode()); + parts.erase(parts.begin() + i + 1); + } else if (ops.type == OperatorClass::Tertiary) { + // we must be at X ? Y : Z + // ^ + //dumpParts(parts, i); + if (op != COLON) continue; + assert(i < (int)parts.size()-1 && i >= 3); + if (parts[i-2].getOp() != QUESTION) continue; // e.g. x ? y ? 1 : 0 : 2 + parts[i-3] = Builder::makeConditional(parts[i-3].getNode(), parts[i-1].getNode(), parts[i+1].getNode()); + parts.erase(parts.begin() + i - 2, parts.begin() + i + 2); + i = parts.size(); // basically a reset, due to things like x ? y ? 1 : 0 : 2 + } // TODO: postfix + } + } else { + // left to right + for (int i = 0; i < (int)parts.size(); i++) { + if (parts[i].isNode) continue; + IString op = parts[i].getOp(); + if (!ops.ops.has(op)) continue; + if (ops.type == OperatorClass::Binary && i > 0 && i < (int)parts.size()-1) { + parts[i] = makeBinary(parts[i-1].getNode(), op, parts[i+1].getNode()); + parts.erase(parts.begin() + i + 1); + parts.erase(parts.begin() + i - 1); + i--; + } else if (ops.type == OperatorClass::Prefix && i < (int)parts.size()-1) { + if (i > 0 && parts[i-1].isNode) continue; // cannot apply prefix operator if it would join two nodes + parts[i] = Builder::makePrefix(op, parts[i+1].getNode()); + parts.erase(parts.begin() + i + 1); + i = std::max(i-2, 0); // allow a previous prefix operator to cascade + } // TODO: tertiary, postfix + } + } + } + assert(parts.size() == 1); + NodeRef ret = parts[0].getNode(); + parts.clear(); + return ret; + } + } + + // Parses a block of code (e.g. a bunch of statements inside {,}, or the top level of o file) + NodeRef parseBlock(char*& src, const char* seps=";", IString keywordSep1=IString(), IString keywordSep2=IString()) { + NodeRef block = Builder::makeBlock(); + //dump("parseBlock", src); + while (1) { + skipSpace(src); + if (*src == 0) break; + if (*src == ';') { + src++; // skip a statement in this block + continue; + } + if (hasChar(seps, *src)) break; + if (!!keywordSep1) { + Frag next(src); + if (next.type == KEYWORD && next.str == keywordSep1) break; + } + if (!!keywordSep2) { + Frag next(src); + if (next.type == KEYWORD && next.str == keywordSep2) break; + } + NodeRef element = parseElementOrStatement(src, seps); + Builder::appendToBlock(block, element); + } + return block; + } + + NodeRef parseBracketedBlock(char*& src) { + skipSpace(src); + assert(*src == '{'); + src++; + NodeRef block = parseBlock(src, ";}"); // the two are not symmetrical, ; is just internally separating, } is the final one - parseBlock knows all this + assert(*src == '}'); + src++; + return block; + } + + NodeRef parseElementOrStatement(char*& src, const char *seps) { + skipSpace(src); + if (*src == ';') { + src++; + return Builder::makeBlock(); // we don't need the brackets here, but oh well + } + NodeRef ret = parseElement(src, seps); + skipSpace(src); + if (*src == ';') { + ret = Builder::makeStatement(ret); + src++; + } + return ret; + } + + NodeRef parseMaybeBracketed(char*& src, const char *seps) { + skipSpace(src); + return *src == '{' ? parseBracketedBlock(src) : parseElementOrStatement(src, seps); + } + + NodeRef parseParenned(char*& src) { + skipSpace(src); + assert(*src == '('); + src++; + NodeRef ret = parseElement(src, ")"); + skipSpace(src); + assert(*src == ')'); + src++; + return ret; + } + + // Debugging + + char *allSource; + int allSize; + + static void dump(const char *where, char* curr) { + /* + printf("%s:\n=============\n", where); + for (int i = 0; i < allSize; i++) printf("%c", allSource[i] ? allSource[i] : '?'); + printf("\n"); + for (int i = 0; i < (curr - allSource); i++) printf(" "); + printf("^\n=============\n"); + */ + fprintf(stderr, "%s:\n==========\n", where); + int newlinesLeft = 2; + int charsLeft = 200; + while (*curr) { + if (*curr == '\n') { + newlinesLeft--; + if (newlinesLeft == 0) break; + } + charsLeft--; + if (charsLeft == 0) break; + fprintf(stderr, "%c", *curr++); + } + fprintf(stderr, "\n\n"); + } + +public: + + Parser() : allSource(nullptr), allSize(0) { + expressionPartsStack.resize(1); + } + + // Highest-level parsing, as of a JavaScript script file. + NodeRef parseToplevel(char* src) { + allSource = src; + allSize = strlen(src); + NodeRef toplevel = Builder::makeToplevel(); + Builder::setBlockContent(toplevel, parseBlock(src)); + return toplevel; + } +}; + +} // namespace cashew + +#endif // __parser_h__ + diff --git a/src/emscripten-optimizer/simple_ast.cpp b/src/emscripten-optimizer/simple_ast.cpp new file mode 100644 index 000000000..89b8ce8ed --- /dev/null +++ b/src/emscripten-optimizer/simple_ast.cpp @@ -0,0 +1,259 @@ + +#include "simple_ast.h" + +namespace cashew { + +// Ref methods + +Ref& Ref::operator[](unsigned x) { + return (*get())[x]; +} + +Ref& Ref::operator[](IString x) { + return (*get())[x]; +} + +bool Ref::operator==(const char *str) { + return get()->isString() && !strcmp(get()->str.str, str); +} + +bool Ref::operator!=(const char *str) { + return get()->isString() ? !!strcmp(get()->str.str, str) : true; +} + +bool Ref::operator==(const IString &str) { + return get()->isString() && get()->str == str; +} + +bool Ref::operator!=(const IString &str) { + return get()->isString() && get()->str != str; +} + +bool Ref::operator==(Ref other) { + return **this == *other; +} + +bool Ref::operator!() { + return !get() || get()->isNull(); +} + +// Arena + +Arena arena; + +Ref Arena::alloc() { + if (chunks.size() == 0 || index == CHUNK_SIZE) { + chunks.push_back(new Value[CHUNK_SIZE]); + index = 0; + } + return &chunks.back()[index++]; +} + +ArrayStorage* Arena::allocArray() { + if (arr_chunks.size() == 0 || arr_index == CHUNK_SIZE) { + arr_chunks.push_back(new ArrayStorage[CHUNK_SIZE]); + arr_index = 0; + } + return &arr_chunks.back()[arr_index++]; +} + +// dump + +void dump(const char *str, Ref node, bool pretty) { + std::cerr << str << ": "; + if (!!node) node->stringify(std::cerr, pretty); + else std::cerr << "(nullptr)"; + std::cerr << std::endl; +} + +// AST traversals + +// Traversals + +struct TraverseInfo { + TraverseInfo() {} + TraverseInfo(Ref node, ArrayStorage* arr) : node(node), arr(arr), index(0) {} + Ref node; + ArrayStorage* arr; + int index; +}; + +template +struct StackedStack { // a stack, on the stack + T stackStorage[init]; + T* storage; + int used, available; // used amount, available amount + bool alloced; + + StackedStack() : used(0), available(init), alloced(false) { + storage = stackStorage; + } + ~StackedStack() { + if (alloced) free(storage); + } + + int size() { return used; } + + void push_back(const T& t) { + assert(used <= available); + if (used == available) { + available *= 2; + if (!alloced) { + T* old = storage; + storage = (T*)malloc(sizeof(T)*available); + memcpy(storage, old, sizeof(T)*used); + alloced = true; + } else { + T *newStorage = (T*)realloc(storage, sizeof(T)*available); + assert(newStorage); + storage = newStorage; + } + } + assert(used < available); + assert(storage); + storage[used++] = t; + } + + T& back() { + assert(used > 0); + return storage[used-1]; + } + + void pop_back() { + assert(used > 0); + used--; + } +}; + +#define visitable(node) (node->isArray() && node->size() > 0) + +#define TRAV_STACK 40 + +// Traverse, calling visit before the children +void traversePre(Ref node, std::function visit) { + if (!visitable(node)) return; + visit(node); + StackedStack stack; + int index = 0; + ArrayStorage* arr = &node->getArray(); + int arrsize = (int)arr->size(); + Ref* arrdata = arr->data(); + stack.push_back(TraverseInfo(node, arr)); + while (1) { + if (index < arrsize) { + Ref sub = *(arrdata+index); + index++; + if (visitable(sub)) { + stack.back().index = index; + index = 0; + visit(sub); + arr = &sub->getArray(); + arrsize = (int)arr->size(); + arrdata = arr->data(); + stack.push_back(TraverseInfo(sub, arr)); + } + } else { + stack.pop_back(); + if (stack.size() == 0) break; + TraverseInfo& back = stack.back(); + index = back.index; + arr = back.arr; + arrsize = (int)arr->size(); + arrdata = arr->data(); + } + } +} + +// Traverse, calling visitPre before the children and visitPost after +void traversePrePost(Ref node, std::function visitPre, std::function visitPost) { + if (!visitable(node)) return; + visitPre(node); + StackedStack stack; + int index = 0; + ArrayStorage* arr = &node->getArray(); + int arrsize = (int)arr->size(); + Ref* arrdata = arr->data(); + stack.push_back(TraverseInfo(node, arr)); + while (1) { + if (index < arrsize) { + Ref sub = *(arrdata+index); + index++; + if (visitable(sub)) { + stack.back().index = index; + index = 0; + visitPre(sub); + arr = &sub->getArray(); + arrsize = (int)arr->size(); + arrdata = arr->data(); + stack.push_back(TraverseInfo(sub, arr)); + } + } else { + visitPost(stack.back().node); + stack.pop_back(); + if (stack.size() == 0) break; + TraverseInfo& back = stack.back(); + index = back.index; + arr = back.arr; + arrsize = (int)arr->size(); + arrdata = arr->data(); + } + } +} + +// Traverse, calling visitPre before the children and visitPost after. If pre returns false, do not traverse children +void traversePrePostConditional(Ref node, std::function visitPre, std::function visitPost) { + if (!visitable(node)) return; + if (!visitPre(node)) return; + StackedStack stack; + int index = 0; + ArrayStorage* arr = &node->getArray(); + int arrsize = (int)arr->size(); + Ref* arrdata = arr->data(); + stack.push_back(TraverseInfo(node, arr)); + while (1) { + if (index < arrsize) { + Ref sub = *(arrdata+index); + index++; + if (visitable(sub)) { + if (visitPre(sub)) { + stack.back().index = index; + index = 0; + arr = &sub->getArray(); + arrsize = (int)arr->size(); + arrdata = arr->data(); + stack.push_back(TraverseInfo(sub, arr)); + } + } + } else { + visitPost(stack.back().node); + stack.pop_back(); + if (stack.size() == 0) break; + TraverseInfo& back = stack.back(); + index = back.index; + arr = back.arr; + arrsize = (int)arr->size(); + arrdata = arr->data(); + } + } +} + +// Traverses all the top-level functions in the document +void traverseFunctions(Ref ast, std::function visit) { + if (!ast || ast->size() == 0) return; + if (ast[0] == TOPLEVEL) { + Ref stats = ast[1]; + for (size_t i = 0; i < stats->size(); i++) { + Ref curr = stats[i]; + if (curr[0] == DEFUN) visit(curr); + } + } else if (ast[0] == DEFUN) { + visit(ast); + } +} + +// ValueBuilder + +IStringSet ValueBuilder::statable("assign call binary unary-prefix if name num conditional dot new sub seq string object array"); + +} // namespace cashew + diff --git a/src/emscripten-optimizer/simple_ast.h b/src/emscripten-optimizer/simple_ast.h new file mode 100644 index 000000000..4012ff8f6 --- /dev/null +++ b/src/emscripten-optimizer/simple_ast.h @@ -0,0 +1,1542 @@ + +#ifndef __simple_ast_h__ +#define __simple_ast_h__ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "parser.h" + +#include "snprintf.h" + +#define err(str) fprintf(stderr, str "\n"); +#define errv(str, ...) fprintf(stderr, str "\n", __VA_ARGS__); +#define printErr err + +namespace cashew { + +struct Ref; +struct Value; + +void dump(const char *str, Ref node, bool pretty=false); + +// Reference to a value, plus some operators for convenience +struct Ref { + Value* inst; + + Ref(Value *v=nullptr) : inst(v) {} + + Value* get() { return inst; } + + Value& operator*() { return *inst; } + Value* operator->() { return inst; } + Ref& operator[](unsigned x); + Ref& operator[](IString x); + + // special conveniences + bool operator==(const char *str); // comparison to string, which is by value + bool operator!=(const char *str); + bool operator==(const IString &str); + bool operator!=(const IString &str); + bool operator==(double d) { abort(); return false; } // prevent Ref == number, which is potentially ambiguous; use ->getNumber() == number + bool operator==(Ref other); + bool operator!(); // check if null, in effect +}; + +// Arena allocation, free it all on process exit + +typedef std::vector ArrayStorage; + +struct Arena { + #define CHUNK_SIZE 1000 + std::vector chunks; + int index; // in last chunk + + std::vector arr_chunks; + int arr_index; + + Arena() : index(0), arr_index(0) {} + + Ref alloc(); + ArrayStorage* allocArray(); +}; + +extern Arena arena; + +// Main value type +struct Value { + enum Type { + String = 0, + Number = 1, + Array = 2, + Null = 3, + Bool = 4, + Object = 5 + }; + + Type type; + + typedef std::unordered_map ObjectStorage; + +#ifdef _MSC_VER // MSVC does not allow unrestricted unions: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2544.pdf + IString str; +#endif + union { // TODO: optimize +#ifndef _MSC_VER + IString str; +#endif + double num; + ArrayStorage *arr; + bool boo; + ObjectStorage *obj; + }; + + // constructors all copy their input + Value() : type(Null), num(0) {} + explicit Value(const char *s) : type(Null) { + setString(s); + } + explicit Value(double n) : type(Null) { + setNumber(n); + } + explicit Value(ArrayStorage &a) : type(Null) { + setArray(); + *arr = a; + } + // no bool constructor - would endanger the double one (int might convert the wrong way) + + ~Value() { + free(); + } + + void free() { + if (type == Array) { arr->clear(); arr->shrink_to_fit(); } + else if (type == Object) delete obj; + type = Null; + num = 0; + } + + Value& setString(const char *s) { + free(); + type = String; + str.set(s); + return *this; + } + Value& setString(const IString &s) { + free(); + type = String; + str.set(s); + return *this; + } + Value& setNumber(double n) { + free(); + type = Number; + num = n; + return *this; + } + Value& setArray(ArrayStorage &a) { + free(); + type = Array; + arr = arena.allocArray(); + *arr = a; + return *this; + } + Value& setArray(int size_hint=0) { + free(); + type = Array; + arr = arena.allocArray(); + arr->reserve(size_hint); + return *this; + } + Value& setNull() { + free(); + type = Null; + return *this; + } + Value& setBool(bool b) { // Bool in the name, as otherwise might overload over int + free(); + type = Bool; + boo = b; + return *this; + } + Value& setObject() { + free(); + type = Object; + obj = new ObjectStorage(); + return *this; + } + + bool isString() { return type == String; } + bool isNumber() { return type == Number; } + bool isArray() { return type == Array; } + bool isNull() { return type == Null; } + bool isBool() { return type == Bool; } + bool isObject() { return type == Object; } + + bool isBool(bool b) { return type == Bool && b == boo; } // avoid overloading == as it might overload over int + + const char* getCString() { + assert(isString()); + return str.str; + } + IString& getIString() { + assert(isString()); + return str; + } + double& getNumber() { + assert(isNumber()); + return num; + } + ArrayStorage& getArray() { + assert(isArray()); + return *arr; + } + bool& getBool() { + assert(isBool()); + return boo; + } + + int getInteger() { // convenience function to get a known integer + assert(fmod(getNumber(), 1) == 0); + int ret = int(getNumber()); + assert(double(ret) == getNumber()); // no loss in conversion + return ret; + } + + Value& operator=(const Value& other) { + free(); + switch (other.type) { + case String: + setString(other.str); + break; + case Number: + setNumber(other.num); + break; + case Array: + setArray(*other.arr); + break; + case Null: + setNull(); + break; + case Bool: + setBool(other.boo); + break; + case Object: + abort(); // TODO + } + return *this; + } + + bool operator==(const Value& other) { + if (type != other.type) return false; + switch (other.type) { + case String: + return str == other.str; + case Number: + return num == other.num; + case Array: + return this == &other; // if you want a deep compare, use deepCompare + case Null: + break; + case Bool: + return boo == other.boo; + case Object: + return this == &other; // if you want a deep compare, use deepCompare + } + return true; + } + + bool deepCompare(Ref ref) { + Value& other = *ref; + if (*this == other) return true; // either same pointer, or identical value type (string, number, null or bool) + if (type != other.type) return false; + if (type == Array) { + if (arr->size() != other.arr->size()) return false; + for (unsigned i = 0; i < arr->size(); i++) { + if (!(*arr)[i]->deepCompare((*other.arr)[i])) return false; + } + return true; + } else if (type == Object) { + if (obj->size() != other.obj->size()) return false; + for (auto i : *obj) { + if (other.obj->count(i.first) == 0) return false; + if (i.second->deepCompare((*other.obj)[i.first])) return false; + } + return true; + } + return false; + } + + char* parse(char* curr) { + #define is_json_space(x) (x == 32 || x == 9 || x == 10 || x == 13) /* space, tab, linefeed/newline, or return */ + #define skip() { while (*curr && is_json_space(*curr)) curr++; } + skip(); + if (*curr == '"') { + // String + curr++; + char *close = strchr(curr, '"'); + assert(close); + *close = 0; // end this string, and reuse it straight from the input + setString(curr); + curr = close+1; + } else if (*curr == '[') { + // Array + curr++; + skip(); + setArray(); + while (*curr != ']') { + Ref temp = arena.alloc(); + arr->push_back(temp); + curr = temp->parse(curr); + skip(); + if (*curr == ']') break; + assert(*curr == ','); + curr++; + skip(); + } + curr++; + } else if (*curr == 'n') { + // Null + assert(strncmp(curr, "null", 4) == 0); + setNull(); + curr += 4; + } else if (*curr == 't') { + // Bool true + assert(strncmp(curr, "true", 4) == 0); + setBool(true); + curr += 4; + } else if (*curr == 'f') { + // Bool false + assert(strncmp(curr, "false", 5) == 0); + setBool(false); + curr += 5; + } else if (*curr == '{') { + // Object + curr++; + skip(); + setObject(); + while (*curr != '}') { + assert(*curr == '"'); + curr++; + char *close = strchr(curr, '"'); + assert(close); + *close = 0; // end this string, and reuse it straight from the input + IString key(curr); + curr = close+1; + skip(); + assert(*curr == ':'); + curr++; + skip(); + Ref value = arena.alloc(); + curr = value->parse(curr); + (*obj)[key] = value; + skip(); + if (*curr == '}') break; + assert(*curr == ','); + curr++; + skip(); + } + curr++; + } else { + // Number + char *after; + setNumber(strtod(curr, &after)); + curr = after; + } + return curr; + } + + void stringify(std::ostream &os, bool pretty=false) { + static int indent = 0; + #define indentify() { for (int i = 0; i < indent; i++) os << " "; } + switch (type) { + case String: + os << '"' << str.str << '"'; + break; + case Number: + os << std::setprecision(17) << num; // doubles can have 17 digits of precision + break; + case Array: + if (arr->size() == 0) { + os << "[]"; + break; + } + os << '['; + if (pretty) { + os << std::endl; + indent++; + } + for (unsigned i = 0; i < arr->size(); i++) { + if (i > 0) { + if (pretty) os << "," << std::endl; + else os << ", "; + } + indentify(); + (*arr)[i]->stringify(os, pretty); + } + if (pretty) { + os << std::endl; + indent--; + } + indentify(); + os << ']'; + break; + case Null: + os << "null"; + break; + case Bool: + os << (boo ? "true" : "false"); + break; + case Object: + os << '{'; + if (pretty) { + os << std::endl; + indent++; + } + bool first = true; + for (auto i : *obj) { + if (first) { + first = false; + } else { + os << ", "; + if (pretty) os << std::endl; + } + indentify(); + os << '"' << i.first.c_str() << "\": "; + i.second->stringify(os, pretty); + } + if (pretty) { + os << std::endl; + indent--; + } + indentify(); + os << '}'; + break; + } + } + + // String operations + + // Number operations + + // Array operations + + unsigned size() { + assert(isArray()); + return arr->size(); + } + + void setSize(unsigned size) { + assert(isArray()); + unsigned old = arr->size(); + if (old != size) arr->resize(size); + if (old < size) { + for (unsigned i = old; i < size; i++) { + (*arr)[i] = arena.alloc(); + } + } + } + + Ref& operator[](unsigned x) { + assert(isArray()); + return arr->at(x); + } + + Value& push_back(Ref r) { + assert(isArray()); + arr->push_back(r); + return *this; + } + Ref pop_back() { + assert(isArray()); + Ref ret = arr->back(); + arr->pop_back(); + return ret; + } + + Ref back() { + assert(isArray()); + if (arr->size() == 0) return nullptr; + return arr->back(); + } + + void splice(int x, int num) { + assert(isArray()); + arr->erase(arr->begin() + x, arr->begin() + x + num); + } + + void insert(int x, int num) { + arr->insert(arr->begin() + x, num, Ref()); + } + void insert(int x, Ref node) { + arr->insert(arr->begin() + x, 1, node); + } + + int indexOf(Ref other) { + assert(isArray()); + for (unsigned i = 0; i < arr->size(); i++) { + if (other == (*arr)[i]) return i; + } + return -1; + } + + Ref map(std::function func) { + assert(isArray()); + Ref ret = arena.alloc(); + ret->setArray(); + for (unsigned i = 0; i < arr->size(); i++) { + ret->push_back(func((*arr)[i])); + } + return ret; + } + + Ref filter(std::function func) { + assert(isArray()); + Ref ret = arena.alloc(); + ret->setArray(); + for (unsigned i = 0; i < arr->size(); i++) { + Ref curr = (*arr)[i]; + if (func(curr)) ret->push_back(curr); + } + return ret; + } + + /* + void forEach(std::function func) { + for (unsigned i = 0; i < arr->size(); i++) { + func((*arr)[i]); + } + } + */ + + // Null operations + + // Bool operations + + // Object operations + + Ref& operator[](IString x) { + assert(isObject()); + return (*obj)[x]; + } + + bool has(IString x) { + assert(isObject()); + return obj->count(x) > 0; + } +}; + +// AST traversals + +// Traverse, calling visit before the children +void traversePre(Ref node, std::function visit); + +// Traverse, calling visitPre before the children and visitPost after +void traversePrePost(Ref node, std::function visitPre, std::function visitPost); + +// Traverse, calling visitPre before the children and visitPost after. If pre returns false, do not traverse children +void traversePrePostConditional(Ref node, std::function visitPre, std::function visitPost); + +// Traverses all the top-level functions in the document +void traverseFunctions(Ref ast, std::function visit); + +// JS printer + +struct JSPrinter { + bool pretty, finalize; + + char *buffer; + int size, used; + + int indent; + bool possibleSpace; // add a space to separate identifiers + + Ref ast; + + JSPrinter(bool pretty_, bool finalize_, Ref ast_) : pretty(pretty_), finalize(finalize_), buffer(0), size(0), used(0), indent(0), possibleSpace(false), ast(ast_) {} + + void printAst() { + print(ast); + buffer[used] = 0; + } + + // Utils + + void ensure(int safety=100) { + if (size < used + safety) { + size = std::max(1024, size*2) + safety; + if (!buffer) { + buffer = (char*)malloc(size); + if (!buffer) { + printf("Out of memory allocating %d bytes for output buffer!", size); + abort(); + } + } else { + char *buf = (char*)realloc(buffer, size); + if (!buf) { + free(buffer); + printf("Out of memory allocating %d bytes for output buffer!", size); + abort(); + } + buffer = buf; + } + } + } + + void emit(char c) { + maybeSpace(c); + if (!pretty && c == '}' && buffer[used-1] == ';') used--; // optimize ;} into }, the ; is not separating anything + ensure(1); + buffer[used++] = c; + } + + void emit(const char *s) { + maybeSpace(*s); + int len = strlen(s); + ensure(len+1); + strcpy(buffer + used, s); + used += len; + } + + void newline() { + if (!pretty) return; + emit('\n'); + for (int i = 0; i < indent; i++) emit(' '); + } + + void space() { + if (pretty) emit(' '); + } + + void safeSpace() { + if (pretty) emit(' '); + else possibleSpace = true; + } + + void maybeSpace(char s) { + if (possibleSpace) { + possibleSpace = false; + if (isIdentPart(s)) emit(' '); + } + } + + void print(Ref node) { + ensure(); + IString type = node[0]->getIString(); + //fprintf(stderr, "printing %s\n", type.str); + switch (type.str[0]) { + case 'a': { + if (type == ASSIGN) printAssign(node); + else if (type == ARRAY) printArray(node); + else abort(); + break; + } + case 'b': { + if (type == BINARY) printBinary(node); + else if (type == BLOCK) printBlock(node); + else if (type == BREAK) printBreak(node); + else abort(); + break; + } + case 'c': { + if (type == CALL) printCall(node); + else if (type == CONDITIONAL) printConditional(node); + else if (type == CONTINUE) printContinue(node); + else abort(); + break; + } + case 'd': { + if (type == DEFUN) printDefun(node); + else if (type == DO) printDo(node); + else if (type == DOT) printDot(node); + else abort(); + break; + } + case 'i': { + if (type == IF) printIf(node); + else abort(); + break; + } + case 'l': { + if (type == LABEL) printLabel(node); + else abort(); + break; + } + case 'n': { + if (type == NAME) printName(node); + else if (type == NUM) printNum(node); + else if (type == NEW) printNew(node); + else abort(); + break; + } + case 'o': { + if (type == OBJECT) printObject(node); + break; + } + case 'r': { + if (type == RETURN) printReturn(node); + else abort(); + break; + } + case 's': { + if (type == STAT) printStat(node); + else if (type == SUB) printSub(node); + else if (type == SEQ) printSeq(node); + else if (type == SWITCH) printSwitch(node); + else if (type == STRING) printString(node); + else abort(); + break; + } + case 't': { + if (type == TOPLEVEL) printToplevel(node); + else abort(); + break; + } + case 'u': { + if (type == UNARY_PREFIX) printUnaryPrefix(node); + else abort(); + break; + } + case 'v': { + if (type == VAR) printVar(node); + else abort(); + break; + } + case 'w': { + if (type == WHILE) printWhile(node); + else abort(); + break; + } + default: { + printf("cannot yet print %s\n", type.str); + abort(); + } + } + } + + // print a node, and if nothing is emitted, emit something instead + void print(Ref node, const char *otherwise) { + int last = used; + print(node); + if (used == last) emit(otherwise); + } + + void printStats(Ref stats) { + bool first = true; + for (size_t i = 0; i < stats->size(); i++) { + Ref curr = stats[i]; + if (!isNothing(curr)) { + if (first) first = false; + else newline(); + print(stats[i]); + } + } + } + + void printToplevel(Ref node) { + if (node[1]->size() > 0) { + printStats(node[1]); + } + } + + void printBlock(Ref node) { + if (node->size() == 1 || node[1]->size() == 0) { + emit("{}"); + return; + } + emit('{'); + indent++; + newline(); + printStats(node[1]); + indent--; + newline(); + emit('}'); + } + + void printDefun(Ref node) { + emit("function "); + emit(node[1]->getCString()); + emit('('); + Ref args = node[2]; + for (size_t i = 0; i < args->size(); i++) { + if (i > 0) (pretty ? emit(", ") : emit(',')); + emit(args[i]->getCString()); + } + emit(')'); + space(); + if (node->size() == 3 || node[3]->size() == 0) { + emit("{}"); + return; + } + emit('{'); + indent++; + newline(); + printStats(node[3]); + indent--; + newline(); + emit('}'); + newline(); + } + + bool isNothing(Ref node) { + return (node[0] == TOPLEVEL && node[1]->size() == 0) || (node[0] == STAT && isNothing(node[1])); + } + + void printStat(Ref node) { + if (!isNothing(node[1])) { + print(node[1]); + if (buffer[used-1] != ';') emit(';'); + } + } + + void printAssign(Ref node) { + printChild(node[2], node, -1); + space(); + emit('='); + space(); + printChild(node[3], node, 1); + } + + void printName(Ref node) { + emit(node[1]->getCString()); + } + + static char* numToString(double d, bool finalize=true) { + bool neg = d < 0; + if (neg) d = -d; + // try to emit the fewest necessary characters + bool integer = fmod(d, 1) == 0; + #define BUFFERSIZE 1000 + static char full_storage_f[BUFFERSIZE], full_storage_e[BUFFERSIZE]; // f is normal, e is scientific for float, x for integer + static char *storage_f = full_storage_f + 1, *storage_e = full_storage_e + 1; // full has one more char, for a possible '-' + double err_f, err_e; + for (int e = 0; e <= 1; e++) { + char *buffer = e ? storage_e : storage_f; + double temp; + if (!integer) { + static char format[6]; + for (int i = 0; i <= 18; i++) { + format[0] = '%'; + format[1] = '.'; + if (i < 10) { + format[2] = '0' + i; + format[3] = e ? 'e' : 'f'; + format[4] = 0; + } else { + format[2] = '1'; + format[3] = '0' + (i - 10); + format[4] = e ? 'e' : 'f'; + format[5] = 0; + } + snprintf(buffer, BUFFERSIZE-1, format, d); + sscanf(buffer, "%lf", &temp); + //errv("%.18f, %.18e => %s => %.18f, %.18e (%d), ", d, d, buffer, temp, temp, temp == d); + if (temp == d) break; + } + } else { + // integer + assert(d >= 0); + unsigned long long uu = (unsigned long long)d; + if (uu == d) { + bool asHex = e && !finalize; + snprintf(buffer, BUFFERSIZE-1, asHex ? "0x%llx" : "%llu", uu); + if (asHex) { + unsigned long long tempULL; + sscanf(buffer, "%llx", &tempULL); + temp = (double)tempULL; + } else { + sscanf(buffer, "%lf", &temp); + } + } else { + // too large for a machine integer, just use floats + snprintf(buffer, BUFFERSIZE-1, e ? "%e" : "%.0f", d); // even on integers, e with a dot is useful, e.g. 1.2e+200 + sscanf(buffer, "%lf", &temp); + } + //errv("%.18f, %.18e => %s => %.18f, %.18e, %llu (%d)\n", d, d, buffer, temp, temp, uu, temp == d); + } + (e ? err_e : err_f) = fabs(temp - d); + //errv("current attempt: %.18f => %s", d, buffer); + //assert(temp == d); + char *dot = strchr(buffer, '.'); + if (dot) { + // remove trailing zeros + char *end = dot+1; + while (*end >= '0' && *end <= '9') end++; + end--; + while (*end == '0') { + char *copy = end; + do { + copy[0] = copy[1]; + } while (*copy++ != 0); + end--; + } + //errv("%.18f => %s", d, buffer); + // remove preceding zeros + while (*buffer == '0') { + char *copy = buffer; + do { + copy[0] = copy[1]; + } while (*copy++ != 0); + } + //errv("%.18f ===> %s", d, buffer); + } else if (!integer || !e) { + // no dot. try to change 12345000 => 12345e3 + char *end = strchr(buffer, 0); + end--; + char *test = end; + // remove zeros, and also doubles can use at most 24 digits, we can truncate any extras even if not zero + while ((*test == '0' || test - buffer > 24) && test > buffer) test--; + int num = end - test; + if (num >= 3) { + test++; + test[0] = 'e'; + if (num < 10) { + test[1] = '0' + num; + test[2] = 0; + } else if (num < 100) { + test[1] = '0' + (num / 10); + test[2] = '0' + (num % 10); + test[3] = 0; + } else { + assert(num < 1000); + test[1] = '0' + (num / 100); + test[2] = '0' + (num % 100) / 10; + test[3] = '0' + (num % 10); + test[4] = 0; + } + } + } + //errv("..current attempt: %.18f => %s", d, buffer); + } + //fprintf(stderr, "options:\n%s\n%s\n (first? %d)\n", storage_e, storage_f, strlen(storage_e) < strlen(storage_f)); + char *ret; + if (err_e == err_f) { + ret = strlen(storage_e) < strlen(storage_f) ? storage_e : storage_f; + } else { + ret = err_e < err_f ? storage_e : storage_f; + } + if (neg) { + ret--; // safe to go back one, there is one more char in full_* + *ret = '-'; + } + return ret; + } + + void printNum(Ref node) { + emit(numToString(node[1]->getNumber(), finalize)); + } + + void printString(Ref node) { + emit('"'); + emit(node[1]->getCString()); + emit('"'); + } + + // Parens optimizing + + bool capturesOperators(Ref node) { + Ref type = node[0]; + return type == CALL || type == ARRAY || type == OBJECT || type == SEQ; + } + + int getPrecedence(Ref node, bool parent) { + Ref type = node[0]; + if (type == BINARY || type == UNARY_PREFIX) { + return OperatorClass::getPrecedence(type == BINARY ? OperatorClass::Binary : OperatorClass::Prefix, node[1]->getIString()); + } else if (type == SEQ) { + return OperatorClass::getPrecedence(OperatorClass::Binary, COMMA); + } else if (type == CALL) { + return parent ? OperatorClass::getPrecedence(OperatorClass::Binary, COMMA) : -1; // call arguments are split by commas, but call itself is safe + } else if (type == ASSIGN) { + return OperatorClass::getPrecedence(OperatorClass::Binary, SET); + } else if (type == CONDITIONAL) { + return OperatorClass::getPrecedence(OperatorClass::Tertiary, QUESTION); + } + // otherwise, this is something that fixes precedence explicitly, and we can ignore + return -1; // XXX + } + + // check whether we need parens for the child, when rendered in the parent + // @param childPosition -1 means it is printed to the left of parent, 0 means "anywhere", 1 means right + bool needParens(Ref parent, Ref child, int childPosition) { + int parentPrecedence = getPrecedence(parent, true); + int childPrecedence = getPrecedence(child, false); + + if (childPrecedence > parentPrecedence) return true; // child is definitely a danger + if (childPrecedence < parentPrecedence) return false; // definitely cool + // equal precedence, so associativity (rtl/ltr) is what matters + // (except for some exceptions, where multiple operators can combine into confusion) + if (parent[0] == UNARY_PREFIX) { + assert(child[0] == UNARY_PREFIX); + if ((parent[1] == PLUS || parent[1] == MINUS) && child[1] == parent[1]) { + // cannot emit ++x when we mean +(+x) + return true; + } + } + if (childPosition == 0) return true; // child could be anywhere, so always paren + if (childPrecedence < 0) return false; // both precedences are safe + // check if child is on the dangerous side + if (OperatorClass::getRtl(parentPrecedence)) return childPosition < 0; + else return childPosition > 0; + } + + void printChild(Ref child, Ref parent, int childPosition=0) { + bool parens = needParens(parent, child, childPosition); + if (parens) emit('('); + print(child); + if (parens) emit(')'); + } + + void printBinary(Ref node) { + printChild(node[2], node, -1); + space(); + emit(node[1]->getCString()); + space(); + printChild(node[3], node, 1); + } + + void printUnaryPrefix(Ref node) { + if (finalize && node[1] == PLUS && (node[2][0] == NUM || + (node[2][0] == UNARY_PREFIX && node[2][1] == MINUS && node[2][2][0] == NUM))) { + // emit a finalized number + int last = used; + print(node[2]); + ensure(1); // we temporarily append a 0 + char *curr = buffer + last; // ensure might invalidate + buffer[used] = 0; + if (strchr(curr, '.')) return; // already a decimal point, all good + char *e = strchr(curr, 'e'); + if (!e) { + emit(".0"); + return; + } + ensure(3); + curr = buffer + last; // ensure might invalidate + char *end = strchr(curr, 0); + while (end >= e) { + end[2] = end[0]; + end--; + } + e[0] = '.'; + e[1] = '0'; + used += 2; + return; + } + if ((buffer[used-1] == '-' && node[1] == MINUS) || + (buffer[used-1] == '+' && node[1] == PLUS)) { + emit(' '); // cannot join - and - to --, looks like the -- operator + } + emit(node[1]->getCString()); + printChild(node[2], node, 1); + } + + void printConditional(Ref node) { + printChild(node[1], node, -1); + space(); + emit('?'); + space(); + printChild(node[2], node, 0); + space(); + emit(':'); + space(); + printChild(node[3], node, 1); + } + + void printCall(Ref node) { + printChild(node[1], node, 0); + emit('('); + Ref args = node[2]; + for (size_t i = 0; i < args->size(); i++) { + if (i > 0) (pretty ? emit(", ") : emit(',')); + printChild(args[i], node, 0); + } + emit(')'); + } + + void printSeq(Ref node) { + printChild(node[1], node, -1); + emit(','); + space(); + printChild(node[2], node, 1); + } + + void printDot(Ref node) { + print(node[1]); + emit('.'); + emit(node[2]->getCString()); + } + + void printSwitch(Ref node) { + emit("switch"); + space(); + emit('('); + print(node[1]); + emit(')'); + space(); + emit('{'); + newline(); + Ref cases = node[2]; + for (size_t i = 0; i < cases->size(); i++) { + Ref c = cases[i]; + if (!c[0]) { + emit("default:"); + } else { + emit("case "); + print(c[0]); + emit(':'); + } + if (c[1]->size() > 0) { + indent++; + newline(); + int curr = used; + printStats(c[1]); + indent--; + if (curr != used) newline(); + else used--; // avoid the extra indentation we added tentatively + } else { + newline(); + } + } + emit('}'); + } + + void printSub(Ref node) { + printChild(node[1], node, -1); + emit('['); + print(node[2]); + emit(']'); + } + + void printVar(Ref node) { + emit("var "); + Ref args = node[1]; + for (size_t i = 0; i < args->size(); i++) { + if (i > 0) (pretty ? emit(", ") : emit(',')); + emit(args[i][0]->getCString()); + if (args[i]->size() > 1) { + space(); + emit('='); + space(); + print(args[i][1]); + } + } + emit(';'); + } + + static bool ifHasElse(Ref node) { + assert(node[0] == IF); + return node->size() >= 4 && !!node[3]; + } + + void printIf(Ref node) { + emit("if"); + safeSpace(); + emit('('); + print(node[1]); + emit(')'); + space(); + // special case: we need braces to save us from ambiguity, if () { if () } else. otherwise else binds to inner if + // also need to recurse for if () { if () { } else { if () } else + // (note that this is only a problem if the if body has a single element in it, not a block or such, as then + // the block would be braced) + // this analysis is a little conservative - it assumes any child if could be confused with us, which implies + // all other braces vanished (the worst case for us, we are not saved by other braces). + bool needBraces = false; + bool hasElse = ifHasElse(node); + if (hasElse) { + Ref child = node[2]; + while (child[0] == IF) { + if (!ifHasElse(child)) { + needBraces = true; + break; + } + child = child[3]; // continue into the else + } + } + if (needBraces) { + emit('{'); + indent++; + newline(); + print(node[2]); + indent--; + newline(); + emit('}'); + } else { + print(node[2], "{}"); + } + if (hasElse) { + space(); + emit("else"); + safeSpace(); + print(node[3], "{}"); + } + } + + void printDo(Ref node) { + emit("do"); + safeSpace(); + print(node[2], "{}"); + space(); + emit("while"); + space(); + emit('('); + print(node[1]); + emit(')'); + emit(';'); + } + + void printWhile(Ref node) { + emit("while"); + space(); + emit('('); + print(node[1]); + emit(')'); + space(); + print(node[2], "{}"); + } + + void printLabel(Ref node) { + emit(node[1]->getCString()); + space(); + emit(':'); + space(); + print(node[2]); + } + + void printReturn(Ref node) { + emit("return"); + if (!!node[1]) { + emit(' '); + print(node[1]); + } + emit(';'); + } + + void printBreak(Ref node) { + emit("break"); + if (!!node[1]) { + emit(' '); + emit(node[1]->getCString()); + } + emit(';'); + } + + void printContinue(Ref node) { + emit("continue"); + if (!!node[1]) { + emit(' '); + emit(node[1]->getCString()); + } + emit(';'); + } + + void printNew(Ref node) { + emit("new "); + print(node[1]); + } + + void printArray(Ref node) { + emit('['); + Ref args = node[1]; + for (size_t i = 0; i < args->size(); i++) { + if (i > 0) (pretty ? emit(", ") : emit(',')); + print(args[i]); + } + emit(']'); + } + + void printObject(Ref node) { + emit('{'); + indent++; + newline(); + Ref args = node[1]; + for (size_t i = 0; i < args->size(); i++) { + if (i > 0) { + pretty ? emit(", ") : emit(','); + newline(); + } + emit('"'); + emit(args[i][0]->getCString()); + emit("\":"); + space(); + print(args[i][1]); + } + indent--; + newline(); + emit('}'); + } +}; + +// cashew builder + +class ValueBuilder { + static IStringSet statable; + + static Ref makeRawString(const IString& s) { + return &arena.alloc()->setString(s); + } + + static Ref makeRawArray(int size_hint=0) { + return &arena.alloc()->setArray(size_hint); + } + + static Ref makeNull() { + return &arena.alloc()->setNull(); + } + +public: + static Ref makeToplevel() { + return &makeRawArray(2)->push_back(makeRawString(TOPLEVEL)) + .push_back(makeRawArray()); + } + + static Ref makeString(IString str) { + return &makeRawArray(2)->push_back(makeRawString(STRING)) + .push_back(makeRawString(str)); + } + + static Ref makeBlock() { + return &makeRawArray(2)->push_back(makeRawString(BLOCK)) + .push_back(makeRawArray()); + } + + static Ref makeName(IString name) { + return &makeRawArray(2)->push_back(makeRawString(NAME)) + .push_back(makeRawString(name)); + } + + static void setBlockContent(Ref target, Ref block) { + if (target[0] == TOPLEVEL) { + target[1]->setArray(block[1]->getArray()); + } else if (target[0] == DEFUN) { + target[3]->setArray(block[1]->getArray()); + } else abort(); + } + + static void appendToBlock(Ref block, Ref element) { + assert(block[0] == BLOCK); + block[1]->push_back(element); + } + + static Ref makeCall(Ref target) { + return &makeRawArray(3)->push_back(makeRawString(CALL)) + .push_back(target) + .push_back(makeRawArray()); + } + + static void appendToCall(Ref call, Ref element) { + assert(call[0] == CALL); + call[2]->push_back(element); + } + + static Ref makeStatement(Ref contents) { + if (statable.has(contents[0]->getIString())) { + return &makeRawArray(2)->push_back(makeRawString(STAT)) + .push_back(contents); + } else { + return contents; // only very specific things actually need to be stat'ed + } + } + + static Ref makeDouble(double num) { + return &makeRawArray(2)->push_back(makeRawString(NUM)) + .push_back(&arena.alloc()->setNumber(num)); + } + static Ref makeInt(uint32_t num) { + return makeDouble(double(num)); + } + + static Ref makeBinary(Ref left, IString op, Ref right) { + if (op == SET) { + return &makeRawArray(4)->push_back(makeRawString(ASSIGN)) + .push_back(&arena.alloc()->setBool(true)) + .push_back(left) + .push_back(right); + } else if (op == COMMA) { + return &makeRawArray(3)->push_back(makeRawString(SEQ)) + .push_back(left) + .push_back(right); + } else { + return &makeRawArray(4)->push_back(makeRawString(BINARY)) + .push_back(makeRawString(op)) + .push_back(left) + .push_back(right); + } + } + + static Ref makePrefix(IString op, Ref right) { + return &makeRawArray(3)->push_back(makeRawString(UNARY_PREFIX)) + .push_back(makeRawString(op)) + .push_back(right); + } + + static Ref makeFunction(IString name) { + return &makeRawArray(4)->push_back(makeRawString(DEFUN)) + .push_back(makeRawString(name)) + .push_back(makeRawArray()) + .push_back(makeRawArray()); + } + + static void appendArgumentToFunction(Ref func, IString arg) { + assert(func[0] == DEFUN); + func[2]->push_back(makeRawString(arg)); + } + + static Ref makeVar(bool is_const) { + return &makeRawArray(2)->push_back(makeRawString(VAR)) + .push_back(makeRawArray()); + } + + static void appendToVar(Ref var, IString name, Ref value) { + assert(var[0] == VAR); + Ref array = &makeRawArray(1)->push_back(makeRawString(name)); + if (!!value) array->push_back(value); + var[1]->push_back(array); + } + + static Ref makeReturn(Ref value) { + return &makeRawArray(2)->push_back(makeRawString(RETURN)) + .push_back(!!value ? value : makeNull()); + } + + static Ref makeIndexing(Ref target, Ref index) { + return &makeRawArray(3)->push_back(makeRawString(SUB)) + .push_back(target) + .push_back(index); + } + + static Ref makeIf(Ref condition, Ref ifTrue, Ref ifFalse) { + return &makeRawArray(4)->push_back(makeRawString(IF)) + .push_back(condition) + .push_back(ifTrue) + .push_back(!!ifFalse ? ifFalse : makeNull()); + } + + static Ref makeConditional(Ref condition, Ref ifTrue, Ref ifFalse) { + return &makeRawArray(4)->push_back(makeRawString(CONDITIONAL)) + .push_back(condition) + .push_back(ifTrue) + .push_back(ifFalse); + } + + static Ref makeDo(Ref body, Ref condition) { + return &makeRawArray(3)->push_back(makeRawString(DO)) + .push_back(condition) + .push_back(body); + } + + static Ref makeWhile(Ref condition, Ref body) { + return &makeRawArray(3)->push_back(makeRawString(WHILE)) + .push_back(condition) + .push_back(body); + } + + static Ref makeBreak(IString label) { + return &makeRawArray(2)->push_back(makeRawString(BREAK)) + .push_back(!!label ? makeRawString(label) : makeNull()); + } + + static Ref makeContinue(IString label) { + return &makeRawArray(2)->push_back(makeRawString(CONTINUE)) + .push_back(!!label ? makeRawString(label) : makeNull()); + } + + static Ref makeLabel(IString name, Ref body) { + return &makeRawArray(3)->push_back(makeRawString(LABEL)) + .push_back(makeRawString(name)) + .push_back(body); + } + + static Ref makeSwitch(Ref input) { + return &makeRawArray(3)->push_back(makeRawString(SWITCH)) + .push_back(input) + .push_back(makeRawArray()); + } + + static void appendCaseToSwitch(Ref switch_, Ref arg) { + assert(switch_[0] == SWITCH); + switch_[2]->push_back(&makeRawArray(2)->push_back(arg) + .push_back(makeRawArray())); + } + + static void appendDefaultToSwitch(Ref switch_) { + assert(switch_[0] == SWITCH); + switch_[2]->push_back(&makeRawArray(2)->push_back(makeNull()) + .push_back(makeRawArray())); + } + + static void appendCodeToSwitch(Ref switch_, Ref code, bool explicitBlock) { + assert(switch_[0] == SWITCH); + assert(code[0] == BLOCK); + if (!explicitBlock) { + for (size_t i = 0; i < code[1]->size(); i++) { + switch_[2]->back()->back()->push_back(code[1][i]); + } + } else { + switch_[2]->back()->back()->push_back(code); + } + } + + static Ref makeDot(Ref obj, IString key) { + return &makeRawArray(3)->push_back(makeRawString(DOT)) + .push_back(obj) + .push_back(makeRawString(key)); + } + + static Ref makeDot(Ref obj, Ref key) { + assert(key[0] == NAME); + return makeDot(obj, key[1]->getIString()); + } + + static Ref makeNew(Ref call) { + return &makeRawArray(2)->push_back(makeRawString(NEW)) + .push_back(call); + } + + static Ref makeArray() { + return &makeRawArray(2)->push_back(makeRawString(ARRAY)) + .push_back(makeRawArray()); + } + + static void appendToArray(Ref array, Ref element) { + assert(array[0] == ARRAY); + array[1]->push_back(element); + } + + static Ref makeObject() { + return &makeRawArray(2)->push_back(makeRawString(OBJECT)) + .push_back(makeRawArray()); + } + + static void appendToObject(Ref array, IString key, Ref value) { + assert(array[0] == OBJECT); + array[1]->push_back(&makeRawArray(2)->push_back(makeRawString(key)) + .push_back(value)); + } +}; + +// Tolerates 0.0 in the input; does not trust a +() to be there. +class DotZeroValueBuilder : public ValueBuilder { +public: + static Ref makeDouble(double num) { + return makePrefix(PLUS, ValueBuilder::makeDouble(num)); + } +}; + +} // namespace cashew + +#endif // __simple_ast_h__ + diff --git a/src/emscripten-optimizer/snprintf.h b/src/emscripten-optimizer/snprintf.h new file mode 100644 index 000000000..2792a23a2 --- /dev/null +++ b/src/emscripten-optimizer/snprintf.h @@ -0,0 +1,32 @@ +#include + +// Visual Studio does not support C99, so emulate snprintf support for it manually. + +#ifdef _MSC_VER + +#define snprintf c99_snprintf + +inline int c99_vsnprintf(char* str, size_t size, const char* format, va_list ap) +{ + int count = -1; + + if (size != 0) + count = _vsnprintf_s(str, size, _TRUNCATE, format, ap); + if (count == -1) + count = _vscprintf(format, ap); + + return count; +} + +inline int c99_snprintf(char* str, size_t size, const char* format, ...) +{ + int count; + va_list ap; + + va_start(ap, format); + count = c99_vsnprintf(str, size, format, ap); + va_end(ap); + + return count; +} +#endif diff --git a/src/istring.h b/src/istring.h deleted file mode 100644 index b77fec4e5..000000000 --- a/src/istring.h +++ /dev/null @@ -1,161 +0,0 @@ -// Interned String type, 100% interned on creation. Comparisons are always just a pointer comparison - -#ifndef __istring_h__ -#define __istring_h__ - -#include -#include -#include - -#include -#include -#include -#include -#include - -namespace cashew { - -struct IString { - const char *str; - - static size_t hash_c(const char *str) { // see http://www.cse.yorku.ca/~oz/hash.html - unsigned int hash = 5381; - int c; - while ((c = *str++)) { - hash = ((hash << 5) + hash) ^ c; - } - return (size_t)hash; - } - - class CStringHash : public std::hash { - public: - size_t operator()(const char *str) const { - return IString::hash_c(str); - } - }; - class CStringEqual : public std::equal_to { - public: - bool operator()(const char *x, const char *y) const { - return strcmp(x, y) == 0; - } - }; - - IString() : str(nullptr) {} - IString(const char *s, bool reuse=true) { // if reuse=true, then input is assumed to remain alive; not copied - assert(s); - set(s, reuse); - } - - void set(const char *s, bool reuse=true) { - typedef std::unordered_set StringSet; - static StringSet* strings = new StringSet(); - - if (reuse) { - auto result = strings->insert(s); // if already present, does nothing - str = *(result.first); - } else { - auto existing = strings->find(s); - if (existing == strings->end()) { - char *copy = (char*)malloc(strlen(s)+1); // XXX leaked - strcpy(copy, s); - s = copy; - } else { - s = *existing; - } - strings->insert(s); - str = s; - } - } - - void set(const IString &s) { - str = s.str; - } - - void clear() { - str = nullptr; - } - - bool operator==(const IString& other) const { - //assert((str == other.str) == !strcmp(str, other.str)); - return str == other.str; // fast! - } - bool operator!=(const IString& other) const { - //assert((str == other.str) == !strcmp(str, other.str)); - return str != other.str; // fast! - } - bool operator<(const IString& other) const { - return strcmp(str ? str : "", other.str ? other.str : "") < 0; - } - - char operator[](int x) const { - return str[x]; - } - - bool operator!() const { // no string, or empty string - return !str || str[0] == 0; - } - - const char *c_str() const { return str; } - bool equals(const char *other) const { return !strcmp(str, other); } - - bool is() { return str != nullptr; } - bool isNull() { return str == nullptr; } -}; - -} // namespace cashew - -// Utilities for creating hashmaps/sets over IStrings - -namespace std { - -template <> struct hash : public unary_function { - size_t operator()(const cashew::IString& str) const { - size_t hash = size_t(str.str); - return hash = ((hash << 5) + hash) ^ 5381; /* (hash * 33) ^ c */ - } -}; - -template <> struct equal_to : public binary_function { - bool operator()(const cashew::IString& x, const cashew::IString& y) const { - return x == y; - } -}; - -} // namespace std - -namespace cashew { - -// IStringSet - -class IStringSet : public std::unordered_set { -public: - IStringSet() {} - IStringSet(const char *init) { // comma-delimited list - int size = strlen(init); - char *curr = new char[size+1]; // leaked! - strcpy(curr, init); - while (1) { - char *end = strchr(curr, ' '); - if (end) *end = 0; - insert(curr); - if (!end) break; - curr = end + 1; - } - } - - bool has(const IString& str) { - return count(str) > 0; - } -}; - -class IOrderedStringSet : public std::set { -public: - bool has(const IString& str) { - return count(str) > 0; - } -}; - -} // namespace cashew - -#endif // __istring_h__ - diff --git a/src/optimizer-shared.cpp b/src/optimizer-shared.cpp deleted file mode 100644 index 1c36efc62..000000000 --- a/src/optimizer-shared.cpp +++ /dev/null @@ -1,125 +0,0 @@ - -#include "optimizer.h" - -using namespace cashew; - -IString ASM_FLOAT_ZERO; - -IString SIMD_INT8X16_CHECK("SIMD_Int8x16_check"), - SIMD_INT16X8_CHECK("SIMD_Int16x8_check"), - SIMD_INT32X4_CHECK("SIMD_Int32x4_check"), - SIMD_FLOAT32X4_CHECK("SIMD_Float32x4_check"), - SIMD_FLOAT64X2_CHECK("SIMD_Float64x2_check"); - -bool isInteger(double x) { - return fmod(x, 1) == 0; -} - -bool isInteger32(double x) { - return isInteger(x) && (x == (int32_t)x || x == (uint32_t)x); -} - -int parseInt(const char *str) { - int ret = *str - '0'; - while (*(++str)) { - ret *= 10; - ret += *str - '0'; - } - return ret; -} - -HeapInfo parseHeap(const char *name) { - HeapInfo ret; - if (name[0] != 'H' || name[1] != 'E' || name[2] != 'A' || name[3] != 'P') { - ret.valid = false; - return ret; - } - ret.valid = true; - ret.unsign = name[4] == 'U'; - ret.floaty = name[4] == 'F'; - ret.bits = parseInt(name + (ret.unsign || ret.floaty ? 5 : 4)); - ret.type = !ret.floaty ? ASM_INT : (ret.bits == 64 ? ASM_DOUBLE : ASM_FLOAT); - return ret; -} - -AsmType detectType(Ref node, AsmData *asmData, bool inVarDef) { - switch (node[0]->getCString()[0]) { - case 'n': { - if (node[0] == NUM) { - if (!isInteger(node[1]->getNumber())) return ASM_DOUBLE; - return ASM_INT; - } else if (node[0] == NAME) { - if (asmData) { - AsmType ret = asmData->getType(node[1]->getCString()); - if (ret != ASM_NONE) return ret; - } - if (!inVarDef) { - if (node[1] == INF || node[1] == NaN) return ASM_DOUBLE; - if (node[1] == TEMP_RET0) return ASM_INT; - return ASM_NONE; - } - // We are in a variable definition, where Math_fround(0) optimized into a global constant becomes f0 = Math_fround(0) - if (ASM_FLOAT_ZERO.isNull()) ASM_FLOAT_ZERO = node[1]->getIString(); - else assert(node[1] == ASM_FLOAT_ZERO); - return ASM_FLOAT; - } - break; - } - case 'u': { - if (node[0] == UNARY_PREFIX) { - switch (node[1]->getCString()[0]) { - case '+': return ASM_DOUBLE; - case '-': return detectType(node[2], asmData, inVarDef); - case '!': case '~': return ASM_INT; - } - break; - } - break; - } - case 'c': { - if (node[0] == CALL) { - if (node[1][0] == NAME) { - IString name = node[1][1]->getIString(); - if (name == MATH_FROUND) return ASM_FLOAT; - else if (name == SIMD_FLOAT32X4 || name == SIMD_FLOAT32X4_CHECK) return ASM_FLOAT32X4; - else if (name == SIMD_FLOAT64X2 || name == SIMD_FLOAT64X2_CHECK) return ASM_FLOAT64X2; - else if (name == SIMD_INT8X16 || name == SIMD_INT8X16_CHECK) return ASM_INT8X16; - else if (name == SIMD_INT16X8 || name == SIMD_INT16X8_CHECK) return ASM_INT16X8; - else if (name == SIMD_INT32X4 || name == SIMD_INT32X4_CHECK) return ASM_INT32X4; - } - return ASM_NONE; - } else if (node[0] == CONDITIONAL) { - return detectType(node[2], asmData, inVarDef); - } - break; - } - case 'b': { - if (node[0] == BINARY) { - switch (node[1]->getCString()[0]) { - case '+': case '-': - case '*': case '/': case '%': return detectType(node[2], asmData, inVarDef); - case '|': case '&': case '^': case '<': case '>': // handles <<, >>, >>=, <=, >= - case '=': case '!': { // handles ==, != - return ASM_INT; - } - } - } - break; - } - case 's': { - if (node[0] == SEQ) { - return detectType(node[2], asmData, inVarDef); - } else if (node[0] == SUB) { - assert(node[1][0] == NAME); - HeapInfo info = parseHeap(node[1][1]->getCString()); - if (info.valid) return ASM_NONE; - return info.floaty ? ASM_DOUBLE : ASM_INT; // XXX ASM_FLOAT? - } - break; - } - } - //dump("horrible", node); - //assert(0); - return ASM_NONE; -} - diff --git a/src/optimizer.cpp b/src/optimizer.cpp deleted file mode 100644 index d569fb928..000000000 --- a/src/optimizer.cpp +++ /dev/null @@ -1,3851 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "simple_ast.h" -#include "optimizer.h" - -#include "optimizer-shared.cpp" - -typedef std::vector StringVec; - -//================== -// Globals -//================== - -Ref extraInfo; - -//================== -// Infrastructure -//================== - -template -int indexOf(T list, V value) { - for (size_t i = 0; i < list.size(); i++) { - if (list[i] == value) return i; - } - return -1; -} - -int jsD2I(double x) { - return (int)((int64_t)x); -} - -char *strdupe(const char *str) { - char *ret = (char *)malloc(strlen(str)+1); // leaked! - strcpy(ret, str); - return ret; -} - -IString getHeapStr(int x, bool unsign) { - switch (x) { - case 8: return unsign ? HEAPU8 : HEAP8; - case 16: return unsign ? HEAPU16 : HEAP16; - case 32: return unsign ? HEAPU32 : HEAP32; - } - assert(0); - return ":("; -} - -Ref deStat(Ref node) { - if (node[0] == STAT) return node[1]; - return node; -} - -Ref getStatements(Ref node) { - if (node[0] == DEFUN) { - return node[3]; - } else if (node[0] == BLOCK) { - return node->size() > 1 ? node[1] : nullptr; - } else { - return arena.alloc(); - } -} - -// Types - -AsmType intToAsmType(int type) { - if (type >= 0 && type <= ASM_NONE) return (AsmType)type; - else { - assert(0); - return ASM_NONE; - } -} - -// forward decls -Ref makeEmpty(); -bool isEmpty(Ref node); -Ref makeAsmVarDef(const IString& v, AsmType type); -Ref makeArray(int size_hint); -Ref makeBool(bool b); -Ref makeNum(double x); -Ref makeName(IString str); -Ref makeAsmCoercion(Ref node, AsmType type); -Ref make1(IString type, Ref a); -Ref make3(IString type, Ref a, Ref b, Ref c); - -AsmData::AsmData(Ref f) { - func = f; - - // process initial params - Ref stats = func[3]; - size_t i = 0; - while (i < stats->size()) { - Ref node = stats[i]; - if (node[0] != STAT || node[1][0] != ASSIGN || node[1][2][0] != NAME) break; - node = node[1]; - Ref name = node[2][1]; - int index = func[2]->indexOf(name); - if (index < 0) break; // not an assign into a parameter, but a global - IString& str = name->getIString(); - if (locals.count(str) > 0) break; // already done that param, must be starting function body - locals[str] = Local(detectType(node[3]), true); - params.push_back(str); - stats[i] = makeEmpty(); - i++; - } - // process initial variable definitions and remove '= 0' etc parts - these - // are not actually assignments in asm.js - while (i < stats->size()) { - Ref node = stats[i]; - if (node[0] != VAR) break; - for (size_t j = 0; j < node[1]->size(); j++) { - Ref v = node[1][j]; - IString& name = v[0]->getIString(); - Ref value = v[1]; - if (locals.count(name) == 0) { - locals[name] = Local(detectType(value, nullptr, true), false); - vars.push_back(name); - v->setSize(1); // make an un-assigning var - } else { - assert(j == 0); // cannot break in the middle - goto outside; - } - } - i++; - } - outside: - // look for other var definitions and collect them - while (i < stats->size()) { - traversePre(stats[i], [&](Ref node) { - Ref type = node[0]; - if (type == VAR) { - dump("bad, seeing a var in need of fixing", func); - abort(); //, 'should be no vars to fix! ' + func[1] + ' : ' + JSON.stringify(node)); - } - }); - i++; - } - // look for final RETURN statement to get return type. - Ref retStmt = stats->back(); - if (!!retStmt && retStmt[0] == RETURN && !!retStmt[1]) { - ret = detectType(retStmt[1]); - } else { - ret = ASM_NONE; - } -} - -void AsmData::denormalize() { - Ref stats = func[3]; - // Remove var definitions, if any - for (size_t i = 0; i < stats->size(); i++) { - if (stats[i][0] == VAR) { - stats[i] = makeEmpty(); - } else { - if (!isEmpty(stats[i])) break; - } - } - // calculate variable definitions - Ref varDefs = makeArray(vars.size()); - for (auto v : vars) { - varDefs->push_back(makeAsmVarDef(v, locals[v].type)); - } - // each param needs a line; reuse emptyNodes as much as we can - size_t numParams = params.size(); - size_t emptyNodes = 0; - while (emptyNodes < stats->size()) { - if (!isEmpty(stats[emptyNodes])) break; - emptyNodes++; - } - size_t neededEmptyNodes = numParams + (varDefs->size() ? 1 : 0); // params plus one big var if there are vars - if (neededEmptyNodes > emptyNodes) { - stats->insert(0, neededEmptyNodes - emptyNodes); - } else if (neededEmptyNodes < emptyNodes) { - stats->splice(0, emptyNodes - neededEmptyNodes); - } - // add param coercions - int next = 0; - for (auto param : func[2]->getArray()) { - IString str = param->getIString(); - assert(locals.count(str) > 0); - stats[next++] = make1(STAT, make3(ASSIGN, makeBool(true), makeName(str.c_str()), makeAsmCoercion(makeName(str.c_str()), locals[str].type))); - } - if (varDefs->size()) { - stats[next] = make1(VAR, varDefs); - } - /* - if (inlines->size() > 0) { - var i = 0; - traverse(func, function(node, type) { - if (type == CALL && node[1][0] == NAME && node[1][1] == 'inlinejs') { - node[1] = inlines[i++]; // swap back in the body - } - }); - } - */ - // ensure that there's a final RETURN statement if needed. - if (ret != ASM_NONE) { - Ref retStmt = stats->back(); - if (!retStmt || retStmt[0] != RETURN) { - Ref retVal = makeNum(0); - if (ret != ASM_INT) { - retVal = makeAsmCoercion(retVal, ret); - } - stats->push_back(make1(RETURN, retVal)); - } - } - //printErr('denormalized \n\n' + astToSrc(func) + '\n\n'); -} - -// Constructions TODO: share common constructions, and assert they remain frozen - -Ref makeArray(int size_hint=0) { - return &arena.alloc()->setArray(size_hint); -} - -Ref makeBool(bool b) { - return &arena.alloc()->setBool(b); -} - -Ref makeString(const IString& s) { - return &arena.alloc()->setString(s); -} - -Ref makeEmpty() { - return ValueBuilder::makeToplevel(); -} - -Ref makeNum(double x) { - return ValueBuilder::makeDouble(x); -} - -Ref makeName(IString str) { - return ValueBuilder::makeName(str); -} - -Ref makeBlock() { - return ValueBuilder::makeBlock(); -} - -Ref make1(IString s1, Ref a) { - Ref ret(makeArray(2)); - ret->push_back(makeString(s1)); - ret->push_back(a); - return ret; -} - -Ref make2(IString s1, IString s2, Ref a) { - Ref ret(makeArray(2)); - ret->push_back(makeString(s1)); - ret->push_back(makeString(s2)); - ret->push_back(a); - return ret; -} - -Ref make2(IString s1, Ref a, Ref b) { - Ref ret(makeArray(3)); - ret->push_back(makeString(s1)); - ret->push_back(a); - ret->push_back(b); - return ret; -} - -Ref make3(IString type, IString a, Ref b, Ref c) { - Ref ret(makeArray(4)); - ret->push_back(makeString(type)); - ret->push_back(makeString(a)); - ret->push_back(b); - ret->push_back(c); - return ret; -} - -Ref make3(IString type, Ref a, Ref b, Ref c) { - Ref ret(makeArray(4)); - ret->push_back(makeString(type)); - ret->push_back(a); - ret->push_back(b); - ret->push_back(c); - return ret; -} - -Ref makeAsmVarDef(const IString& v, AsmType type) { - Ref val; - switch (type) { - case ASM_INT: val = makeNum(0); break; - case ASM_DOUBLE: val = make2(UNARY_PREFIX, PLUS, makeNum(0)); break; - case ASM_FLOAT: { - if (!ASM_FLOAT_ZERO.isNull()) { - val = makeName(ASM_FLOAT_ZERO); - } else { - val = make2(CALL, makeName(MATH_FROUND), &(makeArray(1))->push_back(makeNum(0))); - } - break; - } - case ASM_FLOAT32X4: { - val = make2(CALL, makeName(SIMD_FLOAT32X4), &(makeArray(4))->push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0))); - break; - } - case ASM_FLOAT64X2: { - val = make2(CALL, makeName(SIMD_FLOAT64X2), &(makeArray(2))->push_back(makeNum(0)).push_back(makeNum(0))); - break; - } - case ASM_INT8X16: { - val = make2(CALL, makeName(SIMD_INT8X16), &(makeArray(16))->push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0))); - break; - } - case ASM_INT16X8: { - val = make2(CALL, makeName(SIMD_INT16X8), &(makeArray(8))->push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0))); - break; - } - case ASM_INT32X4: { - val = make2(CALL, makeName(SIMD_INT32X4), &(makeArray(4))->push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0)).push_back(makeNum(0))); - break; - } - default: assert(0); - } - return make1(v, val); -} - -Ref makeAsmCoercion(Ref node, AsmType type) { - switch (type) { - case ASM_INT: return make3(BINARY, OR, node, makeNum(0)); - case ASM_DOUBLE: return make2(UNARY_PREFIX, PLUS, node); - case ASM_FLOAT: return make2(CALL, makeName(MATH_FROUND), &(makeArray(1))->push_back(node)); - case ASM_FLOAT32X4: return make2(CALL, makeName(SIMD_FLOAT32X4_CHECK), &(makeArray(1))->push_back(node)); - case ASM_FLOAT64X2: return make2(CALL, makeName(SIMD_FLOAT64X2_CHECK), &(makeArray(1))->push_back(node)); - case ASM_INT8X16: return make2(CALL, makeName(SIMD_INT8X16_CHECK), &(makeArray(1))->push_back(node)); - case ASM_INT16X8: return make2(CALL, makeName(SIMD_INT16X8_CHECK), &(makeArray(1))->push_back(node)); - case ASM_INT32X4: return make2(CALL, makeName(SIMD_INT32X4_CHECK), &(makeArray(1))->push_back(node)); - case ASM_NONE: - default: return node; // non-validating code, emit nothing XXX this is dangerous, we should only allow this when we know we are not validating - } -} - -// Checks - -bool isEmpty(Ref node) { - return (node->size() == 2 && node[0] == TOPLEVEL && node[1]->size() == 0) || - (node->size() > 0 && node[0] == BLOCK && (!node[1] || node[1]->size() == 0)); -} - -bool commable(Ref node) { // TODO: hashing - IString type = node[0]->getIString(); - if (type == ASSIGN || type == BINARY || type == UNARY_PREFIX || type == NAME || type == NUM || type == CALL || type == SEQ || type == CONDITIONAL || type == SUB) return true; - return false; -} - -bool isMathFunc(const char *name) { - static const char *Math_ = "Math_"; - static unsigned size = strlen(Math_); - return strncmp(name, Math_, size) == 0; -} - -bool isMathFunc(Ref value) { - return value->isString() && isMathFunc(value->getCString()); -} - -bool callHasSideEffects(Ref node) { // checks if the call itself (not the args) has side effects (or is not statically known) - return !(node[1][0] == NAME && isMathFunc(node[1][1])); -} - -bool hasSideEffects(Ref node) { // this is 99% incomplete! - IString type = node[0]->getIString(); - switch (type[0]) { - case 'n': - if (type == NUM || type == NAME) return false; - break; - case 's': - if (type == STRING) return false; - if (type == SUB) return hasSideEffects(node[1]) || hasSideEffects(node[2]); - break; - case 'u': - if (type == UNARY_PREFIX) return hasSideEffects(node[2]); - break; - case 'b': - if (type == BINARY) return hasSideEffects(node[2]) || hasSideEffects(node[3]); - break; - case 'c': - if (type == CALL) { - if (callHasSideEffects(node)) return true; - // This is a statically known call, with no side effects. only args can side effect us - for (auto arg : node[2]->getArray()) { - if (hasSideEffects(arg)) return true; - } - return false; - } else if (type == CONDITIONAL) return hasSideEffects(node[1]) || hasSideEffects(node[2]) || hasSideEffects(node[3]); - break; - } - return true; -} - -// checks if a node has just basic operations, nothing with side effects nor that can notice side effects, which -// implies we can move it around in the code -bool triviallySafeToMove(Ref node, AsmData& asmData) { - bool ok = true; - traversePre(node, [&](Ref node) { - Ref type = node[0]; - if (type == STAT || type == BINARY || type == UNARY_PREFIX || type == ASSIGN || type == NUM) return; - else if (type == NAME) { - if (!asmData.isLocal(node[1]->getIString())) ok = false; - } else if (type == CALL) { - if (callHasSideEffects(node)) ok = false; - } else { - ok = false; - } - }); - return ok; -} - -// Transforms - -// We often have branchings that are simplified so one end vanishes, and -// we then get -// if (!(x < 5)) -// or such. Simplifying these saves space and time. -Ref simplifyNotCompsDirect(Ref node) { - if (node[0] == UNARY_PREFIX && node[1] == L_NOT) { - // de-morgan's laws do not work on floats, due to nans >:( - if (node[2][0] == BINARY && (detectType(node[2][2]) == ASM_INT && detectType(node[2][3]) == ASM_INT)) { - Ref op = node[2][1]; - switch(op->getCString()[0]) { - case '<': { - if (op == LT) { op->setString(GE); break; } - if (op == LE) { op->setString(GT); break; } - return node; - } - case '>': { - if (op == GT) { op->setString(LE); break; } - if (op == GE) { op->setString(LT); break; } - return node; - } - case '=': { - if (op == EQ) { op->setString(NE); break; } - return node; - } - case '!': { - if (op == NE) { op->setString(EQ); break; } - return node; - } - default: return node; - } - return make3(BINARY, op, node[2][2], node[2][3]); - } else if (node[2][0] == UNARY_PREFIX && node[2][1] == L_NOT) { - return node[2][2]; - } - } - return node; -} - -Ref flipCondition(Ref cond) { - return simplifyNotCompsDirect(make2(UNARY_PREFIX, L_NOT, cond)); -} - -void safeCopy(Ref target, Ref source) { // safely copy source onto target, even if source is a subnode of target - Ref temp = source; // hold on to source - *target = *temp; -} - -void clearEmptyNodes(Ref arr) { - int skip = 0; - for (size_t i = 0; i < arr->size(); i++) { - if (skip) { - arr[i-skip] = arr[i]; - } - if (isEmpty(deStat(arr[i]))) { - skip++; - } - } - if (skip) arr->setSize(arr->size() - skip); -} -void clearUselessNodes(Ref arr) { - int skip = 0; - for (size_t i = 0; i < arr->size(); i++) { - Ref curr = arr[i]; - if (skip) { - arr[i-skip] = curr; - } - if (isEmpty(deStat(curr)) || (curr[0] == STAT && !hasSideEffects(curr[1]))) { - skip++; - } - } - if (skip) arr->setSize(arr->size() - skip); -} - -void removeAllEmptySubNodes(Ref ast) { - traversePre(ast, [](Ref node) { - if (node[0] == DEFUN) { - clearEmptyNodes(node[3]); - } else if (node[0] == BLOCK && node->size() > 1 && !!node[1]) { - clearEmptyNodes(node[1]); - } else if (node[0] == SEQ && isEmpty(node[1])) { - safeCopy(node, node[2]); - } - }); -} -void removeAllUselessSubNodes(Ref ast) { - traversePrePost(ast, [](Ref node) { - Ref type = node[0]; - if (type == DEFUN) { - clearUselessNodes(node[3]); - } else if (type == BLOCK && node->size() > 1 && !!node[1]) { - clearUselessNodes(node[1]); - } else if (type == SEQ && isEmpty(node[1])) { - safeCopy(node, node[2]); - } - }, [](Ref node) { - Ref type = node[0]; - if (type == IF) { - bool empty2 = isEmpty(node[2]), has3 = node->size() == 4 && !!node[3], empty3 = !has3 || isEmpty(node[3]); - if (!empty2 && empty3 && has3) { // empty else clauses - node->setSize(3); - } else if (empty2 && !empty3) { // empty if blocks - safeCopy(node, make2(IF, make2(UNARY_PREFIX, L_NOT, node[1]), node[3])); - } else if (empty2 && empty3) { - if (hasSideEffects(node[1])) { - safeCopy(node, make1(STAT, node[1])); - } else { - safeCopy(node, makeEmpty()); - } - } - } - }); -} - -Ref unVarify(Ref vars) { // transform var x=1, y=2 etc. into (x=1, y=2), i.e., the same assigns, but without a var definition - Ref ret = makeArray(1); - ret->push_back(makeString(STAT)); - if (vars->size() == 1) { - ret->push_back(make3(ASSIGN, makeBool(true), makeName(vars[0][0]->getIString()), vars[0][1])); - } else { - ret->push_back(makeArray(vars->size()-1)); - Ref curr = ret[1]; - for (size_t i = 0; i+1 < vars->size(); i++) { - curr->push_back(makeString(SEQ)); - curr->push_back(make3(ASSIGN, makeBool(true), makeName(vars[i][0]->getIString()), vars[i][1])); - if (i != vars->size()-2) { - curr->push_back(makeArray()); - curr = curr[2]; - } - } - curr->push_back(make3(ASSIGN, makeBool(true), makeName(vars->back()[0]->getIString()), vars->back()[1])); - } - return ret; -} - -// Calculations - -int measureCost(Ref ast) { - int size = 0; - traversePre(ast, [&size](Ref node) { - Ref type = node[0]; - if (type == NUM || type == UNARY_PREFIX) size--; - else if (type == BINARY) { - if (node[3][0] == NUM && node[3][1]->getNumber() == 0) size--; - else if (node[1] == DIV || node[1] == MOD) size += 2; - } - else if (type == CALL && !callHasSideEffects(node)) size -= 2; - else if (type == SUB) size++; - size++; - }); - return size; -} - -//================== -// Params -//================== - -bool preciseF32 = false, - receiveJSON = false, - emitJSON = false, - minifyWhitespace = false, - last = false; - -//===================== -// Optimization passes -//===================== - -#define HASES \ - bool has(const IString& str) { \ - return count(str) > 0; \ - } \ - bool has(Ref node) { \ - return node->isString() && count(node->getIString()) > 0; \ - } - -class StringSet : public cashew::IStringSet { -public: - StringSet() {} - StringSet(const char *str) : IStringSet(str) {} - - HASES - - void dump() { - err("==="); - for (auto str : *this) { - errv("%s", str.c_str()); - } - err("==="); - } -}; - -StringSet USEFUL_BINARY_OPS("<< >> | & ^"), - COMPARE_OPS("< <= > >= == == != !=="), - BITWISE("| & ^"), - SAFE_BINARY_OPS("+ -"), // division is unsafe as it creates non-ints in JS; mod is unsafe as signs matter so we can't remove |0's; mul does not nest with +,- in asm - COERCION_REQUIRING_OPS("sub unary-prefix"), // ops that in asm must be coerced right away - COERCION_REQUIRING_BINARIES("* / %"); // binary ops that in asm must be coerced - -StringSet ASSOCIATIVE_BINARIES("+ * | & ^"), - CONTROL_FLOW("do while for if switch"), - LOOP("do while for"), - NAME_OR_NUM("name num"), - CONDITION_CHECKERS("if do while switch"), - SAFE_TO_DROP_COERCION("unary-prefix name num"); - -StringSet BREAK_CAPTURERS("do while for switch"), - CONTINUE_CAPTURERS("do while for"), - FUNCTIONS_THAT_ALWAYS_THROW("abort ___resumeException ___cxa_throw ___cxa_rethrow"); - -bool isFunctionTable(const char *name) { - static const char *functionTable = "FUNCTION_TABLE"; - static unsigned size = strlen(functionTable); - return strncmp(name, functionTable, size) == 0; -} - -bool isFunctionTable(Ref value) { - return value->isString() && isFunctionTable(value->getCString()); -} - -// Internal utilities - -bool canDropCoercion(Ref node) { - if (SAFE_TO_DROP_COERCION.has(node[0])) return true; - if (node[0] == BINARY) { - switch (node[1]->getCString()[0]) { - case '>': return node[1] == RSHIFT || node[1] == TRSHIFT; - case '<': return node[1] == LSHIFT; - case '|': case '^': case '&': return true; - } - } - return false; -} - -Ref simplifyCondition(Ref node) { - node = simplifyNotCompsDirect(node); - // on integers, if (x == 0) is the same as if (x), and if (x != 0) as if (!x) - if (node[0] == BINARY && (node[1] == EQ || node[1] == NE)) { - Ref target; - if (detectType(node[2]) == ASM_INT && node[3][0] == NUM && node[3][1]->getNumber() == 0) { - target = node[2]; - } else if (detectType(node[3]) == ASM_INT && node[2][0] == NUM && node[2][1]->getNumber() == 0) { - target = node[3]; - } - if (!!target) { - if (target[0] == BINARY && (target[1] == OR || target[1] == TRSHIFT) && target[3][0] == NUM && target[3][1]->getNumber() == 0 && - canDropCoercion(target[2])) { - target = target[2]; // drop the coercion, in a condition it is ok to do if (x) - } - if (node[1] == EQ) { - return make2(UNARY_PREFIX, L_NOT, target); - } else { - return target; - } - } - } - return node; -} - -// Passes - -// Eliminator aka Expressionizer -// -// The goal of this pass is to eliminate unneeded variables (which represent one of the infinite registers in the LLVM -// model) and thus to generate complex expressions where possible, for example -// -// var x = a(10); -// var y = HEAP[20]; -// print(x+y); -// -// can be transformed into -// -// print(a(10)+HEAP[20]); -// -// The basic principle is to scan along the code in the order of parsing/execution, and keep a list of tracked -// variables that are current contenders for elimination. We must untrack when we see something that we cannot -// cross, for example, a write to memory means we must invalidate variables that depend on reading from -// memory, since if we change the order then we do not preserve the computation. -// -// We rely on some assumptions about emscripten-generated code here, which means we can do a lot more than -// a general JS optimization can. For example, we assume that SUB nodes (indexing like HEAP[..]) are -// memory accesses or FUNCTION_TABLE accesses, and in both cases that the symbol cannot be replaced although -// the contents can. So we assume FUNCTION_TABLE might have its contents changed but not be pointed to -// a different object, which allows -// -// var x = f(); -// FUNCTION_TABLE[x](); -// -// to be optimized (f could replace FUNCTION_TABLE, so in general JS eliminating x is not valid). -// -// In memSafe mode, we are more careful and assume functions can replace HEAP and FUNCTION_TABLE, which -// can happen in ALLOW_MEMORY_GROWTH mode - -StringSet ELIMINATION_SAFE_NODES("assign call if toplevel do return label switch binary unary-prefix"); // do is checked carefully, however -StringSet IGNORABLE_ELIMINATOR_SCAN_NODES("num toplevel string break continue dot"); // dot can only be STRING_TABLE.* -StringSet ABORTING_ELIMINATOR_SCAN_NODES("new object function defun for while array throw"); // we could handle some of these, TODO, but nontrivial (e.g. for while, the condition is hit multiple times after the body) -StringSet HEAP_NAMES("HEAP8 HEAP16 HEAP32 HEAPU8 HEAPU16 HEAPU32 HEAPF32 HEAPF64"); - -bool isTempDoublePtrAccess(Ref node) { // these are used in bitcasts; they are not really affecting memory, and should cause no invalidation - assert(node[0] == SUB); - return (node[2][0] == NAME && node[2][1] == TEMP_DOUBLE_PTR) || - (node[2][0] == BINARY && ((node[2][2][0] == NAME && node[2][2][1] == TEMP_DOUBLE_PTR) || - (node[2][3][0] == NAME && node[2][3][1] == TEMP_DOUBLE_PTR))); -} - -class StringIntMap : public std::unordered_map { -public: - HASES -}; - -class StringStringMap : public std::unordered_map { -public: - HASES -}; - -class StringRefMap : public std::unordered_map { -public: - HASES -}; - -class StringTypeMap : public std::unordered_map { -public: - HASES -}; - -void eliminate(Ref ast, bool memSafe) { -#ifdef PROFILING - clock_t tasmdata = 0; - clock_t tfnexamine = 0; - clock_t tvarcheck = 0; - clock_t tstmtelim = 0; - clock_t tstmtscan = 0; - clock_t tcleanvars = 0; - clock_t treconstruct = 0; -#endif - - // Find variables that have a single use, and if they can be eliminated, do so - traverseFunctions(ast, [&](Ref func) { - -#ifdef PROFILING - clock_t start = clock(); -#endif - - AsmData asmData(func); - -#ifdef PROFILING - tasmdata += clock() - start; - start = clock(); -#endif - - // First, find the potentially eliminatable functions: that have one definition and one use - - StringIntMap definitions; - StringIntMap uses; - StringIntMap namings; - StringRefMap values; - StringIntMap varsToRemove; // variables being removed, that we can eliminate all 'var x;' of (this refers to VAR nodes we should remove) - // 1 means we should remove it, 2 means we successfully removed it - StringSet varsToTryToRemove; // variables that have 0 uses, but have side effects - when we scan we can try to remove them - - // examine body and note locals - traversePre(func, [&](Ref node) { - Ref type = node[0]; - if (type == NAME) { - IString& name = node[1]->getIString(); - uses[name]++; - namings[name]++; - } else if (type == ASSIGN) { - Ref target = node[2]; - if (target[0] == NAME) { - IString& name = target[1]->getIString(); - // values is only used if definitions is 1 - if (definitions[name]++ == 0) { - values[name] = node[3]; - } - assert(node[1]->isBool(true)); // not +=, -= etc., just = - uses[name]--; // because the name node will show up by itself in the previous case - } - } - }); - -#ifdef PROFILING - tfnexamine += clock() - start; - start = clock(); -#endif - - StringSet potentials; // local variables with 1 definition and 1 use - StringSet sideEffectFree; // whether a local variable has no side effects in its definition. Only relevant when there are no uses - - auto unprocessVariable = [&](IString name) { - potentials.erase(name); - varsToRemove.erase(name); - sideEffectFree.erase(name); - varsToTryToRemove.erase(name); - }; - std::function processVariable = [&](IString name) { - if (definitions[name] == 1 && uses[name] == 1) { - potentials.insert(name); - } else if (uses[name] == 0 && definitions[name] <= 1) { // no uses, no def or 1 def (cannot operate on phis, and the llvm optimizer will remove unneeded phis anyhow) (no definition means it is a function parameter, or a local with just |var x;| but no defining assignment) - bool sideEffects = false; - auto val = values.find(name); - Ref value; - if (val != values.end()) { - value = val->second; - // TODO: merge with other side effect code - // First, pattern-match - // (HEAP32[((tempDoublePtr)>>2)]=((HEAP32[(($_sroa_0_0__idx1)>>2)])|0),HEAP32[(((tempDoublePtr)+(4))>>2)]=((HEAP32[((($_sroa_0_0__idx1)+(4))>>2)])|0),(+(HEAPF64[(tempDoublePtr)>>3]))) - // which has no side effects and is the special form of converting double to i64. - if (!(value[0] == SEQ && value[1][0] == ASSIGN && value[1][2][0] == SUB && value[1][2][2][0] == BINARY && value[1][2][2][1] == RSHIFT && - value[1][2][2][2][0] == NAME && value[1][2][2][2][1] == TEMP_DOUBLE_PTR)) { - // If not that, then traverse and scan normally. - sideEffects = hasSideEffects(value); - } - } - if (!sideEffects) { - varsToRemove[name] = !definitions[name] ? 2 : 1; // remove it normally - sideEffectFree.insert(name); - // Each time we remove a variable with 0 uses, if its value has no - // side effects and vanishes too, then we can remove a use from variables - // appearing in it, and possibly eliminate again - if (!!value) { - traversePre(value, [&](Ref node) { - if (node[0] == NAME) { - IString name = node[1]->getIString(); - node[1]->setString(EMPTY); // we can remove this - it will never be shown, and should not be left to confuse us as we traverse - if (asmData.isLocal(name)) { - uses[name]--; // cannot be infinite recursion since we descend an energy function - assert(uses[name] >= 0); - unprocessVariable(name); - processVariable(name); - } - } else if (node[0] == CALL) { - // no side effects, so this must be a Math.* call or such. We can just ignore it and all children - node[0]->setString(NAME); - node[1]->setString(EMPTY); - } - }); - } - } else { - varsToTryToRemove.insert(name); // try to remove it later during scanning - } - } - }; - for (auto name : asmData.locals) { - processVariable(name.first); - } - -#ifdef PROFILING - tvarcheck += clock() - start; - start = clock(); -#endif - - //printErr('defs: ' + JSON.stringify(definitions)); - //printErr('uses: ' + JSON.stringify(uses)); - //printErr('values: ' + JSON.stringify(values)); - //printErr('locals: ' + JSON.stringify(locals)); - //printErr('varsToRemove: ' + JSON.stringify(varsToRemove)); - //printErr('varsToTryToRemove: ' + JSON.stringify(varsToTryToRemove)); - values.clear(); - //printErr('potentials: ' + JSON.stringify(potentials)); - // We can now proceed through the function. In each list of statements, we try to eliminate - struct Tracking { - bool usesGlobals, usesMemory, hasDeps; - Ref defNode; - bool doesCall; - }; - class Tracked : public std::unordered_map { - public: - HASES - }; - Tracked tracked; - #define dumpTracked() { errv("tracking %d", tracked.size()); for (auto t : tracked) errv("... %s", t.first.c_str()); } - // Although a set would be more appropriate, it would also be slower - std::unordered_map depMap; - - bool globalsInvalidated = false; // do not repeat invalidations, until we track something new - bool memoryInvalidated = false; - bool callsInvalidated = false; - auto track = [&](IString name, Ref value, Ref defNode) { // add a potential that has just been defined to the tracked list, we hope to eliminate it - Tracking& track = tracked[name]; - track.usesGlobals = false; - track.usesMemory = false; - track.hasDeps = false; - track.defNode = defNode; - track.doesCall = false; - bool ignoreName = false; // one-time ignorings of names, as first op in sub and call - traversePre(value, [&](Ref node) { - Ref type = node[0]; - if (type == NAME) { - if (!ignoreName) { - IString depName = node[1]->getIString(); - if (!asmData.isLocal(depName)) { - track.usesGlobals = true; - } - if (!potentials.has(depName)) { // deps do not matter for potentials - they are defined once, so no complexity - depMap[depName].push_back(name); - track.hasDeps = true; - } - } else { - ignoreName = false; - } - } else if (type == SUB) { - track.usesMemory = true; - ignoreName = true; - } else if (type == CALL) { - track.usesGlobals = true; - track.usesMemory = true; - track.doesCall = true; - ignoreName = true; - } else { - ignoreName = false; - } - }); - if (track.usesGlobals) globalsInvalidated = false; - if (track.usesMemory) memoryInvalidated = false; - if (track.doesCall) callsInvalidated = false; - }; - - // TODO: invalidate using a sequence number for each type (if you were tracked before the last invalidation, you are cancelled). remove for.in loops - #define INVALIDATE(what, check) \ - auto invalidate##what = [&]() { \ - std::vector temp; \ - for (auto t : tracked) { \ - IString name = t.first; \ - Tracking& info = tracked[name]; \ - if (check) { \ - temp.push_back(name); \ - } \ - } \ - for (size_t i = 0; i < temp.size(); i++) { \ - tracked.erase(temp[i]); \ - } \ - }; - INVALIDATE(Globals, info.usesGlobals); - INVALIDATE(Memory, info.usesMemory); - INVALIDATE(Calls, info.doesCall); - - auto invalidateByDep = [&](IString dep) { - for (auto name : depMap[dep]) { - tracked.erase(name); - } - depMap.erase(dep); - }; - - std::function doEliminate; - - // Generate the sequence of execution. This determines what is executed before what, so we know what can be reordered. Using - // that, performs invalidations and eliminations - auto scan = [&](Ref node) { - bool abort = false; - bool allowTracking = true; // false inside an if; also prevents recursing in an if - std::function traverseInOrder = [&](Ref node, bool ignoreSub) { - if (abort) return; - Ref type = node[0]; - if (type == ASSIGN) { - Ref target = node[2]; - Ref value = node[3]; - bool nameTarget = target[0] == NAME; - // If this is an assign to a name, handle it below rather than - // traversing and treating as a read - if (!nameTarget) { - traverseInOrder(target, true); // evaluate left - } - traverseInOrder(value, false); // evaluate right - // do the actual assignment - if (nameTarget) { - IString name = target[1]->getIString(); - if (potentials.has(name) && allowTracking) { - track(name, node[3], node); - } else if (varsToTryToRemove.has(name)) { - // replace it in-place - safeCopy(node, value); - varsToRemove[name] = 2; - } else { - // expensive check for invalidating specific tracked vars. This list is generally quite short though, because of - // how we just eliminate in short spans and abort when control flow happens TODO: history numbers instead - invalidateByDep(name); // can happen more than once per dep.. - if (!asmData.isLocal(name) && !globalsInvalidated) { - invalidateGlobals(); - globalsInvalidated = true; - } - // if we can track this name (that we assign into), and it has 0 uses and we want to remove its VAR - // definition - then remove it right now, there is no later chance - if (allowTracking && varsToRemove.has(name) && uses[name] == 0) { - track(name, node[3], node); - doEliminate(name, node); - } - } - } else if (target[0] == SUB) { - if (isTempDoublePtrAccess(target)) { - if (!globalsInvalidated) { - invalidateGlobals(); - globalsInvalidated = true; - } - } else if (!memoryInvalidated) { - invalidateMemory(); - memoryInvalidated = true; - } - } - } else if (type == SUB) { - // Only keep track of the global array names in memsafe mode i.e. - // when they may change underneath us due to resizing - if (node[1][0] != NAME || memSafe) { - traverseInOrder(node[1], false); // evaluate inner - } - traverseInOrder(node[2], false); // evaluate outer - // ignoreSub means we are a write (happening later), not a read - if (!ignoreSub && !isTempDoublePtrAccess(node)) { - // do the memory access - if (!callsInvalidated) { - invalidateCalls(); - callsInvalidated = true; - } - } - } else if (type == BINARY) { - bool flipped = false; - if (ASSOCIATIVE_BINARIES.has(node[1]) && !NAME_OR_NUM.has(node[2][0]) && NAME_OR_NUM.has(node[3][0])) { // TODO recurse here? - // associatives like + and * can be reordered in the simple case of one of the sides being a name, since we assume they are all just numbers - Ref temp = node[2]; - node[2] = node[3]; - node[3] = temp; - flipped = true; - } - traverseInOrder(node[2], false); - traverseInOrder(node[3], false); - if (flipped && NAME_OR_NUM.has(node[2][0])) { // dunno if we optimized, but safe to flip back - and keeps the code closer to the original and more readable - Ref temp = node[2]; - node[2] = node[3]; - node[3] = temp; - } - } else if (type == NAME) { - IString name = node[1]->getIString(); - if (tracked.has(name)) { - doEliminate(name, node); - } else if (!asmData.isLocal(name) && !callsInvalidated && (memSafe || !HEAP_NAMES.has(name))) { // ignore HEAP8 etc when not memory safe, these are ok to - // access, e.g. SIMD_Int32x4_load(HEAP8, ...) - invalidateCalls(); - callsInvalidated = true; - } - } else if (type == UNARY_PREFIX || type == UNARY_POSTFIX) { - traverseInOrder(node[2], false); - } else if (IGNORABLE_ELIMINATOR_SCAN_NODES.has(type)) { - } else if (type == CALL) { - // Named functions never change and are therefore safe to not track - if (node[1][0] != NAME) { - traverseInOrder(node[1], false); - } - Ref args = node[2]; - for (size_t i = 0; i < args->size(); i++) { - traverseInOrder(args[i], false); - } - if (callHasSideEffects(node)) { - // these two invalidations will also invalidate calls - if (!globalsInvalidated) { - invalidateGlobals(); - globalsInvalidated = true; - } - if (!memoryInvalidated) { - invalidateMemory(); - memoryInvalidated = true; - } - } - } else if (type == IF) { - if (allowTracking) { - traverseInOrder(node[1], false); // can eliminate into condition, but nowhere else - if (!callsInvalidated) { // invalidate calls, since we cannot eliminate them into an if that may not execute! - invalidateCalls(); - callsInvalidated = true; - } - allowTracking = false; - traverseInOrder(node[2], false); // 2 and 3 could be 'parallel', really.. - if (!!node[3]) traverseInOrder(node[3], false); - allowTracking = true; - } else { - tracked.clear(); - } - } else if (type == BLOCK) { - Ref stats = getStatements(node); - if (!!stats) { - for (size_t i = 0; i < stats->size(); i++) { - traverseInOrder(stats[i], false); - } - } - } else if (type == STAT) { - traverseInOrder(node[1], false); - } else if (type == LABEL) { - traverseInOrder(node[2], false); - } else if (type == SEQ) { - traverseInOrder(node[1], false); - traverseInOrder(node[2], false); - } else if (type == DO) { - if (node[1][0] == NUM && node[1][1]->getNumber() == 0) { // one-time loop - traverseInOrder(node[2], false); - } else { - tracked.clear(); - } - } else if (type == RETURN) { - if (!!node[1]) traverseInOrder(node[1], false); - } else if (type == CONDITIONAL) { - if (!callsInvalidated) { // invalidate calls, since we cannot eliminate them into a branch of an LLVM select/JS conditional that does not execute - invalidateCalls(); - callsInvalidated = true; - } - traverseInOrder(node[1], false); - traverseInOrder(node[2], false); - traverseInOrder(node[3], false); - } else if (type == SWITCH) { - traverseInOrder(node[1], false); - Tracked originalTracked = tracked; - Ref cases = node[2]; - for (size_t i = 0; i < cases->size(); i++) { - Ref c = cases[i]; - assert(c[0]->isNull() || c[0][0] == NUM || (c[0][0] == UNARY_PREFIX && c[0][2][0] == NUM)); - Ref stats = c[1]; - for (size_t j = 0; j < stats->size(); j++) { - traverseInOrder(stats[j], false); - } - // We cannot track from one switch case into another if there are external dependencies, undo all new trackings - // Otherwise we can track, e.g. a var used in a case before assignment in another case is UB in asm.js, so no need for the assignment - // TODO: general framework here, use in if-else as well - std::vector toDelete; - for (auto t : tracked) { - if (!originalTracked.has(t.first)) { - Tracking& info = tracked[t.first]; - if (info.usesGlobals || info.usesMemory || info.hasDeps) { - toDelete.push_back(t.first); - } - } - } - for (auto t : toDelete) { - tracked.erase(t); - } - } - tracked.clear(); // do not track from inside the switch to outside - } else { - assert(ABORTING_ELIMINATOR_SCAN_NODES.has(type)); - tracked.clear(); - abort = true; - } - }; - traverseInOrder(node, false); - }; - //var eliminationLimit = 0; // used to debugging purposes - doEliminate = [&](IString name, Ref node) { - //if (eliminationLimit == 0) return; - //eliminationLimit--; - //printErr('elim!!!!! ' + name); - // yes, eliminate! - varsToRemove[name] = 2; // both assign and var definitions can have other vars we must clean up - assert(tracked.has(name)); - Tracking& info = tracked[name]; - Ref defNode = info.defNode; - assert(!!defNode); - if (!sideEffectFree.has(name)) { - assert(defNode[0] != VAR); - // assign - Ref value = defNode[3]; - // wipe out the assign - safeCopy(defNode, makeEmpty()); - // replace this node in-place - safeCopy(node, value); - } else { - // This has no side effects and no uses, empty it out in-place - safeCopy(node, makeEmpty()); - } - tracked.erase(name); - }; - traversePre(func, [&](Ref block) { - // Look for statements, including while-switch pattern - Ref stats = getStatements(block); - if (!stats && (block[0] == WHILE && block[2][0] == SWITCH)) { - stats = &(makeArray(1)->push_back(block[2])); - } - if (!stats) return; - tracked.clear(); - for (size_t i = 0; i < stats->size(); i++) { - Ref node = deStat(stats[i]); - Ref type = node[0]; - if (type == RETURN && i+1 < stats->size()) { - stats->setSize(i+1); // remove any code after a return - } - // Check for things that affect elimination - if (ELIMINATION_SAFE_NODES.has(type)) { -#ifdef PROFILING - tstmtelim += clock() - start; - start = clock(); -#endif - scan(node); -#ifdef PROFILING - tstmtscan += clock() - start; - start = clock(); -#endif - } else if (type == VAR) { - continue; // asm normalisation has reduced 'var' to just the names - } else { - tracked.clear(); // not a var or assign, break all potential elimination so far - } - } - }); - -#ifdef PROFILING - tstmtelim += clock() - start; - start = clock(); -#endif - - StringIntMap seenUses; - StringStringMap helperReplacements; // for looper-helper optimization - - // clean up vars, and loop variable elimination - traversePrePost(func, [&](Ref node) { - // pre - Ref type = node[0]; - /*if (type == VAR) { - node[1] = node[1].filter(function(pair) { return !varsToRemove[pair[0]] }); - if (node[1]->size() == 0) { - // wipe out an empty |var;| - node[0] = TOPLEVEL; - node[1] = []; - } - } else */ - if (type == ASSIGN && node[1]->isBool(true) && node[2][0] == NAME && node[3][0] == NAME && node[2][1] == node[3][1]) { - // elimination led to X = X, which we can just remove - safeCopy(node, makeEmpty()); - } - }, [&](Ref node) { - // post - Ref type = node[0]; - if (type == NAME) { - IString name = node[1]->getIString(); - if (helperReplacements.has(name)) { - node[1]->setString(helperReplacements[name]); - return; // no need to track this anymore, we can't loop-optimize more than once - } - // track how many uses we saw. we need to know when a variable is no longer used (hence we run this in the post) - seenUses[name]++; - } else if (type == WHILE) { - // try to remove loop helper variables specifically - Ref stats = node[2][1]; - Ref last = stats->back(); - if (!!last && last[0] == IF && last[2][0] == BLOCK && !!last[3] && last[3][0] == BLOCK) { - Ref ifTrue = last[2]; - Ref ifFalse = last[3]; - clearEmptyNodes(ifTrue[1]); - clearEmptyNodes(ifFalse[1]); - bool flip = false; - if (ifFalse[1]->size() > 0 && !!ifFalse[1][0] && !!ifFalse[1]->back() && ifFalse[1]->back()[0] == BREAK) { // canonicalize break in the if-true - Ref temp = ifFalse; - ifFalse = ifTrue; - ifTrue = temp; - flip = true; - } - if (ifTrue[1]->size() > 0 && !!ifTrue[1][0] && !!ifTrue[1]->back() && ifTrue[1]->back()[0] == BREAK) { - Ref assigns = ifFalse[1]; - clearEmptyNodes(assigns); - std::vector loopers, helpers; - for (size_t i = 0; i < assigns->size(); i++) { - if (assigns[i][0] == STAT && assigns[i][1][0] == ASSIGN) { - Ref assign = assigns[i][1]; - if (assign[1]->isBool(true) && assign[2][0] == NAME && assign[3][0] == NAME) { - IString looper = assign[2][1]->getIString(); - IString helper = assign[3][1]->getIString(); - if (definitions[helper] == 1 && seenUses[looper] == namings[looper] && - !helperReplacements.has(helper) && !helperReplacements.has(looper)) { - loopers.push_back(looper); - helpers.push_back(helper); - } - } - } - } - // remove loop vars that are used in the rest of the else - for (size_t i = 0; i < assigns->size(); i++) { - if (assigns[i][0] == STAT && assigns[i][1][0] == ASSIGN) { - Ref assign = assigns[i][1]; - if (!(assign[1]->isBool(true) && assign[2][0] == NAME && assign[3][0] == NAME) || indexOf(loopers, assign[2][1]->getIString()) < 0) { - // this is not one of the loop assigns - traversePre(assign, [&](Ref node) { - if (node[0] == NAME) { - int index = indexOf(loopers, node[1]->getIString()); - if (index < 0) index = indexOf(helpers, node[1]->getIString()); - if (index >= 0) { - loopers.erase(loopers.begin() + index); - helpers.erase(helpers.begin() + index); - } - } - }); - } - } - } - // remove loop vars that are used in the if - traversePre(ifTrue, [&](Ref node) { - if (node[0] == NAME) { - int index = indexOf(loopers, node[1]->getIString()); - if (index < 0) index = indexOf(helpers, node[1]->getIString()); - if (index >= 0) { - loopers.erase(loopers.begin() + index); - helpers.erase(helpers.begin() + index); - } - } - }); - if (loopers.size() == 0) return; - for (size_t l = 0; l < loopers.size(); l++) { - IString looper = loopers[l]; - IString helper = helpers[l]; - // the remaining issue is whether loopers are used after the assignment to helper and before the last line (where we assign to it) - int found = -1; - for (int i = (int)stats->size()-2; i >= 0; i--) { - Ref curr = stats[i]; - if (curr[0] == STAT && curr[1][0] == ASSIGN) { - Ref currAssign = curr[1]; - if (currAssign[1]->isBool(true) && currAssign[2][0] == NAME) { - IString to = currAssign[2][1]->getIString(); - if (to == helper) { - found = i; - break; - } - } - } - } - if (found < 0) return; - // if a loop variable is used after we assigned to the helper, we must save its value and use that. - // (note that this can happen due to elimination, if we eliminate an expression containing the - // loop var far down, past the assignment!) - // first, see if the looper and helpers overlap. Note that we check for this looper, compared to - // *ALL* the helpers. Helpers will be replaced by loopers as we eliminate them, potentially - // causing conflicts, so any helper is a concern. - int firstLooperUsage = -1; - int lastLooperUsage = -1; - int firstHelperUsage = -1; - for (int i = found+1; i < (int)stats->size(); i++) { - Ref curr = i < (int)stats->size()-1 ? stats[i] : last[1]; // on the last line, just look in the condition - traversePre(curr, [&](Ref node) { - if (node[0] == NAME) { - if (node[1] == looper) { - if (firstLooperUsage < 0) firstLooperUsage = i; - lastLooperUsage = i; - } else if (indexOf(helpers, node[1]->getIString()) >= 0) { - if (firstHelperUsage < 0) firstHelperUsage = i; - } - } - }); - } - if (firstLooperUsage >= 0) { - // the looper is used, we cannot simply merge the two variables - if ((firstHelperUsage < 0 || firstHelperUsage > lastLooperUsage) && lastLooperUsage+1 < (int)stats->size() && triviallySafeToMove(stats[found], asmData) && - seenUses[helper] == namings[helper]) { - // the helper is not used, or it is used after the last use of the looper, so they do not overlap, - // and the last looper usage is not on the last line (where we could not append after it), and the - // helper is not used outside of the loop. - // just move the looper definition to after the looper's last use - stats->insert(lastLooperUsage+1, stats[found]); - stats->splice(found, 1); - } else { - // they overlap, we can still proceed with the loop optimization, but we must introduce a - // loop temp helper variable - IString temp(strdupe((std::string(looper.c_str()) + "$looptemp").c_str())); - assert(!asmData.isLocal(temp)); - for (int i = firstLooperUsage; i <= lastLooperUsage; i++) { - Ref curr = i < (int)stats->size()-1 ? stats[i] : last[1]; // on the last line, just look in the condition - - std::function looperToLooptemp = [&](Ref node) { - if (node[0] == NAME) { - if (node[1] == looper) { - node[1]->setString(temp); - } - } else if (node[0] == ASSIGN && node[2][0] == NAME) { - // do not traverse the assignment target, phi assignments to the loop variable must remain - traversePrePostConditional(node[3], looperToLooptemp, [](Ref node){}); - return false; - } - return true; - }; - traversePrePostConditional(curr, looperToLooptemp, [](Ref node){}); - } - asmData.addVar(temp, asmData.getType(looper)); - stats->insert(found, make1(STAT, make3(ASSIGN, makeBool(true), makeName(temp), makeName(looper)))); - } - } - } - for (size_t l = 0; l < helpers.size(); l++) { - for (size_t k = 0; k < helpers.size(); k++) { - if (l != k && helpers[l] == helpers[k]) return; // it is complicated to handle a shared helper, abort - } - } - // hurrah! this is safe to do - for (size_t l = 0; l < loopers.size(); l++) { - IString looper = loopers[l]; - IString helper = helpers[l]; - varsToRemove[helper] = 2; - traversePre(node, [&](Ref node) { // replace all appearances of helper with looper - if (node[0] == NAME && node[1] == helper) node[1]->setString(looper); - }); - helperReplacements[helper] = looper; // replace all future appearances of helper with looper - helperReplacements[looper] = looper; // avoid any further attempts to optimize looper in this manner (seenUses is wrong anyhow, too) - } - // simplify the if. we remove the if branch, leaving only the else - if (flip) { - last[1] = simplifyNotCompsDirect(make2(UNARY_PREFIX, L_NOT, last[1])); - Ref temp = last[2]; - last[2] = last[3]; - last[3] = temp; - } - if (loopers.size() == assigns->size()) { - last->pop_back(); - } else { - Ref elseStats = getStatements(last[3]); - for (size_t i = 0; i < elseStats->size(); i++) { - Ref stat = deStat(elseStats[i]); - if (stat[0] == ASSIGN && stat[2][0] == NAME) { - if (indexOf(loopers, stat[2][1]->getIString()) >= 0) { - elseStats[i] = makeEmpty(); - } - } - } - } - } - } - } - }); - -#ifdef PROFILING - tcleanvars += clock() - start; - start = clock(); -#endif - - for (auto v : varsToRemove) { - if (v.second == 2 && asmData.isVar(v.first)) asmData.deleteVar(v.first); - } - - asmData.denormalize(); - -#ifdef PROFILING - treconstruct += clock() - start; - start = clock(); -#endif - - }); - - removeAllEmptySubNodes(ast); - -#ifdef PROFILING - errv(" EL stages: a:%li fe:%li vc:%li se:%li (ss:%li) cv:%li r:%li", - tasmdata, tfnexamine, tvarcheck, tstmtelim, tstmtscan, tcleanvars, treconstruct); -#endif -} - -void eliminateMemSafe(Ref ast) { - eliminate(ast, true); -} - -void simplifyExpressions(Ref ast) { - // Simplify common expressions used to perform integer conversion operations - // in cases where no conversion is needed. - auto simplifyIntegerConversions = [](Ref ast) { - traversePre(ast, [](Ref node) { - Ref type = node[0]; - if (type == BINARY && node[1] == RSHIFT && node[3][0] == NUM && - node[2][0] == BINARY && node[2][1] == LSHIFT && node[2][3][0] == NUM && node[3][1]->getNumber() == node[2][3][1]->getNumber()) { - // Transform (x&A)<>B to X&A. - Ref innerNode = node[2][2]; - double shifts = node[3][1]->getNumber(); - if (innerNode[0] == BINARY && innerNode[1] == AND && innerNode[3][0] == NUM) { - double mask = innerNode[3][1]->getNumber(); - if (isInteger32(mask) && isInteger32(shifts) && ((jsD2I(mask) << jsD2I(shifts)) >> jsD2I(shifts)) == jsD2I(mask)) { - safeCopy(node, innerNode); - return; - } - } - } else if (type == BINARY && BITWISE.has(node[1])) { - for (int i = 2; i <= 3; i++) { - Ref subNode = node[i]; - if (subNode[0] == BINARY && subNode[1] == AND && subNode[3][0] == NUM && subNode[3][1]->getNumber() == 1) { - // Rewrite (X < Y) & 1 to X < Y , when it is going into a bitwise operator. We could - // remove even more (just replace &1 with |0, then subsequent passes could remove the |0) - // but v8 issue #2513 means the code would then run very slowly in chrome. - Ref input = subNode[2]; - if (input[0] == BINARY && COMPARE_OPS.has(input[1])) { - safeCopy(node[i], input); - } - } - } - } - }); - }; - - // When there is a bunch of math like (((8+5)|0)+12)|0, only the external |0 is needed, one correction is enough. - // At each node, ((X|0)+Y)|0 can be transformed into (X+Y): The inner corrections are not needed - // TODO: Is the same is true for 0xff, 0xffff? - // Likewise, if we have |0 inside a block that will be >>'d, then the |0 is unnecessary because some - // 'useful' mathops already |0 anyhow. - - auto simplifyOps = [](Ref ast) { - auto removeMultipleOrZero = [&ast] { - bool rerun = true; - while (rerun) { - rerun = false; - std::vector stack; - std::function process = [&stack, &rerun, &process, &ast](Ref node) { - Ref type = node[0]; - if (type == BINARY && node[1] == OR) { - if (node[2][0] == NUM && node[3][0] == NUM) { - node[2][1]->setNumber(jsD2I(node[2][1]->getNumber()) | jsD2I(node[3][1]->getNumber())); - stack.push_back(0); - safeCopy(node, node[2]); - return; - } - bool go = false; - if (node[2][0] == NUM && node[2][1]->getNumber() == 0) { - // canonicalize order - Ref temp = node[3]; - node[3] = node[2]; - node[2] = temp; - go = true; - } else if (node[3][0] == NUM && node[3][1]->getNumber() == 0) { - go = true; - } - if (!go) { - stack.push_back(1); - return; - } - // We might be able to remove this correction - for (int i = stack.size()-1; i >= 0; i--) { - if (stack[i] >= 1) { - if (stack.back() < 2 && node[2][0] == CALL) break; // we can only remove multiple |0s on these - if (stack.back() < 1 && (COERCION_REQUIRING_OPS.has(node[2][0]) || - (node[2][0] == BINARY && COERCION_REQUIRING_BINARIES.has(node[2][1])))) break; // we can remove |0 or >>2 - // we will replace ourselves with the non-zero side. Recursively process that node. - Ref result = node[2][0] == NUM && node[2][1]->getNumber() == 0 ? node[3] : node[2], other; - // replace node in-place - safeCopy(node, result); - rerun = true; - process(result); - return; - } else if (stack[i] == -1) { - break; // Too bad, we can't - } - } - stack.push_back(2); // From here on up, no need for this kind of correction, it's done at the top - // (Add this at the end, so it is only added if we did not remove it) - } else if (type == BINARY && USEFUL_BINARY_OPS.has(node[1])) { - stack.push_back(1); - } else if ((type == BINARY && SAFE_BINARY_OPS.has(node[1])) || type == NUM || type == NAME) { - stack.push_back(0); // This node is safe in that it does not interfere with this optimization - } else if (type == UNARY_PREFIX && node[1] == B_NOT) { - stack.push_back(1); - } else { - stack.push_back(-1); // This node is dangerous! Give up if you see this before you see '1' - } - }; - - traversePrePost(ast, process, [&stack](Ref node) { - assert(!stack.empty()); - stack.pop_back(); - }); - } - }; - - removeMultipleOrZero(); - - // & and heap-related optimizations - - bool hasTempDoublePtr = false, rerunOrZeroPass = false; - - traversePrePostConditional(ast, [](Ref node) { - // Detect trees which should not - // be simplified. - if (node[0] == SUB && node[1][0] == NAME && isFunctionTable(node[1][1])) { - return false; // do not traverse subchildren here, we should not collapse 55 & 126. - } - return true; - }, [&hasTempDoublePtr, &rerunOrZeroPass](Ref node) { - // Simplifications are done now so - // that we simplify a node's operands before the node itself. This allows - // optimizations to cascade. - Ref type = node[0]; - if (type == NAME) { - if (node[1] == TEMP_DOUBLE_PTR) hasTempDoublePtr = true; - } else if (type == BINARY && node[1] == AND && node[3][0] == NUM) { - if (node[2][0] == NUM) { - safeCopy(node, makeNum(jsD2I(node[2][1]->getNumber()) & jsD2I(node[3][1]->getNumber()))); - return; - } - Ref input = node[2]; - double amount = node[3][1]->getNumber(); - if (input[0] == BINARY && input[1] == AND && input[3][0] == NUM) { - // Collapse X & 255 & 1 - node[3][1]->setNumber(jsD2I(amount) & jsD2I(input[3][1]->getNumber())); - node[2] = input[2]; - } else if (input[0] == SUB && input[1][0] == NAME) { - // HEAP8[..] & 255 => HEAPU8[..] - HeapInfo hi = parseHeap(input[1][1]->getCString()); - if (hi.valid) { - if (isInteger32(amount) && amount == powl(2, hi.bits)-1) { - if (!hi.unsign) { - input[1][1]->setString(getHeapStr(hi.bits, true)); // make unsigned - } - // we cannot return HEAPU8 without a coercion, but at least we do HEAP8 & 255 => HEAPU8 | 0 - node[1]->setString(OR); - node[3][1]->setNumber(0); - return; - } - } - } else if (input[0] == BINARY && input[1] == RSHIFT && - input[2][0] == BINARY && input[2][1] == LSHIFT && - input[2][3][0] == NUM && input[3][0] == NUM && - input[2][3][1]->getInteger() == input[3][1]->getInteger() && - (~(0xFFFFFFFFu >> input[3][1]->getInteger()) & jsD2I(amount)) == 0) { - // x << 24 >> 24 & 255 => x & 255 - return safeCopy(node, make3(BINARY, AND, input[2][2], node[3])); - } - } else if (type == BINARY && node[1] == XOR) { - // LLVM represents bitwise not as xor with -1. Translate it back to an actual bitwise not. - if (node[3][0] == UNARY_PREFIX && node[3][1] == MINUS && node[3][2][0] == NUM && - node[3][2][1]->getNumber() == 1 && - !(node[2][0] == UNARY_PREFIX && node[2][1] == B_NOT)) { // avoid creating ~~~ which is confusing for asm given the role of ~~ - safeCopy(node, make2(UNARY_PREFIX, B_NOT, node[2])); - return; - } - } else if (type == BINARY && node[1] == RSHIFT && node[3][0] == NUM && - node[2][0] == BINARY && node[2][1] == LSHIFT && node[2][3][0] == NUM && - node[2][2][0] == SUB && node[2][2][1][0] == NAME) { - // collapse HEAPU?8[..] << 24 >> 24 etc. into HEAP8[..] | 0 - double amount = node[3][1]->getNumber(); - if (amount == node[2][3][1]->getNumber()) { - HeapInfo hi = parseHeap(node[2][2][1][1]->getCString()); - if (hi.valid && hi.bits == 32 - amount) { - node[2][2][1][1]->setString(getHeapStr(hi.bits, false)); - node[1]->setString(OR); - node[2] = node[2][2]; - node[3][1]->setNumber(0); - rerunOrZeroPass = true; - return; - } - } - } else if (type == ASSIGN) { - // optimizations for assigning into HEAP32 specifically - if (node[1]->isBool(true) && node[2][0] == SUB && node[2][1][0] == NAME) { - if (node[2][1][1] == HEAP32) { - // HEAP32[..] = x | 0 does not need the | 0 (unless it is a mandatory |0 of a call) - if (node[3][0] == BINARY && node[3][1] == OR) { - if (node[3][2][0] == NUM && node[3][2][1]->getNumber() == 0 && node[3][3][0] != CALL) { - node[3] = node[3][3]; - } else if (node[3][3][0] == NUM && node[3][3][1]->getNumber() == 0 && node[3][2][0] != CALL) { - node[3] = node[3][2]; - } - } - } else if (node[2][1][1] == HEAP8) { - // HEAP8[..] = x & 0xff does not need the & 0xff - if (node[3][0] == BINARY && node[3][1] == AND && node[3][3][0] == NUM && node[3][3][1]->getNumber() == 0xff) { - node[3] = node[3][2]; - } - } else if (node[2][1][1] == HEAP16) { - // HEAP16[..] = x & 0xffff does not need the & 0xffff - if (node[3][0] == BINARY && node[3][1] == AND && node[3][3][0] == NUM && node[3][3][1]->getNumber() == 0xffff) { - node[3] = node[3][2]; - } - } - } - Ref value = node[3]; - if (value[0] == BINARY && value[1] == OR) { - // canonicalize order of |0 to end - if (value[2][0] == NUM && value[2][1]->getNumber() == 0) { - Ref temp = value[2]; - value[2] = value[3]; - value[3] = temp; - } - // if a seq ends in an |0, remove an external |0 - // note that it is only safe to do this in assigns, like we are doing here (return (x, y|0); is not valid) - if (value[2][0] == SEQ && value[2][2][0] == BINARY && USEFUL_BINARY_OPS.has(value[2][2][1])) { - node[3] = value[2]; - } - } - } else if (type == BINARY && node[1] == RSHIFT && node[2][0] == NUM && node[3][0] == NUM) { - // optimize num >> num, in asm we need this since we do not optimize shifts in asm.js - node[0]->setString(NUM); - node[1]->setNumber(jsD2I(node[2][1]->getNumber()) >> jsD2I(node[3][1]->getNumber())); - node->setSize(2); - return; - } else if (type == BINARY && node[1] == PLUS) { - // The most common mathop is addition, e.g. in getelementptr done repeatedly. We can join all of those, - // by doing (num+num) ==> newnum. - if (node[2][0] == NUM && node[3][0] == NUM) { - node[2][1]->setNumber(jsD2I(node[2][1]->getNumber()) + jsD2I(node[3][1]->getNumber())); - safeCopy(node, node[2]); - return; - } - } - }); - - if (rerunOrZeroPass) removeMultipleOrZero(); - - if (hasTempDoublePtr) { - AsmData asmData(ast); - traversePre(ast, [](Ref node) { - Ref type = node[0]; - if (type == ASSIGN) { - if (node[1]->isBool(true) && node[2][0] == SUB && node[2][1][0] == NAME && node[2][1][1] == HEAP32) { - // remove bitcasts that are now obviously pointless, e.g. - // HEAP32[$45 >> 2] = HEAPF32[tempDoublePtr >> 2] = ($14 < $28 ? $14 : $28) - $42, HEAP32[tempDoublePtr >> 2] | 0; - Ref value = node[3]; - if (value[0] == SEQ && value[1][0] == ASSIGN && value[1][2][0] == SUB && value[1][2][1][0] == NAME && value[1][2][1][1] == HEAPF32 && - value[1][2][2][0] == BINARY && value[1][2][2][2][0] == NAME && value[1][2][2][2][1] == TEMP_DOUBLE_PTR) { - // transform to HEAPF32[$45 >> 2] = ($14 < $28 ? $14 : $28) - $42; - node[2][1][1]->setString(HEAPF32); - node[3] = value[1][3]; - } - } - } else if (type == SEQ) { - // (HEAP32[tempDoublePtr >> 2] = HEAP32[$37 >> 2], +HEAPF32[tempDoublePtr >> 2]) - // ==> - // +HEAPF32[$37 >> 2] - if (node[0] == SEQ && node[1][0] == ASSIGN && node[1][2][0] == SUB && node[1][2][1][0] == NAME && - (node[1][2][1][1] == HEAP32 || node[1][2][1][1] == HEAPF32) && - node[1][2][2][0] == BINARY && node[1][2][2][2][0] == NAME && node[1][2][2][2][1] == TEMP_DOUBLE_PTR && - node[1][3][0] == SUB && node[1][3][1][0] == NAME && (node[1][3][1][1] == HEAP32 || node[1][3][1][1] == HEAPF32) && - node[2][0] != SEQ) { // avoid (x, y, z) which can be used for tempDoublePtr on doubles for alignment fixes - if (node[1][2][1][1] == HEAP32) { - node[1][3][1][1]->setString(HEAPF32); - safeCopy(node, makeAsmCoercion(node[1][3], detectType(node[2]))); - return; - } else { - node[1][3][1][1]->setString(HEAP32); - safeCopy(node, make3(BINARY, OR, node[1][3], makeNum(0))); - return; - } - } - } - }); - - // finally, wipe out remaining ones by finding cases where all assignments to X are bitcasts, and all uses are writes to - // the other heap type, then eliminate the bitcast - struct BitcastData { - int define_HEAP32, define_HEAPF32, use_HEAP32, use_HEAPF32, namings; - bool ok; - std::vector defines, uses; - - BitcastData() : define_HEAP32(0), define_HEAPF32(0), use_HEAP32(0), use_HEAPF32(0), namings(0), ok(false) {} - }; - std::unordered_map bitcastVars; - traversePre(ast, [&bitcastVars](Ref node) { - if (node[0] == ASSIGN && node[1]->isBool(true) && node[2][0] == NAME) { - Ref value = node[3]; - if (value[0] == SEQ && value[1][0] == ASSIGN && value[1][2][0] == SUB && value[1][2][1][0] == NAME && - (value[1][2][1][1] == HEAP32 || value[1][2][1][1] == HEAPF32) && - value[1][2][2][0] == BINARY && value[1][2][2][2][0] == NAME && value[1][2][2][2][1] == TEMP_DOUBLE_PTR) { - IString name = node[2][1]->getIString(); - IString heap = value[1][2][1][1]->getIString(); - if (heap == HEAP32) { - bitcastVars[name].define_HEAP32++; - } else { - assert(heap == HEAPF32); - bitcastVars[name].define_HEAPF32++; - } - bitcastVars[name].defines.push_back(node); - bitcastVars[name].ok = true; - } - } - }); - traversePre(ast, [&bitcastVars](Ref node) { - Ref type = node[0]; - if (type == NAME && bitcastVars[node[1]->getCString()].ok) { - bitcastVars[node[1]->getCString()].namings++; - } else if (type == ASSIGN && node[1]->isBool(true)) { - Ref value = node[3]; - if (value[0] == NAME) { - IString name = value[1]->getIString(); - if (bitcastVars[name].ok) { - Ref target = node[2]; - if (target[0] == SUB && target[1][0] == NAME && (target[1][1] == HEAP32 || target[1][1] == HEAPF32)) { - if (target[1][1] == HEAP32) { - bitcastVars[name].use_HEAP32++; - } else { - bitcastVars[name].use_HEAPF32++; - } - bitcastVars[name].uses.push_back(node); - } - } - } - } - }); - for (auto iter : bitcastVars) { - const IString& v = iter.first; - BitcastData& info = iter.second; - // good variables define only one type, use only one type, have definitions and uses, and define as a different type than they use - if (info.define_HEAP32*info.define_HEAPF32 == 0 && info.use_HEAP32*info.use_HEAPF32 == 0 && - info.define_HEAP32+info.define_HEAPF32 > 0 && info.use_HEAP32+info.use_HEAPF32 > 0 && - info.define_HEAP32*info.use_HEAP32 == 0 && info.define_HEAPF32*info.use_HEAPF32 == 0 && - asmData.isLocal(v.c_str()) && info.namings == info.define_HEAP32+info.define_HEAPF32+info.use_HEAP32+info.use_HEAPF32) { - IString& correct = info.use_HEAP32 ? HEAPF32 : HEAP32; - for (auto define : info.defines) { - define[3] = define[3][1][3]; - if (correct == HEAP32) { - define[3] = make3(BINARY, OR, define[3], makeNum(0)); - } else { - assert(correct == HEAPF32); - define[3] = makeAsmCoercion(define[3], preciseF32 ? ASM_FLOAT : ASM_DOUBLE); - } - // do we want a simplifybitops on the new values here? - } - for (auto use : info.uses) { - use[2][1][1]->setString(correct.c_str()); - } - AsmType correctType; - switch(asmData.getType(v.c_str())) { - case ASM_INT: correctType = preciseF32 ? ASM_FLOAT : ASM_DOUBLE; break; - case ASM_FLOAT: case ASM_DOUBLE: correctType = ASM_INT; break; - default: assert(0); - } - asmData.setType(v.c_str(), correctType); - } - } - asmData.denormalize(); - } - }; - - std::function emitsBoolean = [&emitsBoolean](Ref node) { - Ref type = node[0]; - if (type == NUM) { - return node[1]->getNumber() == 0 || node[1]->getNumber() == 1; - } - if (type == BINARY) return COMPARE_OPS.has(node[1]); - if (type == UNARY_PREFIX) return node[1] == L_NOT; - if (type == CONDITIONAL) return emitsBoolean(node[2]) && emitsBoolean(node[3]); - return false; - }; - - // expensive | expensive can be turned into expensive ? 1 : expensive, and - // expensive | cheap can be turned into cheap ? 1 : expensive, - // so that we can avoid the expensive computation, if it has no side effects. - auto conditionalize = [&emitsBoolean](Ref ast) { - traversePre(ast, [&emitsBoolean](Ref node) { - const int MIN_COST = 7; - if (node[0] == BINARY && (node[1] == OR || node[1] == AND) && node[3][0] != NUM && node[2][0] != NUM) { - // logical operator on two non-numerical values - Ref left = node[2]; - Ref right = node[3]; - if (!emitsBoolean(left) || !emitsBoolean(right)) return; - bool leftEffects = hasSideEffects(left); - bool rightEffects = hasSideEffects(right); - if (leftEffects && rightEffects) return; // both must execute - // canonicalize with side effects, if any, happening on the left - if (rightEffects) { - if (measureCost(left) < MIN_COST) return; // avoidable code is too cheap - Ref temp = left; - left = right; - right = temp; - } else if (leftEffects) { - if (measureCost(right) < MIN_COST) return; // avoidable code is too cheap - } else { - // no side effects, reorder based on cost estimation - int leftCost = measureCost(left); - int rightCost = measureCost(right); - if (std::max(leftCost, rightCost) < MIN_COST) return; // avoidable code is too cheap - // canonicalize with expensive code on the right - if (leftCost > rightCost) { - Ref temp = left; - left = right; - right = temp; - } - } - // worth it, perform conditionalization - Ref ret; - if (node[1] == OR) { - ret = make3(CONDITIONAL, left, makeNum(1), right); - } else { // & - ret = make3(CONDITIONAL, left, right, makeNum(0)); - } - if (left[0] == UNARY_PREFIX && left[1] == L_NOT) { - ret[1] = flipCondition(left); - Ref temp = ret[2]; - ret[2] = ret[3]; - ret[3] = temp; - } - safeCopy(node, ret); - return; - } - }); - }; - - traverseFunctions(ast, [&](Ref func) { - simplifyIntegerConversions(func); - simplifyOps(func); - traversePre(func, [](Ref node) { - Ref ret = simplifyNotCompsDirect(node); - if (ret.get() != node.get()) { // if we received a different pointer in return, then we need to copy the new value - safeCopy(node, ret); - } - }); - conditionalize(func); - }); -} - -void simplifyIfs(Ref ast) { - traverseFunctions(ast, [](Ref func) { - bool simplifiedAnElse = false; - - traversePre(func, [&simplifiedAnElse](Ref node) { - // simplify if (x) { if (y) { .. } } to if (x ? y : 0) { .. } - if (node[0] == IF) { - Ref body = node[2]; - // recurse to handle chains - while (body[0] == BLOCK) { - Ref stats = body[1]; - if (stats->size() == 0) break; - Ref other = stats->back(); - if (other[0] != IF) { - // our if block does not end with an if. perhaps if have an else we can flip - if (node->size() > 3 && !!node[3] && node[3][0] == BLOCK) { - stats = node[3][1]; - if (stats->size() == 0) break; - other = stats->back(); - if (other[0] == IF) { - // flip node - node[1] = flipCondition(node[1]); - node[2] = node[3]; - node[3] = body; - body = node[2]; - } else break; - } else break; - } - // we can handle elses, but must be fully identical - if (!!node[3] || !!other[3]) { - if (!node[3]) break; - if (!node[3]->deepCompare(other[3])) { - // the elses are different, but perhaps if we flipped a condition we can do better - if (node[3]->deepCompare(other[2])) { - // flip other. note that other may not have had an else! add one if so; we will eliminate such things later - if (!other[3]) other[3] = makeBlock(); - other[1] = flipCondition(other[1]); - Ref temp = other[2]; - other[2] = other[3]; - other[3] = temp; - } else break; - } - } - if (stats->size() > 1) { - // try to commaify - turn everything between the ifs into a comma operator inside the second if - bool ok = true; - for (size_t i = 0; i+1 < stats->size(); i++) { - Ref curr = deStat(stats[i]); - if (!commable(curr)) ok = false; - } - if (!ok) break; - for (int i = stats->size()-2; i >= 0; i--) { - Ref curr = deStat(stats[i]); - other[1] = make2(SEQ, curr, other[1]); - } - Ref temp = makeArray(1); - temp->push_back(other); - stats = body[1] = temp; - } - if (stats->size() != 1) break; - if (!!node[3]) simplifiedAnElse = true; - node[1] = make3(CONDITIONAL, node[1], other[1], makeNum(0)); - body = node[2] = other[2]; - } - } - }); - - if (simplifiedAnElse) { - // there may be fusing opportunities - - // we can only fuse if we remove all uses of the label. if there are - // other ones - if the label check can be reached from elsewhere - - // we must leave it - bool abort = false; - - std::unordered_map labelAssigns; - - traversePre(func, [&labelAssigns, &abort](Ref node) { - if (node[0] == ASSIGN && node[2][0] == NAME && node[2][1] == LABEL) { - if (node[3][0] == NUM) { - int value = node[3][1]->getInteger(); - labelAssigns[value] = labelAssigns[value] + 1; - } else { - // label is assigned a dynamic value (like from indirectbr), we cannot do anything - abort = true; - } - } - }); - if (abort) return; - - std::unordered_map labelChecks; - - traversePre(func, [&labelChecks, &abort](Ref node) { - if (node[0] == BINARY && node[1] == EQ && node[2][0] == BINARY && node[2][1] == OR && - node[2][2][0] == NAME && node[2][2][1] == LABEL) { - if (node[3][0] == NUM) { - int value = node[3][1]->getInteger(); - labelChecks[value] = labelChecks[value] + 1; - } else { - // label is checked vs a dynamic value (like from indirectbr), we cannot do anything - abort = true; - } - } - }); - if (abort) return; - - int inLoop = 0; // when in a loop, we do not emit label = 0; in the relooper as there is no need - traversePrePost(func, [&inLoop, &labelAssigns, &labelChecks](Ref node) { - if (node[0] == WHILE) inLoop++; - Ref stats = getStatements(node); - if (!!stats && stats->size() > 0) { - for (int i = 0; i < (int)stats->size()-1; i++) { - Ref pre = stats[i]; - Ref post = stats[i+1]; - if (pre[0] == IF && pre->size() > 3 && !!pre[3] && post[0] == IF && (post->size() <= 3 || !post[3])) { - Ref postCond = post[1]; - if (postCond[0] == BINARY && postCond[1] == EQ && - postCond[2][0] == BINARY && postCond[2][1] == OR && - postCond[2][2][0] == NAME && postCond[2][2][1] == LABEL && - postCond[2][3][0] == NUM && postCond[2][3][1]->getNumber() == 0 && - postCond[3][0] == NUM) { - int postValue = postCond[3][1]->getInteger(); - Ref preElse = pre[3]; - if (labelAssigns[postValue] == 1 && labelChecks[postValue] == 1 && preElse[0] == BLOCK && preElse->size() >= 2 && preElse[1]->size() == 1) { - Ref preStat = preElse[1][0]; - if (preStat[0] == STAT && preStat[1][0] == ASSIGN && - preStat[1][1]->isBool(true) && preStat[1][2][0] == NAME && preStat[1][2][1] == LABEL && - preStat[1][3][0] == NUM && preStat[1][3][1]->getNumber() == postValue) { - // Conditions match, just need to make sure the post clears label - if (post[2][0] == BLOCK && post[2]->size() >= 2 && post[2][1]->size() > 0) { - Ref postStat = post[2][1][0]; - bool haveClear = - postStat[0] == STAT && postStat[1][0] == ASSIGN && - postStat[1][1]->isBool(true) && postStat[1][2][0] == NAME && postStat[1][2][1] == LABEL && - postStat[1][3][0] == NUM && postStat[1][3][1]->getNumber() == 0; - if (!inLoop || haveClear) { - // Everything lines up, do it - pre[3] = post[2]; - if (haveClear) pre[3][1]->splice(0, 1); // remove the label clearing - stats->splice(i+1, 1); // remove the post entirely - } - } - } - } - } - } - } - } - }, [&inLoop](Ref node) { - if (node[0] == WHILE) inLoop--; - }); - assert(inLoop == 0); - } - }); -} - -void optimizeFrounds(Ref ast) { - // collapse fround(fround(..)), which can happen due to elimination - // also emit f0 instead of fround(0) (except in returns) - int inReturn = 0; - traversePrePost(ast, [&](Ref node) { - if (node[0] == RETURN) { - inReturn++; - } - }, [&](Ref node) { - if (node[0] == RETURN) { - inReturn--; - } - if (node[0] == CALL && node[1][0] == NAME && node[1][1] == MATH_FROUND) { - Ref arg = node[2][0]; - if (arg[0] == NUM) { - if (!inReturn && arg[1]->getInteger() == 0) { - safeCopy(node, makeName(F0)); - } - } else if (arg[0] == CALL && arg[1][0] == NAME && arg[1][1] == MATH_FROUND) { - safeCopy(node, arg); - } - } - }); -} - -// Very simple 'registerization', coalescing of variables into a smaller number. - -const char* getRegPrefix(AsmType type) { - switch (type) { - case ASM_INT: return "i"; break; - case ASM_DOUBLE: return "d"; break; - case ASM_FLOAT: return "f"; break; - case ASM_FLOAT32X4: return "F4"; break; - case ASM_FLOAT64X2: return "F2"; break; - case ASM_INT8X16: return "I16"; break; - case ASM_INT16X8: return "I8"; break; - case ASM_INT32X4: return "I4"; break; - case ASM_NONE: return "Z"; break; - default: assert(0); // type doesn't have a name yet - } - return nullptr; -} - -IString getRegName(AsmType type, int num) { - const char* str = getRegPrefix(type); - const int size = 256; - char temp[size]; - int written = sprintf(temp, "%s%d", str, num); - assert(written < size); - temp[written] = 0; - IString ret; - ret.set(temp, false); - return ret; -} - -void registerize(Ref ast) { - traverseFunctions(ast, [](Ref fun) { - AsmData asmData(fun); - // Add parameters as a first (fake) var (with assignment), so they get taken into consideration - // note: params are special, they can never share a register between them (see later) - Ref fake; - if (!!fun[2] && fun[2]->size()) { - Ref assign = makeNum(0); - // TODO: will be an isEmpty here, can reuse it. - fun[3]->insert(0, make1(VAR, fun[2]->map([&assign](Ref param) { - return &(makeArray(2)->push_back(param).push_back(assign)); - }))); - } - // Replace all var definitions with assignments; we will add var definitions at the top after we registerize - StringSet allVars; - traversePre(fun, [&](Ref node) { - Ref type = node[0]; - if (type == VAR) { - Ref vars = node[1]->filter([](Ref varr) { return varr->size() > 1; }); - if (vars->size() >= 1) { - safeCopy(node, unVarify(vars)); - } else { - safeCopy(node, makeEmpty()); - } - } else if (type == NAME) { - allVars.insert(node[1]->getIString()); - } - }); - removeAllUselessSubNodes(fun); // vacuum? - StringTypeMap regTypes; // reg name -> type - auto getNewRegName = [&](int num, IString name) { - AsmType type = asmData.getType(name); - IString ret = getRegName(type, num); - assert(!allVars.has(ret) || asmData.isLocal(ret)); // register must not shadow non-local name - regTypes[ret] = type; - return ret; - }; - // Find the # of uses of each variable. - // While doing so, check if all a variable's uses are dominated in a simple - // way by a simple assign, if so, then we can assign its register to it - // just for its definition to its last use, and not to the entire toplevel loop, - // we call such variables "optimizable" - StringIntMap varUses; - int level = 1; - std::unordered_map levelDominations; // level => set of dominated variables XXX vector? - StringIntMap varLevels; - StringSet possibles; - StringSet unoptimizables; - auto purgeLevel = [&]() { - // Invalidate all dominating on this level, further users make it unoptimizable - for (auto name : levelDominations[level]) { - varLevels[name] = 0; - } - levelDominations[level].clear(); - level--; - }; - std::function possibilifier = [&](Ref node) { - Ref type = node[0]; - if (type == NAME) { - IString name = node[1]->getIString(); - if (asmData.isLocal(name)) { - varUses[name]++; - if (possibles.has(name) && !varLevels[name]) unoptimizables.insert(name); // used outside of simple domination - } - } else if (type == ASSIGN && node[1]->isBool(true)) { - if (!!node[2] && node[2][0] == NAME) { - IString name = node[2][1]->getIString(); - // if local and not yet used, this might be optimizable if we dominate - // all other uses - if (asmData.isLocal(name) && !varUses[name] && !varLevels[name]) { - possibles.insert(name); - varLevels[name] = level; - levelDominations[level].insert(name); - } - } - } else if (CONTROL_FLOW.has(type)) { - // recurse children, in the context of a loop - if (type == WHILE || type == DO) { - traversePrePostConditional(node[1], possibilifier, [](Ref node){}); - level++; - traversePrePostConditional(node[2], possibilifier, [](Ref node){}); - purgeLevel(); - } else if (type == FOR) { - traversePrePostConditional(node[1], possibilifier, [](Ref node){}); - for (int i = 2; i <= 4; i++) { - level++; - traversePrePostConditional(node[i], possibilifier, [](Ref node){}); - purgeLevel(); - } - } else if (type == IF) { - traversePrePostConditional(node[1], possibilifier, [](Ref node){}); - level++; - traversePrePostConditional(node[2], possibilifier, [](Ref node){}); - purgeLevel(); - if (node->size() > 3 && !!node[3]) { - level++; - traversePrePostConditional(node[3], possibilifier, [](Ref node){}); - purgeLevel(); - } - } else if (type == SWITCH) { - traversePrePostConditional(node[1], possibilifier, [](Ref node){}); - Ref cases = node[2]; - for (size_t i = 0; i < cases->size(); i++) { - level++; - traversePrePostConditional(cases[i][1], possibilifier, [](Ref node){}); - purgeLevel(); - } - } else assert(0);; - return false; // prevent recursion into children, which we already did - } - return true; - }; - traversePrePostConditional(fun, possibilifier, [](Ref node){}); - - StringSet optimizables; - for (auto possible : possibles) { - if (!unoptimizables.has(possible)) optimizables.insert(possible); - } - - // Go through the function's code, assigning 'registers'. - // The only tricky bit is to keep variables locked on a register through loops, - // since they can potentially be returned to. Optimizable variables lock onto - // loops that they enter, unoptimizable variables lock in a conservative way - // into the topmost loop. - // Note that we cannot lock onto a variable in a loop if it was used and free'd - // before! (then they could overwrite us in the early part of the loop). For now - // we just use a fresh register to make sure we avoid this, but it could be - // optimized to check for safe registers (free, and not used in this loop level). - StringStringMap varRegs; // maps variables to the register they will use all their life - std::vector freeRegsClasses; - freeRegsClasses.resize(ASM_NONE); - int nextReg = 1; - StringVec fullNames; - fullNames.push_back(EMPTY); // names start at 1 - std::vector loopRegs; // for each loop nesting level, the list of bound variables - int loops = 0; // 0 is toplevel, 1 is first loop, etc - StringSet activeOptimizables; - StringIntMap optimizableLoops; - StringSet paramRegs; // true if the register is used by a parameter (and so needs no def at start of function; also cannot - // be shared with another param, each needs its own) - auto decUse = [&](IString name) { - if (!varUses[name]) return false; // no uses left, or not a relevant variable - if (optimizables.has(name)) activeOptimizables.insert(name); - IString reg = varRegs[name]; - assert(asmData.isLocal(name)); - StringVec& freeRegs = freeRegsClasses[asmData.getType(name)]; - if (!reg) { - // acquire register - if (optimizables.has(name) && freeRegs.size() > 0 && - !(asmData.isParam(name) && paramRegs.has(freeRegs.back()))) { // do not share registers between parameters - reg = freeRegs.back(); - freeRegs.pop_back(); - } else { - assert(fullNames.size() == nextReg); - reg = getNewRegName(nextReg++, name); - fullNames.push_back(reg); - if (asmData.isParam(name)) paramRegs.insert(reg); - } - varRegs[name] = reg; - } - varUses[name]--; - assert(varUses[name] >= 0); - if (varUses[name] == 0) { - if (optimizables.has(name)) activeOptimizables.erase(name); - // If we are not in a loop, or we are optimizable and not bound to a loop - // (we might have been in one but left it), we can free the register now. - if (loops == 0 || (optimizables.has(name) && !optimizableLoops.has(name))) { - // free register - freeRegs.push_back(reg); - } else { - // when the relevant loop is exited, we will free the register - int relevantLoop = optimizables.has(name) ? (optimizableLoops[name] ? optimizableLoops[name] : 1) : 1; - if ((int)loopRegs.size() <= relevantLoop+1) loopRegs.resize(relevantLoop+1); - loopRegs[relevantLoop].push_back(reg); - } - } - return true; - }; - traversePrePost(fun, [&](Ref node) { // XXX we rely on traversal order being the same as execution order here - Ref type = node[0]; - if (type == NAME) { - IString name = node[1]->getIString(); - if (decUse(name)) { - node[1]->setString(varRegs[name]); - } - } else if (LOOP.has(type)) { - loops++; - // Active optimizables lock onto this loop, if not locked onto one that encloses this one - for (auto name : activeOptimizables) { - if (!optimizableLoops[name]) { - optimizableLoops[name] = loops; - } - } - } - }, [&](Ref node) { - Ref type = node[0]; - if (LOOP.has(type)) { - // Free registers that were locked to this loop - if ((int)loopRegs.size() > loops && loopRegs[loops].size() > 0) { - for (auto loopReg : loopRegs[loops]) { - freeRegsClasses[regTypes[loopReg]].push_back(loopReg); - } - loopRegs[loops].clear(); - } - loops--; - } - }); - if (!!fun[2] && fun[2]->size()) { - fun[2]->setSize(0); // clear params, we will fill with registers - fun[3]->splice(0, 1); // remove fake initial var - } - - asmData.locals.clear(); - asmData.params.clear(); - asmData.vars.clear(); - for (int i = 1; i < nextReg; i++) { - IString reg = fullNames[i]; - AsmType type = regTypes[reg]; - if (!paramRegs.has(reg)) { - asmData.addVar(reg, type); - } else { - asmData.addParam(reg, type); - fun[2]->push_back(makeString(reg)); - } - } - asmData.denormalize(); - }); -} - -// Assign variables to 'registers', coalescing them onto a smaller number of shared -// variables. -// -// This does the same job as 'registerize' above, but burns a lot more cycles trying -// to reduce the total number of register variables. Key points about the operation: -// -// * we decompose the AST into a flow graph and perform a full liveness -// analysis, to determine which variables are live at each point. -// -// * variables that are live concurrently are assigned to different registers. -// -// * variables that are linked via 'x=y' style statements are assigned the same -// register if possible, so that the redundant assignment can be removed. -// (e.g. assignments used to pass state around through loops). -// -// * any code that cannot be reached through the flow-graph is removed. -// (e.g. redundant break statements like 'break L123; break;'). -// -// * any assignments that we can prove are not subsequently used are removed. -// (e.g. unnecessary assignments to the 'label' variable). -// -void registerizeHarder(Ref ast) { -#ifdef PROFILING - clock_t tasmdata = 0; - clock_t tflowgraph = 0; - clock_t tlabelfix = 0; - clock_t tbackflow = 0; - clock_t tjuncvaruniqassign = 0; - clock_t tjuncvarsort = 0; - clock_t tregassign = 0; - clock_t tblockproc = 0; - clock_t treconstruct = 0; -#endif - - traverseFunctions(ast, [&](Ref fun) { - -#ifdef PROFILING - clock_t start = clock(); -#endif - - // Do not try to process non-validating methods, like the heap replacer - bool abort = false; - traversePre(fun, [&abort](Ref node) { - if (node[0] == NEW) abort = true; - }); - if (abort) return; - - AsmData asmData(fun); - -#ifdef PROFILING - tasmdata += clock() - start; - start = clock(); -#endif - - // Utilities for allocating register variables. - // We need distinct register pools for each type of variable. - - typedef std::map IntStringMap; - std::vector allRegsByType; - allRegsByType.resize(ASM_NONE+1); - int nextReg = 1; - - auto createReg = [&](IString forName) { - // Create a new register of type suitable for the given variable name. - AsmType type = asmData.getType(forName); - IntStringMap& allRegs = allRegsByType[type]; - int reg = nextReg++; - allRegs[reg] = getRegName(type, reg); - return reg; - }; - - // Traverse the tree in execution order and synthesize a basic flow-graph. - // It's convenient to build a kind of "dual" graph where the nodes identify - // the junctions between blocks at which control-flow may branch, and each - // basic block is an edge connecting two such junctions. - // For each junction we store: - // * set of blocks that originate at the junction - // * set of blocks that terminate at the junction - // For each block we store: - // * a single entry junction - // * a single exit junction - // * a 'use' and 'kill' set of names for the block - // * full sequence of NAME and ASSIGN nodes in the block - // * whether each such node appears as part of a larger expression - // (and therefore cannot be safely eliminated) - // * set of labels that can be used to jump to this block - - struct Junction { - int id; - std::set inblocks, outblocks; - IOrderedStringSet live; - Junction(int id_) : id(id_) {} - }; - struct Node { - }; - struct Block { - int id, entry, exit; - std::set labels; - std::vector nodes; - std::vector isexpr; - StringIntMap use; - StringSet kill; - StringStringMap link; - StringIntMap lastUseLoc; - StringIntMap firstDeadLoc; - StringIntMap firstKillLoc; - StringIntMap lastKillLoc; - - Block() : id(-1), entry(-1), exit(-1) {} - }; - struct ContinueBreak { - int co, br; - ContinueBreak() : co(-1), br(-1) {} - ContinueBreak(int co_, int br_) : co(co_), br(br_) {} - }; - typedef std::unordered_map LabelState; - - std::vector junctions; - std::vector blocks; - int currEntryJunction = -1; - Block* nextBasicBlock = nullptr; - int isInExpr = 0; - std::vector activeLabels; - activeLabels.resize(1); - IString nextLoopLabel; - - const int ENTRY_JUNCTION = 0; - const int EXIT_JUNCTION = 1; - const int ENTRY_BLOCK = 0; - - auto addJunction = [&]() { - // Create a new junction, without inserting it into the graph. - // This is useful for e.g. pre-allocating an exit node. - int id = junctions.size(); - junctions.push_back(Junction(id)); - return id; - }; - - std::function joinJunction; - - auto markJunction = [&](int id) { - // Mark current traversal location as a junction. - // This makes a new basic block exiting at this position. - if (id < 0) { - id = addJunction(); - } - joinJunction(id, true); - return id; - }; - - auto setJunction = [&](int id, bool force) { - // Set the next entry junction to the given id. - // This can be used to enter at a previously-declared point. - // You can't return to a junction with no incoming blocks - // unless the 'force' parameter is specified. - assert(nextBasicBlock->nodes.size() == 0); // refusing to abandon an in-progress basic block - if (force || junctions[id].inblocks.size() > 0) { - currEntryJunction = id; - } else { - currEntryJunction = -1; - } - }; - - joinJunction = [&](int id, bool force) { - // Complete the pending basic block by exiting at this position. - // This can be used to exit at a previously-declared point. - if (currEntryJunction >= 0) { - assert(nextBasicBlock); - nextBasicBlock->id = blocks.size(); - nextBasicBlock->entry = currEntryJunction; - nextBasicBlock->exit = id; - junctions[currEntryJunction].outblocks.insert(nextBasicBlock->id); - junctions[id].inblocks.insert(nextBasicBlock->id); - blocks.push_back(nextBasicBlock); - } - nextBasicBlock = new Block(); - setJunction(id, force); - return id; - }; - - auto pushActiveLabels = [&](int onContinue, int onBreak) { - // Push the target junctions for continuing/breaking a loop. - // This should be called before traversing into a loop. - assert(activeLabels.size() > 0); - LabelState& prevLabels = activeLabels.back(); - LabelState newLabels = prevLabels; - newLabels[EMPTY] = ContinueBreak(onContinue, onBreak); - if (!!nextLoopLabel) { - newLabels[nextLoopLabel] = ContinueBreak(onContinue, onBreak); - nextLoopLabel = EMPTY; - } - // An unlabelled CONTINUE should jump to innermost loop, - // ignoring any nested SWITCH statements. - if (onContinue < 0 && prevLabels.count(EMPTY) > 0) { - newLabels[EMPTY].co = prevLabels[EMPTY].co; - } - activeLabels.push_back(newLabels); - }; - - auto popActiveLabels = [&]() { - // Pop the target junctions for continuing/breaking a loop. - // This should be called after traversing into a loop. - activeLabels.pop_back(); - }; - - auto markNonLocalJump = [&](IString type, IString label) { - // Complete a block via RETURN, BREAK or CONTINUE. - // This joins the targetted junction and then sets the current junction to null. - // Any code traversed before we get back to an existing junction is dead code. - if (type == RETURN) { - joinJunction(EXIT_JUNCTION, false); - } else { - assert(activeLabels.size() > 0); - assert(activeLabels.back().count(label) > 0); // 'jump to unknown label'); - auto targets = activeLabels.back()[label]; - if (type == CONTINUE) { - joinJunction(targets.co, false); - } else if (type == BREAK) { - joinJunction(targets.br, false); - } else { - assert(0); // 'unknown jump node type'); - } - } - currEntryJunction = -1; - }; - - auto addUseNode = [&](Ref node) { - // Mark a use of the given name node in the current basic block. - assert(node[0] == NAME); // 'not a use node'); - IString name = node[1]->getIString(); - if (asmData.isLocal(name)) { - nextBasicBlock->nodes.push_back(node); - nextBasicBlock->isexpr.push_back(isInExpr != 0); - if (nextBasicBlock->kill.count(name) == 0) { - nextBasicBlock->use[name] = 1; - } - } - }; - - auto addKillNode = [&](Ref node) { - // Mark an assignment to the given name node in the current basic block. - assert(node[0] == ASSIGN); //, 'not a kill node'); - assert(node[1]->isBool(true)); // 'not a kill node'); - assert(node[2][0] == NAME); //, 'not a kill node'); - IString name = node[2][1]->getIString(); - if (asmData.isLocal(name)) { - nextBasicBlock->nodes.push_back(node); - nextBasicBlock->isexpr.push_back(isInExpr != 0); - nextBasicBlock->kill.insert(name); - } - }; - - std::function lookThroughCasts = [&](Ref node) { - // Look through value-preserving casts, like "x | 0" => "x" - if (node[0] == BINARY && node[1] == OR) { - if (node[3][0] == NUM && node[3][1]->getNumber() == 0) { - return lookThroughCasts(node[2]); - } - } - return node; - }; - - auto addBlockLabel = [&](Ref node) { - assert(nextBasicBlock->nodes.size() == 0); // 'cant add label to an in-progress basic block') - if (node[0] == NUM) { - nextBasicBlock->labels.insert(node[1]->getInteger()); - } - }; - - auto isTrueNode = [&](Ref node) { - // Check if the given node is statically truthy. - return (node[0] == NUM && node[1]->getNumber() != 0); - }; - - auto isFalseNode = [&](Ref node) { - // Check if the given node is statically falsy. - return (node[0] == NUM && node[1]->getNumber() == 0); - }; - - std::function buildFlowGraph = [&](Ref node) { - // Recursive function to build up the flow-graph. - // It walks the tree in execution order, calling the above state-management - // functions at appropriate points in the traversal. - Ref type = node[0]; - - // Any code traversed without an active entry junction must be dead, - // as the resulting block could never be entered. Let's remove it. - if (currEntryJunction < 0 && junctions.size() > 0) { - safeCopy(node, makeEmpty()); - return; - } - - // Traverse each node type according to its particular control-flow semantics. - // TODO: switchify this - if (type == DEFUN) { - int jEntry = markJunction(-1); - assert(jEntry == ENTRY_JUNCTION); - int jExit = addJunction(); - assert(jExit == EXIT_JUNCTION); - for (size_t i = 0; i < node[3]->size(); i++) { - buildFlowGraph(node[3][i]); - } - joinJunction(jExit, false); - } else if (type == IF) { - isInExpr++; - buildFlowGraph(node[1]); - isInExpr--; - int jEnter = markJunction(-1); - int jExit = addJunction(); - if (!!node[2]) { - // Detect and mark "if (label == N) { }". - if (node[1][0] == BINARY && node[1][1] == EQ) { - Ref lhs = lookThroughCasts(node[1][2]); - if (lhs[0] == NAME && lhs[1] == LABEL) { - addBlockLabel(lookThroughCasts(node[1][3])); - } - } - buildFlowGraph(node[2]); - } - joinJunction(jExit, false); - setJunction(jEnter, false); - if (node->size() > 3 && !!node[3]) { - buildFlowGraph(node[3]); - } - joinJunction(jExit, false); - } else if (type == CONDITIONAL) { - isInExpr++; - // If the conditional has no side-effects, we can treat it as a single - // block, which might open up opportunities to remove it entirely. - if (!hasSideEffects(node)) { - buildFlowGraph(node[1]); - if (!!node[2]) { - buildFlowGraph(node[2]); - } - if (!!node[3]) { - buildFlowGraph(node[3]); - } - } else { - buildFlowGraph(node[1]); - int jEnter = markJunction(-1); - int jExit = addJunction(); - if (!!node[2]) { - buildFlowGraph(node[2]); - } - joinJunction(jExit, false); - setJunction(jEnter, false); - if (!!node[3]) { - buildFlowGraph(node[3]); - } - joinJunction(jExit, false); - } - isInExpr--; - } else if (type == WHILE) { - // Special-case "while (1) {}" to use fewer junctions, - // since emscripten generates a lot of these. - if (isTrueNode(node[1])) { - int jLoop = markJunction(-1); - int jExit = addJunction(); - pushActiveLabels(jLoop, jExit); - buildFlowGraph(node[2]); - popActiveLabels(); - joinJunction(jLoop, false); - setJunction(jExit, false); - } else { - int jCond = markJunction(-1); - int jLoop = addJunction(); - int jExit = addJunction(); - isInExpr++; - buildFlowGraph(node[1]); - isInExpr--; - joinJunction(jLoop, false); - pushActiveLabels(jCond, jExit); - buildFlowGraph(node[2]); - popActiveLabels(); - joinJunction(jCond, false); - // An empty basic-block linking condition exit to loop exit. - setJunction(jLoop, false); - joinJunction(jExit, false); - } - } else if (type == DO) { - // Special-case "do {} while (1)" and "do {} while (0)" to use - // fewer junctions, since emscripten generates a lot of these. - if (isFalseNode(node[1])) { - int jExit = addJunction(); - pushActiveLabels(jExit, jExit); - buildFlowGraph(node[2]); - popActiveLabels(); - joinJunction(jExit, false); - } else if (isTrueNode(node[1])) { - int jLoop = markJunction(-1); - int jExit = addJunction(); - pushActiveLabels(jLoop, jExit); - buildFlowGraph(node[2]); - popActiveLabels(); - joinJunction(jLoop, false); - setJunction(jExit, false); - } else { - int jLoop = markJunction(-1); - int jCond = addJunction(); - int jCondExit = addJunction(); - int jExit = addJunction(); - pushActiveLabels(jCond, jExit); - buildFlowGraph(node[2]); - popActiveLabels(); - joinJunction(jCond, false); - isInExpr++; - buildFlowGraph(node[1]); - isInExpr--; - joinJunction(jCondExit, false); - joinJunction(jLoop, false); - setJunction(jCondExit, false); - joinJunction(jExit, false); - } - } else if (type == FOR) { - int jTest = addJunction(); - int jBody = addJunction(); - int jStep = addJunction(); - int jExit = addJunction(); - buildFlowGraph(node[1]); - joinJunction(jTest, false); - isInExpr++; - buildFlowGraph(node[2]); - isInExpr--; - joinJunction(jBody, false); - pushActiveLabels(jStep, jExit); - buildFlowGraph(node[4]); - popActiveLabels(); - joinJunction(jStep, false); - buildFlowGraph(node[3]); - joinJunction(jTest, false); - setJunction(jBody, false); - joinJunction(jExit, false); - } else if (type == LABEL) { - assert(BREAK_CAPTURERS.has(node[2][0])); // 'label on non-loop, non-switch statement') - nextLoopLabel = node[1]->getIString(); - buildFlowGraph(node[2]); - } else if (type == SWITCH) { - // Emscripten generates switch statements of a very limited - // form: all case clauses are numeric literals, and all - // case bodies end with a (maybe implicit) break. So it's - // basically equivalent to a multi-way IF statement. - isInExpr++; - buildFlowGraph(node[1]); - isInExpr--; - Ref condition = lookThroughCasts(node[1]); - int jCheckExit = markJunction(-1); - int jExit = addJunction(); - pushActiveLabels(-1, jExit); - bool hasDefault = false; - for (size_t i = 0; i < node[2]->size(); i++) { - setJunction(jCheckExit, false); - // All case clauses are either 'default' or a numeric literal. - if (!node[2][i][0]) { - hasDefault = true; - } else { - // Detect switches dispatching to labelled blocks. - if (condition[0] == NAME && condition[1] == LABEL) { - addBlockLabel(lookThroughCasts(node[2][i][0])); - } - } - for (size_t j = 0; j < node[2][i][1]->size(); j++) { - buildFlowGraph(node[2][i][1][j]); - } - // Control flow will never actually reach the end of the case body. - // If there's live code here, assume it jumps to case exit. - if (currEntryJunction >= 0 && nextBasicBlock->nodes.size() > 0) { - if (!!node[2][i][0]) { - markNonLocalJump(RETURN, EMPTY); - } else { - joinJunction(jExit, false); - } - } - } - // If there was no default case, we also need an empty block - // linking straight from the test evaluation to the exit. - if (!hasDefault) { - setJunction(jCheckExit, false); - } - joinJunction(jExit, false); - popActiveLabels(); - } else if (type == RETURN) { - if (!!node[1]) { - isInExpr++; - buildFlowGraph(node[1]); - isInExpr--; - } - markNonLocalJump(type->getIString(), EMPTY); - } else if (type == BREAK || type == CONTINUE) { - markNonLocalJump(type->getIString(), !!node[1] ? node[1]->getIString() : EMPTY); - } else if (type == ASSIGN) { - isInExpr++; - buildFlowGraph(node[3]); - isInExpr--; - if (node[1]->isBool(true) && node[2][0] == NAME) { - addKillNode(node); - } else { - buildFlowGraph(node[2]); - } - } else if (type == NAME) { - addUseNode(node); - } else if (type == BLOCK || type == TOPLEVEL) { - if (!!node[1]) { - for (size_t i = 0; i < node[1]->size(); i++) { - buildFlowGraph(node[1][i]); - } - } - } else if (type == STAT) { - buildFlowGraph(node[1]); - } else if (type == UNARY_PREFIX || type == UNARY_POSTFIX) { - isInExpr++; - buildFlowGraph(node[2]); - isInExpr--; - } else if (type == BINARY) { - isInExpr++; - buildFlowGraph(node[2]); - buildFlowGraph(node[3]); - isInExpr--; - } else if (type == CALL) { - isInExpr++; - buildFlowGraph(node[1]); - if (!!node[2]) { - for (size_t i = 0; i < node[2]->size(); i++) { - buildFlowGraph(node[2][i]); - } - } - isInExpr--; - // If the call is statically known to throw, - // treat it as a jump to function exit. - if (!isInExpr && node[1][0] == NAME) { - if (FUNCTIONS_THAT_ALWAYS_THROW.has(node[1][1])) { - markNonLocalJump(RETURN, EMPTY); - } - } - } else if (type == SEQ || type == SUB) { - isInExpr++; - buildFlowGraph(node[1]); - buildFlowGraph(node[2]); - isInExpr--; - } else if (type == DOT || type == THROW) { - isInExpr++; - buildFlowGraph(node[1]); - isInExpr--; - } else if (type == NUM || type == STRING || type == VAR) { - // nada - } else { - assert(0); // 'unsupported node type: ' + type); - } - }; - - buildFlowGraph(fun); - -#ifdef PROFILING - tflowgraph += clock() - start; - start = clock(); -#endif - - assert(junctions[ENTRY_JUNCTION].inblocks.size() == 0); // 'function entry must have no incoming blocks'); - assert(junctions[EXIT_JUNCTION].outblocks.size() == 0); // 'function exit must have no outgoing blocks'); - assert(blocks[ENTRY_BLOCK]->entry == ENTRY_JUNCTION); //, 'block zero must be the initial block'); - - // Fix up implicit jumps done by assigning to the LABEL variable. - // If a block ends with an assignment to LABEL and there's another block - // with that value of LABEL as precondition, we tweak the flow graph so - // that the former jumps straight to the later. - - std::map labelledBlocks; - typedef std::pair Jump; - std::vector labelledJumps; - - for (size_t i = 0; i < blocks.size(); i++) { - Block* block = blocks[i]; - // Does it have any labels as preconditions to its entry? - for (auto labelVal : block->labels) { - // If there are multiple blocks with the same label, all bets are off. - // This seems to happen sometimes for short blocks that end with a return. - // TODO: it should be safe to merge the duplicates if they're identical. - if (labelledBlocks.count(labelVal) > 0) { - labelledBlocks.clear(); - labelledJumps.clear(); - goto AFTER_FINDLABELLEDBLOCKS; - } - labelledBlocks[labelVal] = block; - } - // Does it assign a specific label value at exit? - if (block->kill.has(LABEL)) { - Ref finalNode = block->nodes.back(); - if (finalNode[0] == ASSIGN && finalNode[2][1] == LABEL) { - // If labels are computed dynamically then all bets are off. - // This can happen due to indirect branching in llvm output. - if (finalNode[3][0] != NUM) { - labelledBlocks.clear(); - labelledJumps.clear(); - goto AFTER_FINDLABELLEDBLOCKS; - } - labelledJumps.push_back(Jump(finalNode[3][1], block)); - } else { - // If label is assigned a non-zero value elsewhere in the block - // then all bets are off. This can happen e.g. due to outlining - // saving/restoring label to the stack. - for (size_t j = 0; j < block->nodes.size() - 1; j++) { - if (block->nodes[j][0] == ASSIGN && block->nodes[j][2][1] == LABEL) { - if (block->nodes[j][3][0] != NUM || block->nodes[j][3][1]->getNumber() != 0) { - labelledBlocks.clear(); - labelledJumps.clear(); - goto AFTER_FINDLABELLEDBLOCKS; - } - } - } - } - } - } - - AFTER_FINDLABELLEDBLOCKS: - - for (auto labelVal : labelledBlocks) { - Block* block = labelVal.second; - // Disconnect it from the graph, and create a - // new junction for jumps targetting this label. - junctions[block->entry].outblocks.erase(block->id); - block->entry = addJunction(); - junctions[block->entry].outblocks.insert(block->id); - // Add a fake use of LABEL to keep it alive in predecessor. - block->use[LABEL] = 1; - block->nodes.insert(block->nodes.begin(), makeName(LABEL)); - block->isexpr.insert(block->isexpr.begin(), 1); - } - for (size_t i = 0; i < labelledJumps.size(); i++) { - auto labelVal = labelledJumps[i].first; - auto block = labelledJumps[i].second; - Block* targetBlock = labelledBlocks[labelVal->getInteger()]; - if (targetBlock) { - // Redirect its exit to entry of the target block. - junctions[block->exit].inblocks.erase(block->id); - block->exit = targetBlock->entry; - junctions[block->exit].inblocks.insert(block->id); - } - } - -#ifdef PROFILING - tlabelfix += clock() - start; - start = clock(); -#endif - - // Do a backwards data-flow analysis to determine the set of live - // variables at each junction, and to use this information to eliminate - // any unused assignments. - // We run two nested phases. The inner phase builds the live set for each - // junction. The outer phase uses this to try to eliminate redundant - // stores in each basic block, which might in turn affect liveness info. - - auto analyzeJunction = [&](Junction& junc) { - // Update the live set for this junction. - IOrderedStringSet live; - for (auto b : junc.outblocks) { - Block* block = blocks[b]; - IOrderedStringSet& liveSucc = junctions[block->exit].live; - for (auto name : liveSucc) { - if (!block->kill.has(name)) { - live.insert(name); - } - } - for (auto name : block->use) { - live.insert(name.first); - } - } - junc.live = live; - }; - - auto analyzeBlock = [&](Block* block) { - // Update information about the behaviour of the block. - // This includes the standard 'use' and 'kill' information, - // plus a 'link' set naming values that flow through from entry - // to exit, possibly changing names via simple 'x=y' assignments. - // As we go, we eliminate assignments if the variable is not - // subsequently used. - auto live = junctions[block->exit].live; - StringIntMap use; - StringSet kill; - StringStringMap link; - StringIntMap lastUseLoc; - StringIntMap firstDeadLoc; - StringIntMap firstKillLoc; - StringIntMap lastKillLoc; - for (auto name : live) { - link[name] = name; - lastUseLoc[name] = block->nodes.size(); - firstDeadLoc[name] = block->nodes.size(); - } - for (int j = block->nodes.size() - 1; j >= 0 ; j--) { - Ref node = block->nodes[j]; - if (node[0] == NAME) { - IString name = node[1]->getIString(); - live.insert(name); - use[name] = j; - if (lastUseLoc.count(name) == 0) { - lastUseLoc[name] = j; - firstDeadLoc[name] = j; - } - } else { - IString name = node[2][1]->getIString(); - // We only keep assignments if they will be subsequently used. - if (live.has(name)) { - kill.insert(name); - use.erase(name); - live.erase(name); - firstDeadLoc[name] = j; - firstKillLoc[name] = j; - if (lastUseLoc.count(name) == 0) { - lastUseLoc[name] = j; - } - if (lastKillLoc.count(name) == 0) { - lastKillLoc[name] = j; - } - // If it's an "x=y" and "y" is not live, then we can create a - // flow-through link from "y" to "x". If not then there's no - // flow-through link for "x". - if (link.has(name)) { - IString oldLink = link[name]; - link.erase(name); - if (node[3][0] == NAME) { - if (asmData.isLocal(node[3][1]->getIString())) { - link[node[3][1]->getIString()] = oldLink; - } - } - } - } else { - // The result of this assignment is never used, so delete it. - // We may need to keep the RHS for its value or its side-effects. - auto removeUnusedNodes = [&](int j, int n) { - for (auto pair : lastUseLoc) { - pair.second -= n; - } - for (auto pair : firstKillLoc) { - pair.second -= n; - } - for (auto pair : lastKillLoc) { - pair.second -= n; - } - for (auto pair : firstDeadLoc) { - pair.second -= n; - } - block->nodes.erase(block->nodes.begin() + j, block->nodes.begin() + j + n); - block->isexpr.erase(block->isexpr.begin() + j, block->isexpr.begin() + j + n); - }; - if (block->isexpr[j] || hasSideEffects(node[3])) { - safeCopy(node, node[3]); - removeUnusedNodes(j, 1); - } else { - int numUsesInExpr = 0; - traversePre(node[3], [&](Ref node) { - if (node[0] == NAME && asmData.isLocal(node[1]->getIString())) { - numUsesInExpr++; - } - }); - safeCopy(node, makeEmpty()); - j = j - numUsesInExpr; - removeUnusedNodes(j, 1 + numUsesInExpr); - } - } - } - } - // XXX efficiency - block->use = use; - block->kill = kill; - block->link = link; - block->lastUseLoc = lastUseLoc; - block->firstDeadLoc = firstDeadLoc; - block->firstKillLoc = firstKillLoc; - block->lastKillLoc = lastKillLoc; - }; - - // Ordered map to work in approximate reverse order of junction appearance - std::set jWorkSet; - std::set bWorkSet; - - // Be sure to visit every junction at least once. - // This avoids missing some vars because we disconnected them - // when processing the labelled jumps. - for (size_t i = EXIT_JUNCTION; i < junctions.size(); i++) { - jWorkSet.insert(i); - for (auto b : junctions[i].inblocks) { - bWorkSet.insert(b); - } - } - // Exit junction never has any live variable changes to propagate - jWorkSet.erase(EXIT_JUNCTION); - - do { - // Iterate on just the junctions until we get stable live sets. - // The first run of this loop will grow the live sets to their maximal size. - // Subsequent runs will shrink them based on eliminated in-block uses. - while (jWorkSet.size() > 0) { - auto last = jWorkSet.end(); - --last; - Junction& junc = junctions[*last]; - jWorkSet.erase(last); - IOrderedStringSet oldLive = junc.live; // copy it here, to check for changes later - analyzeJunction(junc); - if (oldLive != junc.live) { - // Live set changed, updated predecessor blocks and junctions. - for (auto b : junc.inblocks) { - bWorkSet.insert(b); - jWorkSet.insert(blocks[b]->entry); - } - } - } - // Now update the blocks based on the calculated live sets. - while (bWorkSet.size() > 0) { - auto last = bWorkSet.end(); - --last; - Block* block = blocks[*last]; - bWorkSet.erase(last); - auto oldUse = block->use; - analyzeBlock(block); - if (oldUse != block->use) { - // The use set changed, re-process the entry junction. - jWorkSet.insert(block->entry); - } - } - } while (jWorkSet.size() > 0); - -#ifdef PROFILING - tbackflow += clock() - start; - start = clock(); -#endif - - // Insist that all function parameters are alive at function entry. - // This ensures they will be assigned independent registers, even - // if they happen to be unused. - - for (auto name : asmData.params) { - junctions[ENTRY_JUNCTION].live.insert(name); - } - - // For variables that are live at one or more junctions, we assign them - // a consistent register for the entire scope of the function. Find pairs - // of variable that cannot use the same register (the "conflicts") as well - // as pairs of variables that we'd like to have share the same register - // (the "links"). - - struct JuncVar { - std::vector conf; - IOrderedStringSet link; - std::unordered_set excl; - int reg; - bool used; - JuncVar() : reg(-1), used(false) {} - }; - size_t numLocals = asmData.locals.size(); - std::unordered_map nameToNum; - std::vector numToName; - nameToNum.reserve(numLocals); - numToName.reserve(numLocals); - for (auto kv : asmData.locals) { - nameToNum[kv.first] = numToName.size(); - numToName.push_back(kv.first); - } - - std::vector juncVars(numLocals); - for (Junction& junc : junctions) { - for (IString name : junc.live) { - JuncVar& jVar = juncVars[nameToNum[name]]; - jVar.used = true; - jVar.conf.assign(numLocals, false); - } - } - std::map> possibleBlockConflictsMap; - std::vector>> possibleBlockConflicts; - std::unordered_map> possibleBlockLinks; - possibleBlockConflicts.reserve(numLocals); - possibleBlockLinks.reserve(numLocals); - - for (Junction& junc : junctions) { - // Pre-compute the possible conflicts and links for each block rather - // than checking potentially impossible options for each var - possibleBlockConflictsMap.clear(); - possibleBlockConflicts.clear(); - possibleBlockLinks.clear(); - for (auto b : junc.outblocks) { - Block* block = blocks[b]; - Junction& jSucc = junctions[block->exit]; - for (auto name : jSucc.live) { - possibleBlockConflictsMap[name].push_back(block); - } - for (auto name_linkname : block->link) { - if (name_linkname.first != name_linkname.second) { - possibleBlockLinks[name_linkname.first].push_back(block); - } - } - } - // Find the live variables in this block, mark them as unnecessary to - // check for conflicts (we mark all live vars as conflicting later) - std::vector liveJVarNums; - liveJVarNums.reserve(junc.live.size()); - for (auto name : junc.live) { - size_t jVarNum = nameToNum[name]; - liveJVarNums.push_back(jVarNum); - possibleBlockConflictsMap.erase(name); - } - // Extract just the variables we might want to check for conflicts - for (auto kv : possibleBlockConflictsMap) { - possibleBlockConflicts.push_back(std::make_pair(nameToNum[kv.first], kv.second)); - } - - for (size_t jVarNum : liveJVarNums) { - JuncVar& jvar = juncVars[jVarNum]; - IString name = numToName[jVarNum]; - // It conflicts with all other names live at this junction. - for (size_t liveJVarNum : liveJVarNums) { - jvar.conf[liveJVarNum] = true; - } - jvar.conf[jVarNum] = false; // except for itself, of course - - // It conflicts with any output vars of successor blocks, - // if they're assigned before it goes dead in that block. - for (auto jvarnum_blocks : possibleBlockConflicts) { - size_t otherJVarNum = jvarnum_blocks.first; - IString otherName = numToName[otherJVarNum]; - for (auto block : jvarnum_blocks.second) { - if (block->lastKillLoc[otherName] < block->firstDeadLoc[name]) { - jvar.conf[otherJVarNum] = true; - juncVars[otherJVarNum].conf[jVarNum] = true; - break; - } - } - } - - // It links with any linkages in the outgoing blocks. - for (auto block: possibleBlockLinks[name]) { - IString linkName = block->link[name]; - jvar.link.insert(linkName); - juncVars[nameToNum[linkName]].link.insert(name); - } - } - } - -#ifdef PROFILING - tjuncvaruniqassign += clock() - start; - start = clock(); -#endif - - // Attempt to sort the junction variables to heuristically reduce conflicts. - // Simple starting point: handle the most-conflicted variables first. - // This seems to work pretty well. - - std::vector sortedJVarNums; - sortedJVarNums.reserve(juncVars.size()); - std::vector jVarConfCounts(numLocals); - for (size_t jVarNum = 0; jVarNum < juncVars.size(); jVarNum++) { - JuncVar& jVar = juncVars[jVarNum]; - if (!jVar.used) continue; - jVarConfCounts[jVarNum] = std::count(jVar.conf.begin(), jVar.conf.end(), true); - sortedJVarNums.push_back(jVarNum); - } - std::sort(sortedJVarNums.begin(), sortedJVarNums.end(), [&](const size_t vi1, const size_t vi2) { - // sort by # of conflicts - if (jVarConfCounts[vi1] < jVarConfCounts[vi2]) return true; - if (jVarConfCounts[vi1] == jVarConfCounts[vi2]) return numToName[vi1] < numToName[vi2]; - return false; - }); - -#ifdef PROFILING - tjuncvarsort += clock() - start; - start = clock(); -#endif - - // We can now assign a register to each junction variable. - // Process them in order, trying available registers until we find - // one that works, and propagating the choice to linked/conflicted - // variables as we go. - - std::function tryAssignRegister = [&](IString name, int reg) { - // Try to assign the given register to the given variable, - // and propagate that choice throughout the graph. - // Returns true if successful, false if there was a conflict. - JuncVar& jv = juncVars[nameToNum[name]]; - if (jv.reg > 0) { - return jv.reg == reg; - } - if (jv.excl.count(reg) > 0) { - return false; - } - jv.reg = reg; - // Exclude use of this register at all conflicting variables. - for (size_t confNameNum = 0; confNameNum < jv.conf.size(); confNameNum++) { - if (jv.conf[confNameNum]) { - juncVars[confNameNum].excl.insert(reg); - } - } - // Try to propagate it into linked variables. - // It's not an error if we can't. - for (auto linkName : jv.link) { - tryAssignRegister(linkName, reg); - } - return true; - }; - for (size_t jVarNum : sortedJVarNums) { - // It may already be assigned due to linked-variable propagation. - if (juncVars[jVarNum].reg > 0) { - continue; - } - IString name = numToName[jVarNum]; - // Try to use existing registers first. - auto& allRegs = allRegsByType[asmData.getType(name)]; - bool moar = false; - for (auto reg : allRegs) { - if (tryAssignRegister(name, reg.first)) { - moar = true; - break; - } - } - if (moar) continue; - // They're all taken, create a new one. - tryAssignRegister(name, createReg(name)); - } - -#ifdef PROFILING - tregassign += clock() - start; - start = clock(); -#endif - - // Each basic block can now be processed in turn. - // There may be internal-use-only variables that still need a register - // assigned, but they can be treated just for this block. We know - // that all inter-block variables are in a good state thanks to - // junction variable consistency. - - for (size_t i = 0; i < blocks.size(); i++) { - Block* block = blocks[i]; - if (block->nodes.size() == 0) continue; - Junction& jEnter = junctions[block->entry]; - Junction& jExit = junctions[block->exit]; - // Mark the point at which each input reg becomes dead. - // Variables alive before this point must not be assigned - // to that register. - StringSet inputVars; - std::unordered_map inputDeadLoc; - std::unordered_map inputVarsByReg; - for (auto name : jExit.live) { - if (!block->kill.has(name)) { - inputVars.insert(name); - int reg = juncVars[nameToNum[name]].reg; - assert(reg > 0); // 'input variable doesnt have a register'); - inputDeadLoc[reg] = block->firstDeadLoc[name]; - inputVarsByReg[reg] = name; - } - } - for (auto pair : block->use) { - IString name = pair.first; - if (!inputVars.has(name)) { - inputVars.insert(name); - int reg = juncVars[nameToNum[name]].reg; - assert(reg > 0); // 'input variable doesnt have a register'); - inputDeadLoc[reg] = block->firstDeadLoc[name]; - inputVarsByReg[reg] = name; - } - } - // TODO assert(setSize(setSub(inputVars, jEnter.live)) == 0); - // Scan through backwards, allocating registers on demand. - // Be careful to avoid conflicts with the input registers. - // We consume free registers in last-used order, which helps to - // eliminate "x=y" assignments that are the last use of "y". - StringIntMap assignedRegs; - auto freeRegsByTypePre = allRegsByType; // XXX copy - // Begin with all live vars assigned per the exit junction. - for (auto name : jExit.live) { - int reg = juncVars[nameToNum[name]].reg; - assert(reg > 0); // 'output variable doesnt have a register'); - assignedRegs[name] = reg; - freeRegsByTypePre[asmData.getType(name)].erase(reg); // XXX assert? - } - std::vector> freeRegsByType; - freeRegsByType.resize(freeRegsByTypePre.size()); - for (size_t j = 0; j < freeRegsByTypePre.size(); j++) { - for (auto pair : freeRegsByTypePre[j]) { - freeRegsByType[j].push_back(pair.first); - } - } - // Scan through the nodes in sequence, modifying each node in-place - // and grabbing/freeing registers as needed. - std::vector> maybeRemoveNodes; - for (int j = block->nodes.size() - 1; j >= 0; j--) { - Ref node = block->nodes[j]; - IString name = (node[0] == ASSIGN ? node[2][1] : node[1])->getIString(); - IntStringMap& allRegs = allRegsByType[asmData.getType(name)]; - std::vector& freeRegs = freeRegsByType[asmData.getType(name)]; - int reg = assignedRegs[name]; // XXX may insert a zero - if (node[0] == NAME) { - // A use. Grab a register if it doesn't have one. - if (reg <= 0) { - if (inputVars.has(name) && j <= block->firstDeadLoc[name]) { - // Assignment to an input variable, must use pre-assigned reg. - reg = juncVars[nameToNum[name]].reg; - assignedRegs[name] = reg; - for (int k = freeRegs.size() - 1; k >= 0; k--) { - if (freeRegs[k] == reg) { - freeRegs.erase(freeRegs.begin() + k); - break; - } - } - } else { - // Try to use one of the existing free registers. - // It must not conflict with an input register. - for (int k = freeRegs.size() - 1; k >= 0; k--) { - reg = freeRegs[k]; - // Check for conflict with input registers. - if (inputDeadLoc.count(reg) > 0) { - if (block->firstKillLoc[name] <= inputDeadLoc[reg]) { - if (name != inputVarsByReg[reg]) { - continue; - } - } - } - // Found one! - assignedRegs[name] = reg; - assert(reg > 0); - freeRegs.erase(freeRegs.begin() + k); - break; - } - // If we didn't find a suitable register, create a new one. - if (assignedRegs[name] <= 0) { - reg = createReg(name); - assignedRegs[name] = reg; - } - } - } - node[1]->setString(allRegs[reg]); - } else { - // A kill. This frees the assigned register. - assert(reg > 0); //, 'live variable doesnt have a reg?') - node[2][1]->setString(allRegs[reg]); - freeRegs.push_back(reg); - assignedRegs.erase(name); - if (node[3][0] == NAME && asmData.isLocal(node[3][1]->getIString())) { - maybeRemoveNodes.push_back(std::pair(j, node)); - } - } - } - // If we managed to create any "x=x" assignments, remove them. - for (size_t j = 0; j < maybeRemoveNodes.size(); j++) { - Ref node = maybeRemoveNodes[j].second; - if (node[2][1] == node[3][1]) { - if (block->isexpr[maybeRemoveNodes[j].first]) { - safeCopy(node, node[2]); - } else { - safeCopy(node, makeEmpty()); - } - } - } - } - -#ifdef PROFILING - tblockproc += clock() - start; - start = clock(); -#endif - - // Assign registers to function params based on entry junction - - StringSet paramRegs; - if (!!fun[2]) { - for (size_t i = 0; i < fun[2]->size(); i++) { - auto& allRegs = allRegsByType[asmData.getType(fun[2][i]->getIString())]; - fun[2][i]->setString(allRegs[juncVars[nameToNum[fun[2][i]->getIString()]].reg]); - paramRegs.insert(fun[2][i]->getIString()); - } - } - - // That's it! - // Re-construct the function with appropriate variable definitions. - - asmData.locals.clear(); - asmData.params.clear(); - asmData.vars.clear(); - for (int i = 1; i < nextReg; i++) { - for (size_t type = 0; type < allRegsByType.size(); type++) { - if (allRegsByType[type].count(i) > 0) { - IString reg = allRegsByType[type][i]; - if (!paramRegs.has(reg)) { - asmData.addVar(reg, intToAsmType(type)); - } else { - asmData.addParam(reg, intToAsmType(type)); - } - break; - } - } - } - asmData.denormalize(); - - removeAllUselessSubNodes(fun); // XXX vacuum? vacuum(fun); - -#ifdef PROFILING - treconstruct += clock() - start; - start = clock(); -#endif - - }); -#ifdef PROFILING - errv(" RH stages: a:%li fl:%li lf:%li bf:%li jvua:%li jvs:%li jra:%li bp:%li r:%li", - tasmdata, tflowgraph, tlabelfix, tbackflow, tjuncvaruniqassign, tjuncvarsort, tregassign, tblockproc, treconstruct); -#endif -} -// end registerizeHarder - -// minified names generation -StringSet RESERVED("do if in for new try var env let"); -const char *VALID_MIN_INITS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$"; -const char *VALID_MIN_LATERS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$0123456789"; - -StringVec minifiedNames; -std::vector minifiedState; - -void ensureMinifiedNames(int n) { // make sure the nth index in minifiedNames exists. done 100% deterministically - static int VALID_MIN_INITS_LEN = strlen(VALID_MIN_INITS); - static int VALID_MIN_LATERS_LEN = strlen(VALID_MIN_LATERS); - - while ((int)minifiedNames.size() < n+1) { - // generate the current name - std::string name; - name += VALID_MIN_INITS[minifiedState[0]]; - for (size_t i = 1; i < minifiedState.size(); i++) { - name += VALID_MIN_LATERS[minifiedState[i]]; - } - IString str(strdupe(name.c_str())); // leaked! - if (!RESERVED.has(str)) minifiedNames.push_back(str); - // increment the state - size_t i = 0; - while (1) { - minifiedState[i]++; - if (minifiedState[i] < (i == 0 ? VALID_MIN_INITS_LEN : VALID_MIN_LATERS_LEN)) break; - // overflow - minifiedState[i] = 0; - i++; - if (i == minifiedState.size()) minifiedState.push_back(-1); // will become 0 after increment in next loop head - } - } -} - -void minifyLocals(Ref ast) { - assert(!!extraInfo); - IString GLOBALS("globals"); - assert(extraInfo->has(GLOBALS)); - Ref globals = extraInfo[GLOBALS]; - - if (minifiedState.size() == 0) minifiedState.push_back(0); - - traverseFunctions(ast, [&globals](Ref fun) { - // Analyse the asmjs to figure out local variable names, - // but operate on the original source tree so that we don't - // miss any global names in e.g. variable initializers. - AsmData asmData(fun); - asmData.denormalize(); // TODO: we can avoid modifying at all here - we just need a list of local vars+params - - StringStringMap newNames; - StringSet usedNames; - - // Find all the globals that we need to minify using - // pre-assigned names. Don't actually minify them yet - // as that might interfere with local variable names. - traversePre(fun, [&](Ref node) { - if (node[0] == NAME) { - IString name = node[1]->getIString(); - if (!asmData.isLocal(name)) { - if (globals->has(name)) { - IString minified = globals[name]->getIString(); - assert(!!minified); - newNames[name] = minified; - usedNames.insert(minified); - } - } - } - }); - - // The first time we encounter a local name, we assign it a - // minified name that's not currently in use. Allocating on - // demand means they're processed in a predictable order, - // which is very handy for testing/debugging purposes. - int nextMinifiedName = 0; - auto getNextMinifiedName = [&]() { - IString minified; - while (1) { - ensureMinifiedNames(nextMinifiedName); - minified = minifiedNames[nextMinifiedName++]; - // TODO: we can probably remove !isLocalName here - if (!usedNames.has(minified) && !asmData.isLocal(minified)) { - return minified; - } - } - }; - - // We can also minify loop labels, using a separate namespace - // to the variable declarations. - StringStringMap newLabels; - int nextMinifiedLabel = 0; - auto getNextMinifiedLabel = [&]() { - ensureMinifiedNames(nextMinifiedLabel); - return minifiedNames[nextMinifiedLabel++]; - }; - - // Traverse and minify all names. - if (globals->has(fun[1]->getIString())) { - fun[1]->setString(globals[fun[1]->getIString()]->getIString()); - assert(!!fun[1]); - } - if (!!fun[2]) { - for (size_t i = 0; i < fun[2]->size(); i++) { - IString minified = getNextMinifiedName(); - newNames[fun[2][i]->getIString()] = minified; - fun[2][i]->setString(minified); - } - } - traversePre(fun[3], [&](Ref node) { - Ref type = node[0]; - if (type == NAME) { - IString name = node[1]->getIString(); - IString minified = newNames[name]; - if (!!minified) { - node[1]->setString(minified); - } else if (asmData.isLocal(name)) { - minified = getNextMinifiedName(); - newNames[name] = minified; - node[1]->setString(minified); - } - } else if (type == VAR) { - for (size_t i = 0; i < node[1]->size(); i++) { - Ref defn = node[1][i]; - IString name = defn[0]->getIString(); - if (!(newNames.has(name))) { - newNames[name] = getNextMinifiedName(); - } - defn[0]->setString(newNames[name]); - } - } else if (type == LABEL) { - IString name = node[1]->getIString(); - if (!newLabels.has(name)) { - newLabels[name] = getNextMinifiedLabel(); - } - node[1]->setString(newLabels[name]); - } else if (type == BREAK || type == CONTINUE) { - if (node->size() > 1 && !!node[1]) { - node[1]->setString(newLabels[node[1]->getIString()]); - } - } - }); - }); -} - -void asmLastOpts(Ref ast) { - std::vector statsStack; - traverseFunctions(ast, [&](Ref fun) { - traversePrePost(fun, [&](Ref node) { - Ref type = node[0]; - Ref stats = getStatements(node); - if (!!stats) statsStack.push_back(stats); - if (CONDITION_CHECKERS.has(type)) { - node[1] = simplifyCondition(node[1]); - } - if (type == WHILE && node[1][0] == NUM && node[1][1]->getNumber() == 1 && node[2][0] == BLOCK && node[2]->size() == 2) { - // This is at the end of the pipeline, we can assume all other optimizations are done, and we modify loops - // into shapes that might confuse other passes - - // while (1) { .. if (..) { break } } ==> do { .. } while(..) - Ref stats = node[2][1]; - Ref last = stats->back(); - if (!!last && last[0] == IF && (last->size() < 4 || !last[3]) && last[2][0] == BLOCK && !!last[2][1][0]) { - Ref lastStats = last[2][1]; - int lastNum = lastStats->size(); - Ref lastLast = lastStats[lastNum-1]; - if (!(lastLast[0] == BREAK && !lastLast[1])) return;// if not a simple break, dangerous - for (int i = 0; i < lastNum; i++) { - if (lastStats[i][0] != STAT && lastStats[i][0] != BREAK) return; // something dangerous - } - // ok, a bunch of statements ending in a break - bool abort = false; - int stack = 0; - int breaks = 0; - traversePrePost(stats, [&](Ref node) { - Ref type = node[0]; - if (type == CONTINUE) { - if (stack == 0 || !!node[1]) { // abort if labeled (we do not analyze labels here yet), or a continue directly on us - abort = true; - } - } else if (type == BREAK) { - if (stack == 0 || !!node[1]) { // relevant if labeled (we do not analyze labels here yet), or a break directly on us - breaks++; - } - } else if (LOOP.has(type)) { - stack++; - } - }, [&](Ref node) { - if (LOOP.has(node[0])) { - stack--; - } - }); - if (abort) return; - assert(breaks > 0); - if (lastStats->size() > 1 && breaks != 1) return; // if we have code aside from the break, we can only move it out if there is just one break - if (statsStack.size() < 1) return; // no chance we have this stats on hand - // start to optimize - if (lastStats->size() > 1) { - Ref parent = statsStack.back(); - int me = parent->indexOf(node); - if (me < 0) return; // not always directly on a stats, could be in a label for example - parent->insert(me+1, lastStats->size()-1); - for (size_t i = 0; i+1 < lastStats->size(); i++) { - parent[me+1+i] = lastStats[i]; - } - } - Ref conditionToBreak = last[1]; - stats->pop_back(); - node[0]->setString(DO); - node[1] = simplifyNotCompsDirect(make2(UNARY_PREFIX, L_NOT, conditionToBreak)); - } - } else if (type == BINARY) { - if (node[1] == AND) { - if (node[3][0] == UNARY_PREFIX && node[3][1] == MINUS && node[3][2][0] == NUM && node[3][2][1]->getNumber() == 1) { - // Change &-1 into |0, at this point the hint is no longer needed - node[1]->setString(OR); - node[3] = node[3][2]; - node[3][1]->setNumber(0); - } - } else if (node[1] == MINUS && node[3][0] == UNARY_PREFIX) { - // avoid X - (-Y) because some minifiers buggily emit X--Y which is invalid as -- can be a unary. Transform to - // X + Y - if (node[3][1] == MINUS) { // integer - node[1]->setString(PLUS); - node[3] = node[3][2]; - } else if (node[3][1] == PLUS) { // float - if (node[3][2][0] == UNARY_PREFIX && node[3][2][1] == MINUS) { - node[1]->setString(PLUS); - node[3][2] = node[3][2][2]; - } - } - } - } - }, [&](Ref node) { - if (statsStack.size() > 0) { - Ref stats = getStatements(node); - if (!!stats) statsStack.pop_back(); - } - }); - // convert { singleton } into singleton - traversePre(fun, [](Ref node) { - if (node[0] == BLOCK && !!getStatements(node) && node[1]->size() == 1) { - safeCopy(node, node[1][0]); - } - }); - // convert L: do { .. } while(0) into L: { .. } - traversePre(fun, [](Ref node) { - if (node[0] == LABEL && node[1]->isString() /* careful of var label = 5 */ && - node[2][0] == DO && node[2][1][0] == NUM && node[2][1][1]->getNumber() == 0 && node[2][2][0] == BLOCK) { - // there shouldn't be any continues on this, not direct break or continue - IString label = node[1]->getIString(); - bool abort = false; - int breakCaptured = 0, continueCaptured = 0; - traversePrePost(node[2][2], [&](Ref node) { - if (node[0] == CONTINUE) { - if (!node[1] && !continueCaptured) { - abort = true; - } else if (node[1]->isString() && node[1]->getIString() == label) { - abort = true; - } - } - if (node[0] == BREAK && !node[1] && !breakCaptured) { - abort = true; - } - if (BREAK_CAPTURERS.has(node[0])) { - breakCaptured++; - } - if (CONTINUE_CAPTURERS.has(node[0])) { - continueCaptured++; - } - }, [&](Ref node) { - if (BREAK_CAPTURERS.has(node[0])) { - breakCaptured--; - } - if (CONTINUE_CAPTURERS.has(node[0])) { - continueCaptured--; - } - }); - if (abort) return; - safeCopy(node[2], node[2][2]); - } - }); - }); -} - -// Contrary to the name this does not eliminate actual dead functions, only -// those marked as such with DEAD_FUNCTIONS -void eliminateDeadFuncs(Ref ast) { - assert(!!extraInfo); - IString DEAD_FUNCTIONS("dead_functions"); - IString ABORT("abort"); - assert(extraInfo->has(DEAD_FUNCTIONS)); - StringSet deadFunctions; - for (size_t i = 0; i < extraInfo[DEAD_FUNCTIONS]->size(); i++) { - deadFunctions.insert(extraInfo[DEAD_FUNCTIONS][i]->getIString()); - } - traverseFunctions(ast, [&](Ref fun) { - if (!deadFunctions.has(fun[1].get()->getIString())) { - return; - } - AsmData asmData(fun); - fun[3]->setSize(1); - fun[3][0] = make1(STAT, make2(CALL, makeName(ABORT), &(makeArray(1))->push_back(makeNum(-1)))); - asmData.vars.clear(); - asmData.denormalize(); - }); -} - diff --git a/src/optimizer.h b/src/optimizer.h deleted file mode 100644 index 5edcaad87..000000000 --- a/src/optimizer.h +++ /dev/null @@ -1,127 +0,0 @@ - -#ifndef __optimizer_h__ -#define __optimizer_h__ - -#include "simple_ast.h" - -extern bool preciseF32, - receiveJSON, - emitJSON, - minifyWhitespace, - last; - -extern cashew::Ref extraInfo; - -void eliminateDeadFuncs(cashew::Ref ast); -void eliminate(cashew::Ref ast, bool memSafe=false); -void eliminateMemSafe(cashew::Ref ast); -void simplifyExpressions(cashew::Ref ast); -void optimizeFrounds(cashew::Ref ast); -void simplifyIfs(cashew::Ref ast); -void registerize(cashew::Ref ast); -void registerizeHarder(cashew::Ref ast); -void minifyLocals(cashew::Ref ast); -void asmLastOpts(cashew::Ref ast); - -// - -enum AsmType { - ASM_INT = 0, - ASM_DOUBLE, - ASM_FLOAT, - ASM_FLOAT32X4, - ASM_FLOAT64X2, - ASM_INT8X16, - ASM_INT16X8, - ASM_INT32X4, - ASM_NONE // number of types -}; - -struct AsmData; - -AsmType detectType(cashew::Ref node, AsmData *asmData=nullptr, bool inVarDef=false); - -struct AsmData { - struct Local { - Local() {} - Local(AsmType type, bool param) : type(type), param(param) {} - AsmType type; - bool param; // false if a var - }; - typedef std::unordered_map Locals; - - Locals locals; - std::vector params; // in order - std::vector vars; // in order - AsmType ret; - - cashew::Ref func; - - AsmType getType(const cashew::IString& name) { - auto ret = locals.find(name); - if (ret != locals.end()) return ret->second.type; - return ASM_NONE; - } - void setType(const cashew::IString& name, AsmType type) { - locals[name].type = type; - } - - bool isLocal(const cashew::IString& name) { - return locals.count(name) > 0; - } - bool isParam(const cashew::IString& name) { - return isLocal(name) && locals[name].param; - } - bool isVar(const cashew::IString& name) { - return isLocal(name) && !locals[name].param; - } - - AsmData() {} // if you want to fill in the data yourself - AsmData(cashew::Ref f); // if you want to read data from f, and modify it as you go (parallel to denormalize) - - void denormalize(); - - void addParam(cashew::IString name, AsmType type) { - locals[name] = Local(type, true); - params.push_back(name); - } - void addVar(cashew::IString name, AsmType type) { - locals[name] = Local(type, false); - vars.push_back(name); - } - - void deleteVar(cashew::IString name) { - locals.erase(name); - for (size_t i = 0; i < vars.size(); i++) { - if (vars[i] == name) { - vars.erase(vars.begin() + i); - break; - } - } - } -}; - -bool isInteger(double x); - -bool isInteger32(double x); - -extern cashew::IString ASM_FLOAT_ZERO; - -extern cashew::IString SIMD_INT8X16_CHECK, - SIMD_INT16X8_CHECK, - SIMD_INT32X4_CHECK, - SIMD_FLOAT32X4_CHECK, - SIMD_FLOAT64X2_CHECK; - -int parseInt(const char *str); - -struct HeapInfo { - bool valid, unsign, floaty; - int bits; - AsmType type; -}; - -HeapInfo parseHeap(const char *name); - -#endif // __optimizer_h__ - diff --git a/src/parser.cpp b/src/parser.cpp deleted file mode 100644 index 06e455b0b..000000000 --- a/src/parser.cpp +++ /dev/null @@ -1,144 +0,0 @@ - -#include "parser.h" - -namespace cashew { - -// common strings - -IString TOPLEVEL("toplevel"), - DEFUN("defun"), - BLOCK("block"), - STAT("stat"), - ASSIGN("assign"), - NAME("name"), - VAR("var"), - CONST("const"), - CONDITIONAL("conditional"), - BINARY("binary"), - RETURN("return"), - IF("if"), - ELSE("else"), - WHILE("while"), - DO("do"), - FOR("for"), - SEQ("seq"), - SUB("sub"), - CALL("call"), - NUM("num"), - LABEL("label"), - BREAK("break"), - CONTINUE("continue"), - SWITCH("switch"), - STRING("string"), - INF("inf"), - NaN("nan"), - TEMP_RET0("tempRet0"), - UNARY_PREFIX("unary-prefix"), - UNARY_POSTFIX("unary-postfix"), - MATH_FROUND("Math_fround"), - SIMD_FLOAT32X4("SIMD_Float32x4"), - SIMD_FLOAT64X2("SIMD_Float64x2"), - SIMD_INT8X16("SIMD_Int8x16"), - SIMD_INT16X8("SIMD_Int16x8"), - SIMD_INT32X4("SIMD_Int32x4"), - PLUS("+"), - MINUS("-"), - OR("|"), - AND("&"), - XOR("^"), - L_NOT("!"), - B_NOT("~"), - LT("<"), - GE(">="), - LE("<="), - GT(">"), - EQ("=="), - NE("!="), - DIV("/"), - MOD("%"), - MUL("*"), - RSHIFT(">>"), - LSHIFT("<<"), - TRSHIFT(">>>"), - TEMP_DOUBLE_PTR("tempDoublePtr"), - HEAP8("HEAP8"), - HEAP16("HEAP16"), - HEAP32("HEAP32"), - HEAPF32("HEAPF32"), - HEAPU8("HEAPU8"), - HEAPU16("HEAPU16"), - HEAPU32("HEAPU32"), - HEAPF64("HEAPF64"), - F0("f0"), - EMPTY(""), - FUNCTION("function"), - OPEN_PAREN("("), - OPEN_BRACE("["), - OPEN_CURLY("{"), - CLOSE_CURLY("}"), - COMMA(","), - QUESTION("?"), - COLON(":"), - CASE("case"), - DEFAULT("default"), - DOT("dot"), - PERIOD("."), - NEW("new"), - ARRAY("array"), - OBJECT("object"), - THROW("throw"), - SET("="); - -IStringSet keywords("var const function if else do while for break continue return switch case default throw try catch finally true false null new"); - -const char *OPERATOR_INITS = "+-*/%<>&^|~=!,?:.", - *SEPARATORS = "([;{}"; - -int MAX_OPERATOR_SIZE = 3; - -std::vector operatorClasses; - -static std::vector> precedences; // op, type => prec - -struct Init { - Init() { - // operators, rtl, type - operatorClasses.push_back(OperatorClass(".", false, OperatorClass::Binary)); - operatorClasses.push_back(OperatorClass("! ~ + -", true, OperatorClass::Prefix)); - operatorClasses.push_back(OperatorClass("* / %", false, OperatorClass::Binary)); - operatorClasses.push_back(OperatorClass("+ -", false, OperatorClass::Binary)); - operatorClasses.push_back(OperatorClass("<< >> >>>", false, OperatorClass::Binary)); - operatorClasses.push_back(OperatorClass("< <= > >=", false, OperatorClass::Binary)); - operatorClasses.push_back(OperatorClass("== !=", false, OperatorClass::Binary)); - operatorClasses.push_back(OperatorClass("&", false, OperatorClass::Binary)); - operatorClasses.push_back(OperatorClass("^", false, OperatorClass::Binary)); - operatorClasses.push_back(OperatorClass("|", false, OperatorClass::Binary)); - operatorClasses.push_back(OperatorClass("? :", true, OperatorClass::Tertiary)); - operatorClasses.push_back(OperatorClass("=", true, OperatorClass::Binary)); - operatorClasses.push_back(OperatorClass(",", true, OperatorClass::Binary)); - - precedences.resize(OperatorClass::Tertiary + 1); - - for (size_t prec = 0; prec < operatorClasses.size(); prec++) { - for (auto curr : operatorClasses[prec].ops) { - precedences[operatorClasses[prec].type][curr] = prec; - } - } - } -}; - -Init init; - -int OperatorClass::getPrecedence(Type type, IString op) { - return precedences[type][op]; -} - -bool OperatorClass::getRtl(int prec) { - return operatorClasses[prec].rtl; -} - -bool isIdentInit(char x) { return (x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z') || x == '_' || x == '$'; } -bool isIdentPart(char x) { return isIdentInit(x) || (x >= '0' && x <= '9'); } - -} // namespace cashew - diff --git a/src/parser.h b/src/parser.h deleted file mode 100644 index 8ca07c6ed..000000000 --- a/src/parser.h +++ /dev/null @@ -1,905 +0,0 @@ -// Pure parsing. Calls methods on a Builder (template argument) to actually construct the AST -// -// XXX All parsing methods assume they take ownership of the input string. This lets them reuse -// parts of it. You will segfault if the input string cannot be reused and written to. - -#ifndef __parser_h__ -#define __parser_h__ - -#include -#include -#include - -#include - -#include "istring.h" - -namespace cashew { - -// common strings - -extern IString TOPLEVEL, - DEFUN, - BLOCK, - STAT, - ASSIGN, - NAME, - VAR, - CONST, - CONDITIONAL, - BINARY, - RETURN, - IF, - ELSE, - WHILE, - DO, - FOR, - SEQ, - SUB, - CALL, - NUM, - LABEL, - BREAK, - CONTINUE, - SWITCH, - STRING, - INF, - NaN, - TEMP_RET0, - UNARY_PREFIX, - UNARY_POSTFIX, - MATH_FROUND, - SIMD_FLOAT32X4, - SIMD_FLOAT64X2, - SIMD_INT8X16, - SIMD_INT16X8, - SIMD_INT32X4, - PLUS, - MINUS, - OR, - AND, - XOR, - L_NOT, - B_NOT, - LT, - GE, - LE, - GT, - EQ, - NE, - DIV, - MOD, - MUL, - RSHIFT, - LSHIFT, - TRSHIFT, - TEMP_DOUBLE_PTR, - HEAP8, - HEAP16, - HEAP32, - HEAPF32, - HEAPU8, - HEAPU16, - HEAPU32, - HEAPF64, - F0, - EMPTY, - FUNCTION, - OPEN_PAREN, - OPEN_BRACE, - OPEN_CURLY, - CLOSE_CURLY, - COMMA, - QUESTION, - COLON, - CASE, - DEFAULT, - DOT, - PERIOD, - NEW, - ARRAY, - OBJECT, - THROW, - SET; - -extern IStringSet keywords; - -extern const char *OPERATOR_INITS, *SEPARATORS; - -extern int MAX_OPERATOR_SIZE, LOWEST_PREC; - -struct OperatorClass { - enum Type { - Binary = 0, - Prefix = 1, - Postfix = 2, - Tertiary = 3 - }; - - IStringSet ops; - bool rtl; - Type type; - - OperatorClass(const char* o, bool r, Type t) : ops(o), rtl(r), type(t) {} - - static int getPrecedence(Type type, IString op); - static bool getRtl(int prec); -}; - -extern std::vector operatorClasses; - -extern bool isIdentInit(char x); -extern bool isIdentPart(char x); - -// parser - -template -class Parser { - - static bool isSpace(char x) { return x == 32 || x == 9 || x == 10 || x == 13; } /* space, tab, linefeed/newline, or return */ - static void skipSpace(char*& curr) { - while (*curr) { - if (isSpace(*curr)) { - curr++; - continue; - } - if (curr[0] == '/' && curr[1] == '/') { - curr += 2; - while (*curr && *curr != '\n') curr++; - if (*curr) curr++; - continue; - } - if (curr[0] == '/' && curr[1] == '*') { - curr += 2; - while (*curr && (curr[0] != '*' || curr[1] != '/')) curr++; - curr += 2; - continue; - } - return; - } - } - - static bool isDigit(char x) { return x >= '0' && x <= '9'; } - - static bool hasChar(const char* list, char x) { while (*list) if (*list++ == x) return true; return false; } - - static bool is32Bit(double x) { - return x == (int)x || x == (unsigned int)x; - } - - // An atomic fragment of something. Stops at a natural boundary. - enum FragType { - KEYWORD = 0, - OPERATOR = 1, - IDENT = 2, - STRING = 3, // without quotes - INT = 4, - DOUBLE = 5, - SEPARATOR = 6 - }; - - struct Frag { -#ifndef _MSC_VER // MSVC does not allow unrestricted unions: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2544.pdf - union { -#endif - IString str; - double num; -#ifndef _MSC_VER - }; -#endif - int size; - FragType type; - - bool isNumber() const { - return type == INT || type == DOUBLE; - } - - explicit Frag(char* src) { - char *start = src; - if (isIdentInit(*src)) { - // read an identifier or a keyword - src++; - while (isIdentPart(*src)) { - src++; - } - if (*src == 0) { - str.set(start); - } else { - char temp = *src; - *src = 0; - str.set(start, false); - *src = temp; - } - type = keywords.has(str) ? KEYWORD : IDENT; - } else if (isDigit(*src) || (src[0] == '.' && isDigit(src[1]))) { - if (src[0] == '0' && (src[1] == 'x' || src[1] == 'X')) { - // Explicitly parse hex numbers of form "0x...", because strtod - // supports hex number strings only in C++11, and Visual Studio 2013 does - // not yet support that functionality. - src += 2; - num = 0; - while (1) { - if (*src >= '0' && *src <= '9') { num *= 16; num += *src - '0'; } - else if (*src >= 'a' && *src <= 'f') { num *= 16; num += *src - 'a' + 10; } - else if (*src >= 'A' && *src <= 'F') { num *= 16; num += *src - 'F' + 10; } - else break; - src++; - } - } else { - num = strtod(start, &src); - } - // asm.js must have a '.' for double values. however, we also tolerate - // uglify's tendency to emit without a '.' (and fix it later with a +). - // for valid asm.js input, the '.' should be enough, and for uglify - // in the emscripten optimizer pipeline, we use simple_ast where INT/DOUBLE - // is quite the same at this point anyhow - type = (std::find(start, src, '.') == src && is32Bit(num)) ? INT : DOUBLE; - assert(src > start); - } else if (hasChar(OPERATOR_INITS, *src)) { - switch (*src) { - case '!': str = src[1] == '=' ? NE : L_NOT; break; - case '%': str = MOD; break; - case '&': str = AND; break; - case '*': str = MUL; break; - case '+': str = PLUS; break; - case ',': str = COMMA; break; - case '-': str = MINUS; break; - case '.': str = PERIOD; break; - case '/': str = DIV; break; - case ':': str = COLON; break; - case '<': str = src[1] == '<' ? LSHIFT : (src[1] == '=' ? LE : LT); break; - case '=': str = src[1] == '=' ? EQ : SET; break; - case '>': str = src[1] == '>' ? (src[2] == '>' ? TRSHIFT : RSHIFT) : (src[1] == '=' ? GE : GT); break; - case '?': str = QUESTION; break; - case '^': str = XOR; break; - case '|': str = OR; break; - case '~': str = B_NOT; break; - default: abort(); - } - size = strlen(str.str); -#ifndef NDEBUG - char temp = start[size]; - start[size] = 0; - assert(strcmp(str.str, start) == 0); - start[size] = temp; -#endif - type = OPERATOR; - return; - } else if (hasChar(SEPARATORS, *src)) { - type = SEPARATOR; - char temp = src[1]; - src[1] = 0; - str.set(src, false); - src[1] = temp; - src++; - } else if (*src == '"' || *src == '\'') { - char *end = strchr(src+1, *src); - *end = 0; - str.set(src+1); - src = end+1; - type = STRING; - } else { - dump("frag parsing", src); - abort(); - } - size = src - start; - } - }; - - struct ExpressionElement { - bool isNode; -#ifndef _MSC_VER // MSVC does not allow unrestricted unions: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2544.pdf - union { -#endif - NodeRef node; - IString op; -#ifndef _MSC_VER - }; -#endif - ExpressionElement(NodeRef n) : isNode(true), node(n) {} - ExpressionElement(IString o) : isNode(false), op(o) {} - - NodeRef getNode() { - assert(isNode); - return node; - } - IString getOp() { - assert(!isNode); - return op; - } - }; - - // This is a list of the current stack of node-operator-node-operator-etc. - // this works by each parseExpression call appending to the vector; then recursing out, and the toplevel sorts it all - typedef std::vector ExpressionParts; - std::vector expressionPartsStack; - - // Parses an element in a list of such elements, e.g. list of statements in a block, or list of parameters in a call - NodeRef parseElement(char*& src, const char* seps=";") { - //dump("parseElement", src); - skipSpace(src); - Frag frag(src); - src += frag.size; - switch (frag.type) { - case KEYWORD: { - return parseAfterKeyword(frag, src, seps); - } - case IDENT: { - return parseAfterIdent(frag, src, seps); - } - case STRING: - case INT: - case DOUBLE: { - return parseExpression(parseFrag(frag), src, seps); - } - case SEPARATOR: { - if (frag.str == OPEN_PAREN) return parseExpression(parseAfterParen(src), src, seps); - if (frag.str == OPEN_BRACE) return parseExpression(parseAfterBrace(src), src, seps); - if (frag.str == OPEN_CURLY) return parseExpression(parseAfterCurly(src), src, seps); - abort(); - } - case OPERATOR: { - return parseExpression(frag.str, src, seps); - } - default: /* dump("parseElement", src); printf("bad frag type: %d\n", frag.type); */ abort(); - } - return nullptr; - } - - NodeRef parseFrag(Frag& frag) { - switch (frag.type) { - case IDENT: return Builder::makeName(frag.str); - case STRING: return Builder::makeString(frag.str); - case INT: return Builder::makeInt(uint32_t(frag.num)); - case DOUBLE: return Builder::makeDouble(frag.num); - default: abort(); - } - return nullptr; - } - - NodeRef parseAfterKeyword(Frag& frag, char*& src, const char* seps) { - skipSpace(src); - if (frag.str == FUNCTION) return parseFunction(src, seps); - else if (frag.str == VAR) return parseVar(src, seps, false); - else if (frag.str == CONST) return parseVar(src, seps, true); - else if (frag.str == RETURN) return parseReturn(src, seps); - else if (frag.str == IF) return parseIf(src, seps); - else if (frag.str == DO) return parseDo(src, seps); - else if (frag.str == WHILE) return parseWhile(src, seps); - else if (frag.str == BREAK) return parseBreak(src, seps); - else if (frag.str == CONTINUE) return parseContinue(src, seps); - else if (frag.str == SWITCH) return parseSwitch(src, seps); - else if (frag.str == NEW) return parseNew(src, seps); - dump(frag.str.str, src); - abort(); - return nullptr; - } - - NodeRef parseFunction(char*& src, const char* seps) { - Frag name(src); - if (name.type == IDENT) { - src += name.size; - } else { - assert(name.type == SEPARATOR && name.str[0] == '('); - name.str = IString(); - } - NodeRef ret = Builder::makeFunction(name.str); - skipSpace(src); - assert(*src == '('); - src++; - while (1) { - skipSpace(src); - if (*src == ')') break; - Frag arg(src); - assert(arg.type == IDENT); - src += arg.size; - Builder::appendArgumentToFunction(ret, arg.str); - skipSpace(src); - if (*src == ')') break; - if (*src == ',') { - src++; - continue; - } - abort(); - } - src++; - Builder::setBlockContent(ret, parseBracketedBlock(src)); - // TODO: parse expression? - return ret; - } - - NodeRef parseVar(char*& src, const char* seps, bool is_const) { - NodeRef ret = Builder::makeVar(is_const); - while (1) { - skipSpace(src); - if (*src == ';') break; - Frag name(src); - assert(name.type == IDENT); - NodeRef value; - src += name.size; - skipSpace(src); - if (*src == '=') { - src++; - skipSpace(src); - value = parseElement(src, ";,"); - } - Builder::appendToVar(ret, name.str, value); - skipSpace(src); - if (*src == ';') break; - if (*src == ',') { - src++; - continue; - } - abort(); - } - src++; - return ret; - } - - NodeRef parseReturn(char*& src, const char* seps) { - skipSpace(src); - NodeRef value = !hasChar(seps, *src) ? parseElement(src, seps) : nullptr; - skipSpace(src); - assert(hasChar(seps, *src)); - if (*src == ';') src++; - return Builder::makeReturn(value); - } - - NodeRef parseIf(char*& src, const char* seps) { - NodeRef condition = parseParenned(src); - NodeRef ifTrue = parseMaybeBracketed(src, seps); - skipSpace(src); - NodeRef ifFalse; - if (!hasChar(seps, *src)) { - Frag next(src); - if (next.type == KEYWORD && next.str == ELSE) { - src += next.size; - ifFalse = parseMaybeBracketed(src, seps); - } - } - return Builder::makeIf(condition, ifTrue, ifFalse); - } - - NodeRef parseDo(char*& src, const char* seps) { - NodeRef body = parseMaybeBracketed(src, seps); - skipSpace(src); - Frag next(src); - assert(next.type == KEYWORD && next.str == WHILE); - src += next.size; - NodeRef condition = parseParenned(src); - return Builder::makeDo(body, condition); - } - - NodeRef parseWhile(char*& src, const char* seps) { - NodeRef condition = parseParenned(src); - NodeRef body = parseMaybeBracketed(src, seps); - return Builder::makeWhile(condition, body); - } - - NodeRef parseBreak(char*& src, const char* seps) { - skipSpace(src); - Frag next(src); - if (next.type == IDENT) src += next.size; - return Builder::makeBreak(next.type == IDENT ? next.str : IString()); - } - - NodeRef parseContinue(char*& src, const char* seps) { - skipSpace(src); - Frag next(src); - if (next.type == IDENT) src += next.size; - return Builder::makeContinue(next.type == IDENT ? next.str : IString()); - } - - NodeRef parseSwitch(char*& src, const char* seps) { - NodeRef ret = Builder::makeSwitch(parseParenned(src)); - skipSpace(src); - assert(*src == '{'); - src++; - while (1) { - // find all cases and possibly a default - skipSpace(src); - if (*src == '}') break; - Frag next(src); - if (next.type == KEYWORD) { - if (next.str == CASE) { - src += next.size; - skipSpace(src); - NodeRef arg; - Frag value(src); - if (value.isNumber()) { - arg = parseFrag(value); - src += value.size; - } else { - assert(value.type == OPERATOR); - assert(value.str == MINUS); - src += value.size; - skipSpace(src); - Frag value2(src); - assert(value2.isNumber()); - arg = Builder::makePrefix(MINUS, parseFrag(value2)); - src += value2.size; - } - Builder::appendCaseToSwitch(ret, arg); - skipSpace(src); - assert(*src == ':'); - src++; - continue; - } else if (next.str == DEFAULT) { - src += next.size; - Builder::appendDefaultToSwitch(ret); - skipSpace(src); - assert(*src == ':'); - src++; - continue; - } - // otherwise, may be some keyword that happens to start a block (e.g. case 1: _return_ 5) - } - // not case X: or default: or }, so must be some code - skipSpace(src); - bool explicitBlock = *src == '{'; - NodeRef subBlock = explicitBlock ? parseBracketedBlock(src) : parseBlock(src, ";}", CASE, DEFAULT); - Builder::appendCodeToSwitch(ret, subBlock, explicitBlock); - } - skipSpace(src); - assert(*src == '}'); - src++; - return ret; - } - - NodeRef parseNew(char*& src, const char* seps) { - return Builder::makeNew(parseElement(src, seps)); - } - - NodeRef parseAfterIdent(Frag& frag, char*& src, const char* seps) { - skipSpace(src); - if (*src == '(') return parseExpression(parseCall(parseFrag(frag), src), src, seps); - if (*src == '[') return parseExpression(parseIndexing(parseFrag(frag), src), src, seps); - if (*src == ':' && expressionPartsStack.back().size() == 0) { - src++; - skipSpace(src); - NodeRef inner; - if (*src == '{') { // context lets us know this is not an object, but a block - inner = parseBracketedBlock(src); - } else { - inner = parseElement(src, seps); - } - return Builder::makeLabel(frag.str, inner); - } - if (*src == '.') return parseExpression(parseDotting(parseFrag(frag), src), src, seps); - return parseExpression(parseFrag(frag), src, seps); - } - - NodeRef parseCall(NodeRef target, char*& src) { - expressionPartsStack.resize(expressionPartsStack.size()+1); - assert(*src == '('); - src++; - NodeRef ret = Builder::makeCall(target); - while (1) { - skipSpace(src); - if (*src == ')') break; - Builder::appendToCall(ret, parseElement(src, ",)")); - skipSpace(src); - if (*src == ')') break; - if (*src == ',') { - src++; - continue; - } - abort(); - } - src++; - assert(expressionPartsStack.back().size() == 0); - expressionPartsStack.pop_back(); - return ret; - } - - NodeRef parseIndexing(NodeRef target, char*& src) { - expressionPartsStack.resize(expressionPartsStack.size()+1); - assert(*src == '['); - src++; - NodeRef ret = Builder::makeIndexing(target, parseElement(src, "]")); - skipSpace(src); - assert(*src == ']'); - src++; - assert(expressionPartsStack.back().size() == 0); - expressionPartsStack.pop_back(); - return ret; - } - - NodeRef parseDotting(NodeRef target, char*& src) { - assert(*src == '.'); - src++; - Frag key(src); - assert(key.type == IDENT); - src += key.size; - return Builder::makeDot(target, key.str); - } - - NodeRef parseAfterParen(char*& src) { - expressionPartsStack.resize(expressionPartsStack.size()+1); - skipSpace(src); - NodeRef ret = parseElement(src, ")"); - skipSpace(src); - assert(*src == ')'); - src++; - assert(expressionPartsStack.back().size() == 0); - expressionPartsStack.pop_back(); - return ret; - } - - NodeRef parseAfterBrace(char*& src) { - expressionPartsStack.resize(expressionPartsStack.size()+1); - NodeRef ret = Builder::makeArray(); - while (1) { - skipSpace(src); - assert(*src); - if (*src == ']') break; - NodeRef element = parseElement(src, ",]"); - Builder::appendToArray(ret, element); - skipSpace(src); - if (*src == ']') break; - if (*src == ',') { - src++; - continue; - } - abort(); - } - src++; - return ret; - } - - NodeRef parseAfterCurly(char*& src) { - expressionPartsStack.resize(expressionPartsStack.size()+1); - NodeRef ret = Builder::makeObject(); - while (1) { - skipSpace(src); - assert(*src); - if (*src == '}') break; - Frag key(src); - assert(key.type == IDENT || key.type == STRING); - src += key.size; - skipSpace(src); - assert(*src == ':'); - src++; - NodeRef value = parseElement(src, ",}"); - Builder::appendToObject(ret, key.str, value); - skipSpace(src); - if (*src == '}') break; - if (*src == ',') { - src++; - continue; - } - abort(); - } - src++; - return ret; - } - - void dumpParts(ExpressionParts& parts, int i) { - printf("expressionparts: %d (at %d)\n", parts.size(), i); - printf("| "); - for (int i = 0; i < parts.size(); i++) { - if (parts[i].isNode) { - parts[i].getNode()->stringify(std::cout); - printf(" "); - } else { - printf(" _%s_ ", parts[i].getOp().str); - } - } - printf("|\n"); - } - - NodeRef makeBinary(NodeRef left, IString op, NodeRef right) { - if (op == PERIOD) { - return Builder::makeDot(left, right); - } else { - return Builder::makeBinary(left, op ,right); - } - } - - NodeRef parseExpression(ExpressionElement initial, char*&src, const char* seps) { - //dump("parseExpression", src); - ExpressionParts& parts = expressionPartsStack.back(); - skipSpace(src); - if (*src == 0 || hasChar(seps, *src)) { - if (parts.size() > 0) { - parts.push_back(initial); // cherry on top of the cake - } - return initial.getNode(); - } - bool top = parts.size() == 0; - if (initial.isNode) { - Frag next(src); - if (next.type == OPERATOR) { - parts.push_back(initial); - src += next.size; - parts.push_back(next.str); - } else { - if (*src == '(') { - initial = parseCall(initial.getNode(), src); - } else if (*src == '[') { - initial = parseIndexing(initial.getNode(), src); - } else { - dump("bad parseExpression state", src); - abort(); - } - return parseExpression(initial, src, seps); - } - } else { - parts.push_back(initial); - } - NodeRef last = parseElement(src, seps); - if (!top) return last; - { - ExpressionParts& parts = expressionPartsStack.back(); // |parts| may have been invalidated by that call - // we are the toplevel. sort it all out - // collapse right to left, highest priority first - //dumpParts(parts, 0); - for (auto& ops : operatorClasses) { - if (ops.rtl) { - // right to left - for (int i = parts.size()-1; i >= 0; i--) { - if (parts[i].isNode) continue; - IString op = parts[i].getOp(); - if (!ops.ops.has(op)) continue; - if (ops.type == OperatorClass::Binary && i > 0 && i < (int)parts.size()-1) { - parts[i] = makeBinary(parts[i-1].getNode(), op, parts[i+1].getNode()); - parts.erase(parts.begin() + i + 1); - parts.erase(parts.begin() + i - 1); - } else if (ops.type == OperatorClass::Prefix && i < (int)parts.size()-1) { - if (i > 0 && parts[i-1].isNode) continue; // cannot apply prefix operator if it would join two nodes - parts[i] = Builder::makePrefix(op, parts[i+1].getNode()); - parts.erase(parts.begin() + i + 1); - } else if (ops.type == OperatorClass::Tertiary) { - // we must be at X ? Y : Z - // ^ - //dumpParts(parts, i); - if (op != COLON) continue; - assert(i < (int)parts.size()-1 && i >= 3); - if (parts[i-2].getOp() != QUESTION) continue; // e.g. x ? y ? 1 : 0 : 2 - parts[i-3] = Builder::makeConditional(parts[i-3].getNode(), parts[i-1].getNode(), parts[i+1].getNode()); - parts.erase(parts.begin() + i - 2, parts.begin() + i + 2); - i = parts.size(); // basically a reset, due to things like x ? y ? 1 : 0 : 2 - } // TODO: postfix - } - } else { - // left to right - for (int i = 0; i < (int)parts.size(); i++) { - if (parts[i].isNode) continue; - IString op = parts[i].getOp(); - if (!ops.ops.has(op)) continue; - if (ops.type == OperatorClass::Binary && i > 0 && i < (int)parts.size()-1) { - parts[i] = makeBinary(parts[i-1].getNode(), op, parts[i+1].getNode()); - parts.erase(parts.begin() + i + 1); - parts.erase(parts.begin() + i - 1); - i--; - } else if (ops.type == OperatorClass::Prefix && i < (int)parts.size()-1) { - if (i > 0 && parts[i-1].isNode) continue; // cannot apply prefix operator if it would join two nodes - parts[i] = Builder::makePrefix(op, parts[i+1].getNode()); - parts.erase(parts.begin() + i + 1); - i = std::max(i-2, 0); // allow a previous prefix operator to cascade - } // TODO: tertiary, postfix - } - } - } - assert(parts.size() == 1); - NodeRef ret = parts[0].getNode(); - parts.clear(); - return ret; - } - } - - // Parses a block of code (e.g. a bunch of statements inside {,}, or the top level of o file) - NodeRef parseBlock(char*& src, const char* seps=";", IString keywordSep1=IString(), IString keywordSep2=IString()) { - NodeRef block = Builder::makeBlock(); - //dump("parseBlock", src); - while (1) { - skipSpace(src); - if (*src == 0) break; - if (*src == ';') { - src++; // skip a statement in this block - continue; - } - if (hasChar(seps, *src)) break; - if (!!keywordSep1) { - Frag next(src); - if (next.type == KEYWORD && next.str == keywordSep1) break; - } - if (!!keywordSep2) { - Frag next(src); - if (next.type == KEYWORD && next.str == keywordSep2) break; - } - NodeRef element = parseElementOrStatement(src, seps); - Builder::appendToBlock(block, element); - } - return block; - } - - NodeRef parseBracketedBlock(char*& src) { - skipSpace(src); - assert(*src == '{'); - src++; - NodeRef block = parseBlock(src, ";}"); // the two are not symmetrical, ; is just internally separating, } is the final one - parseBlock knows all this - assert(*src == '}'); - src++; - return block; - } - - NodeRef parseElementOrStatement(char*& src, const char *seps) { - skipSpace(src); - if (*src == ';') { - src++; - return Builder::makeBlock(); // we don't need the brackets here, but oh well - } - NodeRef ret = parseElement(src, seps); - skipSpace(src); - if (*src == ';') { - ret = Builder::makeStatement(ret); - src++; - } - return ret; - } - - NodeRef parseMaybeBracketed(char*& src, const char *seps) { - skipSpace(src); - return *src == '{' ? parseBracketedBlock(src) : parseElementOrStatement(src, seps); - } - - NodeRef parseParenned(char*& src) { - skipSpace(src); - assert(*src == '('); - src++; - NodeRef ret = parseElement(src, ")"); - skipSpace(src); - assert(*src == ')'); - src++; - return ret; - } - - // Debugging - - char *allSource; - int allSize; - - static void dump(const char *where, char* curr) { - /* - printf("%s:\n=============\n", where); - for (int i = 0; i < allSize; i++) printf("%c", allSource[i] ? allSource[i] : '?'); - printf("\n"); - for (int i = 0; i < (curr - allSource); i++) printf(" "); - printf("^\n=============\n"); - */ - fprintf(stderr, "%s:\n==========\n", where); - int newlinesLeft = 2; - int charsLeft = 200; - while (*curr) { - if (*curr == '\n') { - newlinesLeft--; - if (newlinesLeft == 0) break; - } - charsLeft--; - if (charsLeft == 0) break; - fprintf(stderr, "%c", *curr++); - } - fprintf(stderr, "\n\n"); - } - -public: - - Parser() : allSource(nullptr), allSize(0) { - expressionPartsStack.resize(1); - } - - // Highest-level parsing, as of a JavaScript script file. - NodeRef parseToplevel(char* src) { - allSource = src; - allSize = strlen(src); - NodeRef toplevel = Builder::makeToplevel(); - Builder::setBlockContent(toplevel, parseBlock(src)); - return toplevel; - } -}; - -} // namespace cashew - -#endif // __parser_h__ - diff --git a/src/simple_ast.cpp b/src/simple_ast.cpp deleted file mode 100644 index 89b8ce8ed..000000000 --- a/src/simple_ast.cpp +++ /dev/null @@ -1,259 +0,0 @@ - -#include "simple_ast.h" - -namespace cashew { - -// Ref methods - -Ref& Ref::operator[](unsigned x) { - return (*get())[x]; -} - -Ref& Ref::operator[](IString x) { - return (*get())[x]; -} - -bool Ref::operator==(const char *str) { - return get()->isString() && !strcmp(get()->str.str, str); -} - -bool Ref::operator!=(const char *str) { - return get()->isString() ? !!strcmp(get()->str.str, str) : true; -} - -bool Ref::operator==(const IString &str) { - return get()->isString() && get()->str == str; -} - -bool Ref::operator!=(const IString &str) { - return get()->isString() && get()->str != str; -} - -bool Ref::operator==(Ref other) { - return **this == *other; -} - -bool Ref::operator!() { - return !get() || get()->isNull(); -} - -// Arena - -Arena arena; - -Ref Arena::alloc() { - if (chunks.size() == 0 || index == CHUNK_SIZE) { - chunks.push_back(new Value[CHUNK_SIZE]); - index = 0; - } - return &chunks.back()[index++]; -} - -ArrayStorage* Arena::allocArray() { - if (arr_chunks.size() == 0 || arr_index == CHUNK_SIZE) { - arr_chunks.push_back(new ArrayStorage[CHUNK_SIZE]); - arr_index = 0; - } - return &arr_chunks.back()[arr_index++]; -} - -// dump - -void dump(const char *str, Ref node, bool pretty) { - std::cerr << str << ": "; - if (!!node) node->stringify(std::cerr, pretty); - else std::cerr << "(nullptr)"; - std::cerr << std::endl; -} - -// AST traversals - -// Traversals - -struct TraverseInfo { - TraverseInfo() {} - TraverseInfo(Ref node, ArrayStorage* arr) : node(node), arr(arr), index(0) {} - Ref node; - ArrayStorage* arr; - int index; -}; - -template -struct StackedStack { // a stack, on the stack - T stackStorage[init]; - T* storage; - int used, available; // used amount, available amount - bool alloced; - - StackedStack() : used(0), available(init), alloced(false) { - storage = stackStorage; - } - ~StackedStack() { - if (alloced) free(storage); - } - - int size() { return used; } - - void push_back(const T& t) { - assert(used <= available); - if (used == available) { - available *= 2; - if (!alloced) { - T* old = storage; - storage = (T*)malloc(sizeof(T)*available); - memcpy(storage, old, sizeof(T)*used); - alloced = true; - } else { - T *newStorage = (T*)realloc(storage, sizeof(T)*available); - assert(newStorage); - storage = newStorage; - } - } - assert(used < available); - assert(storage); - storage[used++] = t; - } - - T& back() { - assert(used > 0); - return storage[used-1]; - } - - void pop_back() { - assert(used > 0); - used--; - } -}; - -#define visitable(node) (node->isArray() && node->size() > 0) - -#define TRAV_STACK 40 - -// Traverse, calling visit before the children -void traversePre(Ref node, std::function visit) { - if (!visitable(node)) return; - visit(node); - StackedStack stack; - int index = 0; - ArrayStorage* arr = &node->getArray(); - int arrsize = (int)arr->size(); - Ref* arrdata = arr->data(); - stack.push_back(TraverseInfo(node, arr)); - while (1) { - if (index < arrsize) { - Ref sub = *(arrdata+index); - index++; - if (visitable(sub)) { - stack.back().index = index; - index = 0; - visit(sub); - arr = &sub->getArray(); - arrsize = (int)arr->size(); - arrdata = arr->data(); - stack.push_back(TraverseInfo(sub, arr)); - } - } else { - stack.pop_back(); - if (stack.size() == 0) break; - TraverseInfo& back = stack.back(); - index = back.index; - arr = back.arr; - arrsize = (int)arr->size(); - arrdata = arr->data(); - } - } -} - -// Traverse, calling visitPre before the children and visitPost after -void traversePrePost(Ref node, std::function visitPre, std::function visitPost) { - if (!visitable(node)) return; - visitPre(node); - StackedStack stack; - int index = 0; - ArrayStorage* arr = &node->getArray(); - int arrsize = (int)arr->size(); - Ref* arrdata = arr->data(); - stack.push_back(TraverseInfo(node, arr)); - while (1) { - if (index < arrsize) { - Ref sub = *(arrdata+index); - index++; - if (visitable(sub)) { - stack.back().index = index; - index = 0; - visitPre(sub); - arr = &sub->getArray(); - arrsize = (int)arr->size(); - arrdata = arr->data(); - stack.push_back(TraverseInfo(sub, arr)); - } - } else { - visitPost(stack.back().node); - stack.pop_back(); - if (stack.size() == 0) break; - TraverseInfo& back = stack.back(); - index = back.index; - arr = back.arr; - arrsize = (int)arr->size(); - arrdata = arr->data(); - } - } -} - -// Traverse, calling visitPre before the children and visitPost after. If pre returns false, do not traverse children -void traversePrePostConditional(Ref node, std::function visitPre, std::function visitPost) { - if (!visitable(node)) return; - if (!visitPre(node)) return; - StackedStack stack; - int index = 0; - ArrayStorage* arr = &node->getArray(); - int arrsize = (int)arr->size(); - Ref* arrdata = arr->data(); - stack.push_back(TraverseInfo(node, arr)); - while (1) { - if (index < arrsize) { - Ref sub = *(arrdata+index); - index++; - if (visitable(sub)) { - if (visitPre(sub)) { - stack.back().index = index; - index = 0; - arr = &sub->getArray(); - arrsize = (int)arr->size(); - arrdata = arr->data(); - stack.push_back(TraverseInfo(sub, arr)); - } - } - } else { - visitPost(stack.back().node); - stack.pop_back(); - if (stack.size() == 0) break; - TraverseInfo& back = stack.back(); - index = back.index; - arr = back.arr; - arrsize = (int)arr->size(); - arrdata = arr->data(); - } - } -} - -// Traverses all the top-level functions in the document -void traverseFunctions(Ref ast, std::function visit) { - if (!ast || ast->size() == 0) return; - if (ast[0] == TOPLEVEL) { - Ref stats = ast[1]; - for (size_t i = 0; i < stats->size(); i++) { - Ref curr = stats[i]; - if (curr[0] == DEFUN) visit(curr); - } - } else if (ast[0] == DEFUN) { - visit(ast); - } -} - -// ValueBuilder - -IStringSet ValueBuilder::statable("assign call binary unary-prefix if name num conditional dot new sub seq string object array"); - -} // namespace cashew - diff --git a/src/simple_ast.h b/src/simple_ast.h deleted file mode 100644 index 4012ff8f6..000000000 --- a/src/simple_ast.h +++ /dev/null @@ -1,1542 +0,0 @@ - -#ifndef __simple_ast_h__ -#define __simple_ast_h__ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "parser.h" - -#include "snprintf.h" - -#define err(str) fprintf(stderr, str "\n"); -#define errv(str, ...) fprintf(stderr, str "\n", __VA_ARGS__); -#define printErr err - -namespace cashew { - -struct Ref; -struct Value; - -void dump(const char *str, Ref node, bool pretty=false); - -// Reference to a value, plus some operators for convenience -struct Ref { - Value* inst; - - Ref(Value *v=nullptr) : inst(v) {} - - Value* get() { return inst; } - - Value& operator*() { return *inst; } - Value* operator->() { return inst; } - Ref& operator[](unsigned x); - Ref& operator[](IString x); - - // special conveniences - bool operator==(const char *str); // comparison to string, which is by value - bool operator!=(const char *str); - bool operator==(const IString &str); - bool operator!=(const IString &str); - bool operator==(double d) { abort(); return false; } // prevent Ref == number, which is potentially ambiguous; use ->getNumber() == number - bool operator==(Ref other); - bool operator!(); // check if null, in effect -}; - -// Arena allocation, free it all on process exit - -typedef std::vector ArrayStorage; - -struct Arena { - #define CHUNK_SIZE 1000 - std::vector chunks; - int index; // in last chunk - - std::vector arr_chunks; - int arr_index; - - Arena() : index(0), arr_index(0) {} - - Ref alloc(); - ArrayStorage* allocArray(); -}; - -extern Arena arena; - -// Main value type -struct Value { - enum Type { - String = 0, - Number = 1, - Array = 2, - Null = 3, - Bool = 4, - Object = 5 - }; - - Type type; - - typedef std::unordered_map ObjectStorage; - -#ifdef _MSC_VER // MSVC does not allow unrestricted unions: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2544.pdf - IString str; -#endif - union { // TODO: optimize -#ifndef _MSC_VER - IString str; -#endif - double num; - ArrayStorage *arr; - bool boo; - ObjectStorage *obj; - }; - - // constructors all copy their input - Value() : type(Null), num(0) {} - explicit Value(const char *s) : type(Null) { - setString(s); - } - explicit Value(double n) : type(Null) { - setNumber(n); - } - explicit Value(ArrayStorage &a) : type(Null) { - setArray(); - *arr = a; - } - // no bool constructor - would endanger the double one (int might convert the wrong way) - - ~Value() { - free(); - } - - void free() { - if (type == Array) { arr->clear(); arr->shrink_to_fit(); } - else if (type == Object) delete obj; - type = Null; - num = 0; - } - - Value& setString(const char *s) { - free(); - type = String; - str.set(s); - return *this; - } - Value& setString(const IString &s) { - free(); - type = String; - str.set(s); - return *this; - } - Value& setNumber(double n) { - free(); - type = Number; - num = n; - return *this; - } - Value& setArray(ArrayStorage &a) { - free(); - type = Array; - arr = arena.allocArray(); - *arr = a; - return *this; - } - Value& setArray(int size_hint=0) { - free(); - type = Array; - arr = arena.allocArray(); - arr->reserve(size_hint); - return *this; - } - Value& setNull() { - free(); - type = Null; - return *this; - } - Value& setBool(bool b) { // Bool in the name, as otherwise might overload over int - free(); - type = Bool; - boo = b; - return *this; - } - Value& setObject() { - free(); - type = Object; - obj = new ObjectStorage(); - return *this; - } - - bool isString() { return type == String; } - bool isNumber() { return type == Number; } - bool isArray() { return type == Array; } - bool isNull() { return type == Null; } - bool isBool() { return type == Bool; } - bool isObject() { return type == Object; } - - bool isBool(bool b) { return type == Bool && b == boo; } // avoid overloading == as it might overload over int - - const char* getCString() { - assert(isString()); - return str.str; - } - IString& getIString() { - assert(isString()); - return str; - } - double& getNumber() { - assert(isNumber()); - return num; - } - ArrayStorage& getArray() { - assert(isArray()); - return *arr; - } - bool& getBool() { - assert(isBool()); - return boo; - } - - int getInteger() { // convenience function to get a known integer - assert(fmod(getNumber(), 1) == 0); - int ret = int(getNumber()); - assert(double(ret) == getNumber()); // no loss in conversion - return ret; - } - - Value& operator=(const Value& other) { - free(); - switch (other.type) { - case String: - setString(other.str); - break; - case Number: - setNumber(other.num); - break; - case Array: - setArray(*other.arr); - break; - case Null: - setNull(); - break; - case Bool: - setBool(other.boo); - break; - case Object: - abort(); // TODO - } - return *this; - } - - bool operator==(const Value& other) { - if (type != other.type) return false; - switch (other.type) { - case String: - return str == other.str; - case Number: - return num == other.num; - case Array: - return this == &other; // if you want a deep compare, use deepCompare - case Null: - break; - case Bool: - return boo == other.boo; - case Object: - return this == &other; // if you want a deep compare, use deepCompare - } - return true; - } - - bool deepCompare(Ref ref) { - Value& other = *ref; - if (*this == other) return true; // either same pointer, or identical value type (string, number, null or bool) - if (type != other.type) return false; - if (type == Array) { - if (arr->size() != other.arr->size()) return false; - for (unsigned i = 0; i < arr->size(); i++) { - if (!(*arr)[i]->deepCompare((*other.arr)[i])) return false; - } - return true; - } else if (type == Object) { - if (obj->size() != other.obj->size()) return false; - for (auto i : *obj) { - if (other.obj->count(i.first) == 0) return false; - if (i.second->deepCompare((*other.obj)[i.first])) return false; - } - return true; - } - return false; - } - - char* parse(char* curr) { - #define is_json_space(x) (x == 32 || x == 9 || x == 10 || x == 13) /* space, tab, linefeed/newline, or return */ - #define skip() { while (*curr && is_json_space(*curr)) curr++; } - skip(); - if (*curr == '"') { - // String - curr++; - char *close = strchr(curr, '"'); - assert(close); - *close = 0; // end this string, and reuse it straight from the input - setString(curr); - curr = close+1; - } else if (*curr == '[') { - // Array - curr++; - skip(); - setArray(); - while (*curr != ']') { - Ref temp = arena.alloc(); - arr->push_back(temp); - curr = temp->parse(curr); - skip(); - if (*curr == ']') break; - assert(*curr == ','); - curr++; - skip(); - } - curr++; - } else if (*curr == 'n') { - // Null - assert(strncmp(curr, "null", 4) == 0); - setNull(); - curr += 4; - } else if (*curr == 't') { - // Bool true - assert(strncmp(curr, "true", 4) == 0); - setBool(true); - curr += 4; - } else if (*curr == 'f') { - // Bool false - assert(strncmp(curr, "false", 5) == 0); - setBool(false); - curr += 5; - } else if (*curr == '{') { - // Object - curr++; - skip(); - setObject(); - while (*curr != '}') { - assert(*curr == '"'); - curr++; - char *close = strchr(curr, '"'); - assert(close); - *close = 0; // end this string, and reuse it straight from the input - IString key(curr); - curr = close+1; - skip(); - assert(*curr == ':'); - curr++; - skip(); - Ref value = arena.alloc(); - curr = value->parse(curr); - (*obj)[key] = value; - skip(); - if (*curr == '}') break; - assert(*curr == ','); - curr++; - skip(); - } - curr++; - } else { - // Number - char *after; - setNumber(strtod(curr, &after)); - curr = after; - } - return curr; - } - - void stringify(std::ostream &os, bool pretty=false) { - static int indent = 0; - #define indentify() { for (int i = 0; i < indent; i++) os << " "; } - switch (type) { - case String: - os << '"' << str.str << '"'; - break; - case Number: - os << std::setprecision(17) << num; // doubles can have 17 digits of precision - break; - case Array: - if (arr->size() == 0) { - os << "[]"; - break; - } - os << '['; - if (pretty) { - os << std::endl; - indent++; - } - for (unsigned i = 0; i < arr->size(); i++) { - if (i > 0) { - if (pretty) os << "," << std::endl; - else os << ", "; - } - indentify(); - (*arr)[i]->stringify(os, pretty); - } - if (pretty) { - os << std::endl; - indent--; - } - indentify(); - os << ']'; - break; - case Null: - os << "null"; - break; - case Bool: - os << (boo ? "true" : "false"); - break; - case Object: - os << '{'; - if (pretty) { - os << std::endl; - indent++; - } - bool first = true; - for (auto i : *obj) { - if (first) { - first = false; - } else { - os << ", "; - if (pretty) os << std::endl; - } - indentify(); - os << '"' << i.first.c_str() << "\": "; - i.second->stringify(os, pretty); - } - if (pretty) { - os << std::endl; - indent--; - } - indentify(); - os << '}'; - break; - } - } - - // String operations - - // Number operations - - // Array operations - - unsigned size() { - assert(isArray()); - return arr->size(); - } - - void setSize(unsigned size) { - assert(isArray()); - unsigned old = arr->size(); - if (old != size) arr->resize(size); - if (old < size) { - for (unsigned i = old; i < size; i++) { - (*arr)[i] = arena.alloc(); - } - } - } - - Ref& operator[](unsigned x) { - assert(isArray()); - return arr->at(x); - } - - Value& push_back(Ref r) { - assert(isArray()); - arr->push_back(r); - return *this; - } - Ref pop_back() { - assert(isArray()); - Ref ret = arr->back(); - arr->pop_back(); - return ret; - } - - Ref back() { - assert(isArray()); - if (arr->size() == 0) return nullptr; - return arr->back(); - } - - void splice(int x, int num) { - assert(isArray()); - arr->erase(arr->begin() + x, arr->begin() + x + num); - } - - void insert(int x, int num) { - arr->insert(arr->begin() + x, num, Ref()); - } - void insert(int x, Ref node) { - arr->insert(arr->begin() + x, 1, node); - } - - int indexOf(Ref other) { - assert(isArray()); - for (unsigned i = 0; i < arr->size(); i++) { - if (other == (*arr)[i]) return i; - } - return -1; - } - - Ref map(std::function func) { - assert(isArray()); - Ref ret = arena.alloc(); - ret->setArray(); - for (unsigned i = 0; i < arr->size(); i++) { - ret->push_back(func((*arr)[i])); - } - return ret; - } - - Ref filter(std::function func) { - assert(isArray()); - Ref ret = arena.alloc(); - ret->setArray(); - for (unsigned i = 0; i < arr->size(); i++) { - Ref curr = (*arr)[i]; - if (func(curr)) ret->push_back(curr); - } - return ret; - } - - /* - void forEach(std::function func) { - for (unsigned i = 0; i < arr->size(); i++) { - func((*arr)[i]); - } - } - */ - - // Null operations - - // Bool operations - - // Object operations - - Ref& operator[](IString x) { - assert(isObject()); - return (*obj)[x]; - } - - bool has(IString x) { - assert(isObject()); - return obj->count(x) > 0; - } -}; - -// AST traversals - -// Traverse, calling visit before the children -void traversePre(Ref node, std::function visit); - -// Traverse, calling visitPre before the children and visitPost after -void traversePrePost(Ref node, std::function visitPre, std::function visitPost); - -// Traverse, calling visitPre before the children and visitPost after. If pre returns false, do not traverse children -void traversePrePostConditional(Ref node, std::function visitPre, std::function visitPost); - -// Traverses all the top-level functions in the document -void traverseFunctions(Ref ast, std::function visit); - -// JS printer - -struct JSPrinter { - bool pretty, finalize; - - char *buffer; - int size, used; - - int indent; - bool possibleSpace; // add a space to separate identifiers - - Ref ast; - - JSPrinter(bool pretty_, bool finalize_, Ref ast_) : pretty(pretty_), finalize(finalize_), buffer(0), size(0), used(0), indent(0), possibleSpace(false), ast(ast_) {} - - void printAst() { - print(ast); - buffer[used] = 0; - } - - // Utils - - void ensure(int safety=100) { - if (size < used + safety) { - size = std::max(1024, size*2) + safety; - if (!buffer) { - buffer = (char*)malloc(size); - if (!buffer) { - printf("Out of memory allocating %d bytes for output buffer!", size); - abort(); - } - } else { - char *buf = (char*)realloc(buffer, size); - if (!buf) { - free(buffer); - printf("Out of memory allocating %d bytes for output buffer!", size); - abort(); - } - buffer = buf; - } - } - } - - void emit(char c) { - maybeSpace(c); - if (!pretty && c == '}' && buffer[used-1] == ';') used--; // optimize ;} into }, the ; is not separating anything - ensure(1); - buffer[used++] = c; - } - - void emit(const char *s) { - maybeSpace(*s); - int len = strlen(s); - ensure(len+1); - strcpy(buffer + used, s); - used += len; - } - - void newline() { - if (!pretty) return; - emit('\n'); - for (int i = 0; i < indent; i++) emit(' '); - } - - void space() { - if (pretty) emit(' '); - } - - void safeSpace() { - if (pretty) emit(' '); - else possibleSpace = true; - } - - void maybeSpace(char s) { - if (possibleSpace) { - possibleSpace = false; - if (isIdentPart(s)) emit(' '); - } - } - - void print(Ref node) { - ensure(); - IString type = node[0]->getIString(); - //fprintf(stderr, "printing %s\n", type.str); - switch (type.str[0]) { - case 'a': { - if (type == ASSIGN) printAssign(node); - else if (type == ARRAY) printArray(node); - else abort(); - break; - } - case 'b': { - if (type == BINARY) printBinary(node); - else if (type == BLOCK) printBlock(node); - else if (type == BREAK) printBreak(node); - else abort(); - break; - } - case 'c': { - if (type == CALL) printCall(node); - else if (type == CONDITIONAL) printConditional(node); - else if (type == CONTINUE) printContinue(node); - else abort(); - break; - } - case 'd': { - if (type == DEFUN) printDefun(node); - else if (type == DO) printDo(node); - else if (type == DOT) printDot(node); - else abort(); - break; - } - case 'i': { - if (type == IF) printIf(node); - else abort(); - break; - } - case 'l': { - if (type == LABEL) printLabel(node); - else abort(); - break; - } - case 'n': { - if (type == NAME) printName(node); - else if (type == NUM) printNum(node); - else if (type == NEW) printNew(node); - else abort(); - break; - } - case 'o': { - if (type == OBJECT) printObject(node); - break; - } - case 'r': { - if (type == RETURN) printReturn(node); - else abort(); - break; - } - case 's': { - if (type == STAT) printStat(node); - else if (type == SUB) printSub(node); - else if (type == SEQ) printSeq(node); - else if (type == SWITCH) printSwitch(node); - else if (type == STRING) printString(node); - else abort(); - break; - } - case 't': { - if (type == TOPLEVEL) printToplevel(node); - else abort(); - break; - } - case 'u': { - if (type == UNARY_PREFIX) printUnaryPrefix(node); - else abort(); - break; - } - case 'v': { - if (type == VAR) printVar(node); - else abort(); - break; - } - case 'w': { - if (type == WHILE) printWhile(node); - else abort(); - break; - } - default: { - printf("cannot yet print %s\n", type.str); - abort(); - } - } - } - - // print a node, and if nothing is emitted, emit something instead - void print(Ref node, const char *otherwise) { - int last = used; - print(node); - if (used == last) emit(otherwise); - } - - void printStats(Ref stats) { - bool first = true; - for (size_t i = 0; i < stats->size(); i++) { - Ref curr = stats[i]; - if (!isNothing(curr)) { - if (first) first = false; - else newline(); - print(stats[i]); - } - } - } - - void printToplevel(Ref node) { - if (node[1]->size() > 0) { - printStats(node[1]); - } - } - - void printBlock(Ref node) { - if (node->size() == 1 || node[1]->size() == 0) { - emit("{}"); - return; - } - emit('{'); - indent++; - newline(); - printStats(node[1]); - indent--; - newline(); - emit('}'); - } - - void printDefun(Ref node) { - emit("function "); - emit(node[1]->getCString()); - emit('('); - Ref args = node[2]; - for (size_t i = 0; i < args->size(); i++) { - if (i > 0) (pretty ? emit(", ") : emit(',')); - emit(args[i]->getCString()); - } - emit(')'); - space(); - if (node->size() == 3 || node[3]->size() == 0) { - emit("{}"); - return; - } - emit('{'); - indent++; - newline(); - printStats(node[3]); - indent--; - newline(); - emit('}'); - newline(); - } - - bool isNothing(Ref node) { - return (node[0] == TOPLEVEL && node[1]->size() == 0) || (node[0] == STAT && isNothing(node[1])); - } - - void printStat(Ref node) { - if (!isNothing(node[1])) { - print(node[1]); - if (buffer[used-1] != ';') emit(';'); - } - } - - void printAssign(Ref node) { - printChild(node[2], node, -1); - space(); - emit('='); - space(); - printChild(node[3], node, 1); - } - - void printName(Ref node) { - emit(node[1]->getCString()); - } - - static char* numToString(double d, bool finalize=true) { - bool neg = d < 0; - if (neg) d = -d; - // try to emit the fewest necessary characters - bool integer = fmod(d, 1) == 0; - #define BUFFERSIZE 1000 - static char full_storage_f[BUFFERSIZE], full_storage_e[BUFFERSIZE]; // f is normal, e is scientific for float, x for integer - static char *storage_f = full_storage_f + 1, *storage_e = full_storage_e + 1; // full has one more char, for a possible '-' - double err_f, err_e; - for (int e = 0; e <= 1; e++) { - char *buffer = e ? storage_e : storage_f; - double temp; - if (!integer) { - static char format[6]; - for (int i = 0; i <= 18; i++) { - format[0] = '%'; - format[1] = '.'; - if (i < 10) { - format[2] = '0' + i; - format[3] = e ? 'e' : 'f'; - format[4] = 0; - } else { - format[2] = '1'; - format[3] = '0' + (i - 10); - format[4] = e ? 'e' : 'f'; - format[5] = 0; - } - snprintf(buffer, BUFFERSIZE-1, format, d); - sscanf(buffer, "%lf", &temp); - //errv("%.18f, %.18e => %s => %.18f, %.18e (%d), ", d, d, buffer, temp, temp, temp == d); - if (temp == d) break; - } - } else { - // integer - assert(d >= 0); - unsigned long long uu = (unsigned long long)d; - if (uu == d) { - bool asHex = e && !finalize; - snprintf(buffer, BUFFERSIZE-1, asHex ? "0x%llx" : "%llu", uu); - if (asHex) { - unsigned long long tempULL; - sscanf(buffer, "%llx", &tempULL); - temp = (double)tempULL; - } else { - sscanf(buffer, "%lf", &temp); - } - } else { - // too large for a machine integer, just use floats - snprintf(buffer, BUFFERSIZE-1, e ? "%e" : "%.0f", d); // even on integers, e with a dot is useful, e.g. 1.2e+200 - sscanf(buffer, "%lf", &temp); - } - //errv("%.18f, %.18e => %s => %.18f, %.18e, %llu (%d)\n", d, d, buffer, temp, temp, uu, temp == d); - } - (e ? err_e : err_f) = fabs(temp - d); - //errv("current attempt: %.18f => %s", d, buffer); - //assert(temp == d); - char *dot = strchr(buffer, '.'); - if (dot) { - // remove trailing zeros - char *end = dot+1; - while (*end >= '0' && *end <= '9') end++; - end--; - while (*end == '0') { - char *copy = end; - do { - copy[0] = copy[1]; - } while (*copy++ != 0); - end--; - } - //errv("%.18f => %s", d, buffer); - // remove preceding zeros - while (*buffer == '0') { - char *copy = buffer; - do { - copy[0] = copy[1]; - } while (*copy++ != 0); - } - //errv("%.18f ===> %s", d, buffer); - } else if (!integer || !e) { - // no dot. try to change 12345000 => 12345e3 - char *end = strchr(buffer, 0); - end--; - char *test = end; - // remove zeros, and also doubles can use at most 24 digits, we can truncate any extras even if not zero - while ((*test == '0' || test - buffer > 24) && test > buffer) test--; - int num = end - test; - if (num >= 3) { - test++; - test[0] = 'e'; - if (num < 10) { - test[1] = '0' + num; - test[2] = 0; - } else if (num < 100) { - test[1] = '0' + (num / 10); - test[2] = '0' + (num % 10); - test[3] = 0; - } else { - assert(num < 1000); - test[1] = '0' + (num / 100); - test[2] = '0' + (num % 100) / 10; - test[3] = '0' + (num % 10); - test[4] = 0; - } - } - } - //errv("..current attempt: %.18f => %s", d, buffer); - } - //fprintf(stderr, "options:\n%s\n%s\n (first? %d)\n", storage_e, storage_f, strlen(storage_e) < strlen(storage_f)); - char *ret; - if (err_e == err_f) { - ret = strlen(storage_e) < strlen(storage_f) ? storage_e : storage_f; - } else { - ret = err_e < err_f ? storage_e : storage_f; - } - if (neg) { - ret--; // safe to go back one, there is one more char in full_* - *ret = '-'; - } - return ret; - } - - void printNum(Ref node) { - emit(numToString(node[1]->getNumber(), finalize)); - } - - void printString(Ref node) { - emit('"'); - emit(node[1]->getCString()); - emit('"'); - } - - // Parens optimizing - - bool capturesOperators(Ref node) { - Ref type = node[0]; - return type == CALL || type == ARRAY || type == OBJECT || type == SEQ; - } - - int getPrecedence(Ref node, bool parent) { - Ref type = node[0]; - if (type == BINARY || type == UNARY_PREFIX) { - return OperatorClass::getPrecedence(type == BINARY ? OperatorClass::Binary : OperatorClass::Prefix, node[1]->getIString()); - } else if (type == SEQ) { - return OperatorClass::getPrecedence(OperatorClass::Binary, COMMA); - } else if (type == CALL) { - return parent ? OperatorClass::getPrecedence(OperatorClass::Binary, COMMA) : -1; // call arguments are split by commas, but call itself is safe - } else if (type == ASSIGN) { - return OperatorClass::getPrecedence(OperatorClass::Binary, SET); - } else if (type == CONDITIONAL) { - return OperatorClass::getPrecedence(OperatorClass::Tertiary, QUESTION); - } - // otherwise, this is something that fixes precedence explicitly, and we can ignore - return -1; // XXX - } - - // check whether we need parens for the child, when rendered in the parent - // @param childPosition -1 means it is printed to the left of parent, 0 means "anywhere", 1 means right - bool needParens(Ref parent, Ref child, int childPosition) { - int parentPrecedence = getPrecedence(parent, true); - int childPrecedence = getPrecedence(child, false); - - if (childPrecedence > parentPrecedence) return true; // child is definitely a danger - if (childPrecedence < parentPrecedence) return false; // definitely cool - // equal precedence, so associativity (rtl/ltr) is what matters - // (except for some exceptions, where multiple operators can combine into confusion) - if (parent[0] == UNARY_PREFIX) { - assert(child[0] == UNARY_PREFIX); - if ((parent[1] == PLUS || parent[1] == MINUS) && child[1] == parent[1]) { - // cannot emit ++x when we mean +(+x) - return true; - } - } - if (childPosition == 0) return true; // child could be anywhere, so always paren - if (childPrecedence < 0) return false; // both precedences are safe - // check if child is on the dangerous side - if (OperatorClass::getRtl(parentPrecedence)) return childPosition < 0; - else return childPosition > 0; - } - - void printChild(Ref child, Ref parent, int childPosition=0) { - bool parens = needParens(parent, child, childPosition); - if (parens) emit('('); - print(child); - if (parens) emit(')'); - } - - void printBinary(Ref node) { - printChild(node[2], node, -1); - space(); - emit(node[1]->getCString()); - space(); - printChild(node[3], node, 1); - } - - void printUnaryPrefix(Ref node) { - if (finalize && node[1] == PLUS && (node[2][0] == NUM || - (node[2][0] == UNARY_PREFIX && node[2][1] == MINUS && node[2][2][0] == NUM))) { - // emit a finalized number - int last = used; - print(node[2]); - ensure(1); // we temporarily append a 0 - char *curr = buffer + last; // ensure might invalidate - buffer[used] = 0; - if (strchr(curr, '.')) return; // already a decimal point, all good - char *e = strchr(curr, 'e'); - if (!e) { - emit(".0"); - return; - } - ensure(3); - curr = buffer + last; // ensure might invalidate - char *end = strchr(curr, 0); - while (end >= e) { - end[2] = end[0]; - end--; - } - e[0] = '.'; - e[1] = '0'; - used += 2; - return; - } - if ((buffer[used-1] == '-' && node[1] == MINUS) || - (buffer[used-1] == '+' && node[1] == PLUS)) { - emit(' '); // cannot join - and - to --, looks like the -- operator - } - emit(node[1]->getCString()); - printChild(node[2], node, 1); - } - - void printConditional(Ref node) { - printChild(node[1], node, -1); - space(); - emit('?'); - space(); - printChild(node[2], node, 0); - space(); - emit(':'); - space(); - printChild(node[3], node, 1); - } - - void printCall(Ref node) { - printChild(node[1], node, 0); - emit('('); - Ref args = node[2]; - for (size_t i = 0; i < args->size(); i++) { - if (i > 0) (pretty ? emit(", ") : emit(',')); - printChild(args[i], node, 0); - } - emit(')'); - } - - void printSeq(Ref node) { - printChild(node[1], node, -1); - emit(','); - space(); - printChild(node[2], node, 1); - } - - void printDot(Ref node) { - print(node[1]); - emit('.'); - emit(node[2]->getCString()); - } - - void printSwitch(Ref node) { - emit("switch"); - space(); - emit('('); - print(node[1]); - emit(')'); - space(); - emit('{'); - newline(); - Ref cases = node[2]; - for (size_t i = 0; i < cases->size(); i++) { - Ref c = cases[i]; - if (!c[0]) { - emit("default:"); - } else { - emit("case "); - print(c[0]); - emit(':'); - } - if (c[1]->size() > 0) { - indent++; - newline(); - int curr = used; - printStats(c[1]); - indent--; - if (curr != used) newline(); - else used--; // avoid the extra indentation we added tentatively - } else { - newline(); - } - } - emit('}'); - } - - void printSub(Ref node) { - printChild(node[1], node, -1); - emit('['); - print(node[2]); - emit(']'); - } - - void printVar(Ref node) { - emit("var "); - Ref args = node[1]; - for (size_t i = 0; i < args->size(); i++) { - if (i > 0) (pretty ? emit(", ") : emit(',')); - emit(args[i][0]->getCString()); - if (args[i]->size() > 1) { - space(); - emit('='); - space(); - print(args[i][1]); - } - } - emit(';'); - } - - static bool ifHasElse(Ref node) { - assert(node[0] == IF); - return node->size() >= 4 && !!node[3]; - } - - void printIf(Ref node) { - emit("if"); - safeSpace(); - emit('('); - print(node[1]); - emit(')'); - space(); - // special case: we need braces to save us from ambiguity, if () { if () } else. otherwise else binds to inner if - // also need to recurse for if () { if () { } else { if () } else - // (note that this is only a problem if the if body has a single element in it, not a block or such, as then - // the block would be braced) - // this analysis is a little conservative - it assumes any child if could be confused with us, which implies - // all other braces vanished (the worst case for us, we are not saved by other braces). - bool needBraces = false; - bool hasElse = ifHasElse(node); - if (hasElse) { - Ref child = node[2]; - while (child[0] == IF) { - if (!ifHasElse(child)) { - needBraces = true; - break; - } - child = child[3]; // continue into the else - } - } - if (needBraces) { - emit('{'); - indent++; - newline(); - print(node[2]); - indent--; - newline(); - emit('}'); - } else { - print(node[2], "{}"); - } - if (hasElse) { - space(); - emit("else"); - safeSpace(); - print(node[3], "{}"); - } - } - - void printDo(Ref node) { - emit("do"); - safeSpace(); - print(node[2], "{}"); - space(); - emit("while"); - space(); - emit('('); - print(node[1]); - emit(')'); - emit(';'); - } - - void printWhile(Ref node) { - emit("while"); - space(); - emit('('); - print(node[1]); - emit(')'); - space(); - print(node[2], "{}"); - } - - void printLabel(Ref node) { - emit(node[1]->getCString()); - space(); - emit(':'); - space(); - print(node[2]); - } - - void printReturn(Ref node) { - emit("return"); - if (!!node[1]) { - emit(' '); - print(node[1]); - } - emit(';'); - } - - void printBreak(Ref node) { - emit("break"); - if (!!node[1]) { - emit(' '); - emit(node[1]->getCString()); - } - emit(';'); - } - - void printContinue(Ref node) { - emit("continue"); - if (!!node[1]) { - emit(' '); - emit(node[1]->getCString()); - } - emit(';'); - } - - void printNew(Ref node) { - emit("new "); - print(node[1]); - } - - void printArray(Ref node) { - emit('['); - Ref args = node[1]; - for (size_t i = 0; i < args->size(); i++) { - if (i > 0) (pretty ? emit(", ") : emit(',')); - print(args[i]); - } - emit(']'); - } - - void printObject(Ref node) { - emit('{'); - indent++; - newline(); - Ref args = node[1]; - for (size_t i = 0; i < args->size(); i++) { - if (i > 0) { - pretty ? emit(", ") : emit(','); - newline(); - } - emit('"'); - emit(args[i][0]->getCString()); - emit("\":"); - space(); - print(args[i][1]); - } - indent--; - newline(); - emit('}'); - } -}; - -// cashew builder - -class ValueBuilder { - static IStringSet statable; - - static Ref makeRawString(const IString& s) { - return &arena.alloc()->setString(s); - } - - static Ref makeRawArray(int size_hint=0) { - return &arena.alloc()->setArray(size_hint); - } - - static Ref makeNull() { - return &arena.alloc()->setNull(); - } - -public: - static Ref makeToplevel() { - return &makeRawArray(2)->push_back(makeRawString(TOPLEVEL)) - .push_back(makeRawArray()); - } - - static Ref makeString(IString str) { - return &makeRawArray(2)->push_back(makeRawString(STRING)) - .push_back(makeRawString(str)); - } - - static Ref makeBlock() { - return &makeRawArray(2)->push_back(makeRawString(BLOCK)) - .push_back(makeRawArray()); - } - - static Ref makeName(IString name) { - return &makeRawArray(2)->push_back(makeRawString(NAME)) - .push_back(makeRawString(name)); - } - - static void setBlockContent(Ref target, Ref block) { - if (target[0] == TOPLEVEL) { - target[1]->setArray(block[1]->getArray()); - } else if (target[0] == DEFUN) { - target[3]->setArray(block[1]->getArray()); - } else abort(); - } - - static void appendToBlock(Ref block, Ref element) { - assert(block[0] == BLOCK); - block[1]->push_back(element); - } - - static Ref makeCall(Ref target) { - return &makeRawArray(3)->push_back(makeRawString(CALL)) - .push_back(target) - .push_back(makeRawArray()); - } - - static void appendToCall(Ref call, Ref element) { - assert(call[0] == CALL); - call[2]->push_back(element); - } - - static Ref makeStatement(Ref contents) { - if (statable.has(contents[0]->getIString())) { - return &makeRawArray(2)->push_back(makeRawString(STAT)) - .push_back(contents); - } else { - return contents; // only very specific things actually need to be stat'ed - } - } - - static Ref makeDouble(double num) { - return &makeRawArray(2)->push_back(makeRawString(NUM)) - .push_back(&arena.alloc()->setNumber(num)); - } - static Ref makeInt(uint32_t num) { - return makeDouble(double(num)); - } - - static Ref makeBinary(Ref left, IString op, Ref right) { - if (op == SET) { - return &makeRawArray(4)->push_back(makeRawString(ASSIGN)) - .push_back(&arena.alloc()->setBool(true)) - .push_back(left) - .push_back(right); - } else if (op == COMMA) { - return &makeRawArray(3)->push_back(makeRawString(SEQ)) - .push_back(left) - .push_back(right); - } else { - return &makeRawArray(4)->push_back(makeRawString(BINARY)) - .push_back(makeRawString(op)) - .push_back(left) - .push_back(right); - } - } - - static Ref makePrefix(IString op, Ref right) { - return &makeRawArray(3)->push_back(makeRawString(UNARY_PREFIX)) - .push_back(makeRawString(op)) - .push_back(right); - } - - static Ref makeFunction(IString name) { - return &makeRawArray(4)->push_back(makeRawString(DEFUN)) - .push_back(makeRawString(name)) - .push_back(makeRawArray()) - .push_back(makeRawArray()); - } - - static void appendArgumentToFunction(Ref func, IString arg) { - assert(func[0] == DEFUN); - func[2]->push_back(makeRawString(arg)); - } - - static Ref makeVar(bool is_const) { - return &makeRawArray(2)->push_back(makeRawString(VAR)) - .push_back(makeRawArray()); - } - - static void appendToVar(Ref var, IString name, Ref value) { - assert(var[0] == VAR); - Ref array = &makeRawArray(1)->push_back(makeRawString(name)); - if (!!value) array->push_back(value); - var[1]->push_back(array); - } - - static Ref makeReturn(Ref value) { - return &makeRawArray(2)->push_back(makeRawString(RETURN)) - .push_back(!!value ? value : makeNull()); - } - - static Ref makeIndexing(Ref target, Ref index) { - return &makeRawArray(3)->push_back(makeRawString(SUB)) - .push_back(target) - .push_back(index); - } - - static Ref makeIf(Ref condition, Ref ifTrue, Ref ifFalse) { - return &makeRawArray(4)->push_back(makeRawString(IF)) - .push_back(condition) - .push_back(ifTrue) - .push_back(!!ifFalse ? ifFalse : makeNull()); - } - - static Ref makeConditional(Ref condition, Ref ifTrue, Ref ifFalse) { - return &makeRawArray(4)->push_back(makeRawString(CONDITIONAL)) - .push_back(condition) - .push_back(ifTrue) - .push_back(ifFalse); - } - - static Ref makeDo(Ref body, Ref condition) { - return &makeRawArray(3)->push_back(makeRawString(DO)) - .push_back(condition) - .push_back(body); - } - - static Ref makeWhile(Ref condition, Ref body) { - return &makeRawArray(3)->push_back(makeRawString(WHILE)) - .push_back(condition) - .push_back(body); - } - - static Ref makeBreak(IString label) { - return &makeRawArray(2)->push_back(makeRawString(BREAK)) - .push_back(!!label ? makeRawString(label) : makeNull()); - } - - static Ref makeContinue(IString label) { - return &makeRawArray(2)->push_back(makeRawString(CONTINUE)) - .push_back(!!label ? makeRawString(label) : makeNull()); - } - - static Ref makeLabel(IString name, Ref body) { - return &makeRawArray(3)->push_back(makeRawString(LABEL)) - .push_back(makeRawString(name)) - .push_back(body); - } - - static Ref makeSwitch(Ref input) { - return &makeRawArray(3)->push_back(makeRawString(SWITCH)) - .push_back(input) - .push_back(makeRawArray()); - } - - static void appendCaseToSwitch(Ref switch_, Ref arg) { - assert(switch_[0] == SWITCH); - switch_[2]->push_back(&makeRawArray(2)->push_back(arg) - .push_back(makeRawArray())); - } - - static void appendDefaultToSwitch(Ref switch_) { - assert(switch_[0] == SWITCH); - switch_[2]->push_back(&makeRawArray(2)->push_back(makeNull()) - .push_back(makeRawArray())); - } - - static void appendCodeToSwitch(Ref switch_, Ref code, bool explicitBlock) { - assert(switch_[0] == SWITCH); - assert(code[0] == BLOCK); - if (!explicitBlock) { - for (size_t i = 0; i < code[1]->size(); i++) { - switch_[2]->back()->back()->push_back(code[1][i]); - } - } else { - switch_[2]->back()->back()->push_back(code); - } - } - - static Ref makeDot(Ref obj, IString key) { - return &makeRawArray(3)->push_back(makeRawString(DOT)) - .push_back(obj) - .push_back(makeRawString(key)); - } - - static Ref makeDot(Ref obj, Ref key) { - assert(key[0] == NAME); - return makeDot(obj, key[1]->getIString()); - } - - static Ref makeNew(Ref call) { - return &makeRawArray(2)->push_back(makeRawString(NEW)) - .push_back(call); - } - - static Ref makeArray() { - return &makeRawArray(2)->push_back(makeRawString(ARRAY)) - .push_back(makeRawArray()); - } - - static void appendToArray(Ref array, Ref element) { - assert(array[0] == ARRAY); - array[1]->push_back(element); - } - - static Ref makeObject() { - return &makeRawArray(2)->push_back(makeRawString(OBJECT)) - .push_back(makeRawArray()); - } - - static void appendToObject(Ref array, IString key, Ref value) { - assert(array[0] == OBJECT); - array[1]->push_back(&makeRawArray(2)->push_back(makeRawString(key)) - .push_back(value)); - } -}; - -// Tolerates 0.0 in the input; does not trust a +() to be there. -class DotZeroValueBuilder : public ValueBuilder { -public: - static Ref makeDouble(double num) { - return makePrefix(PLUS, ValueBuilder::makeDouble(num)); - } -}; - -} // namespace cashew - -#endif // __simple_ast_h__ - diff --git a/src/snprintf.h b/src/snprintf.h deleted file mode 100644 index 2792a23a2..000000000 --- a/src/snprintf.h +++ /dev/null @@ -1,32 +0,0 @@ -#include - -// Visual Studio does not support C99, so emulate snprintf support for it manually. - -#ifdef _MSC_VER - -#define snprintf c99_snprintf - -inline int c99_vsnprintf(char* str, size_t size, const char* format, va_list ap) -{ - int count = -1; - - if (size != 0) - count = _vsnprintf_s(str, size, _TRUNCATE, format, ap); - if (count == -1) - count = _vscprintf(format, ap); - - return count; -} - -inline int c99_snprintf(char* str, size_t size, const char* format, ...) -{ - int count; - va_list ap; - - va_start(ap, format); - count = c99_vsnprintf(str, size, format, ap); - va_end(ap); - - return count; -} -#endif -- cgit v1.2.3