From 104e196d468d6c440c9f1fc504be37b2cbfb9722 Mon Sep 17 00:00:00 2001
From: Radamés Ajna
Date: Thu, 14 Dec 2023 04:04:17 -0800
Subject: Phi 2 wasm (#1432)

* add phi 2.0 quantized model wasm

* cols

* spell

* bug
---
 candle-wasm-examples/phi/index.html   | 90 ++++++++++++++++++++++++++---------
 candle-wasm-examples/phi/phiWorker.js | 17 ++++++-
 candle-wasm-examples/phi/src/bin/m.rs | 21 +++++++-
 3 files changed, 102 insertions(+), 26 deletions(-)

diff --git a/candle-wasm-examples/phi/index.html b/candle-wasm-examples/phi/index.html
index 19c6a586..dbef698a 100644
--- a/candle-wasm-examples/phi/index.html
+++ b/candle-wasm-examples/phi/index.html
@@ -1,7 +1,7 @@
-    <title>Candle Phi 1.5 Rust/WASM</title>
+    <title>Candle Phi 1.5 / Phi 2.0 Rust/WASM</title>
@@ -39,7 +39,7 @@
       import hljs from "https://cdn.skypack.dev/highlight.js";
       // models base url
       const MODELS = {
-        phi_1_5_quantized: {
+        phi_1_5_q4k: {
           base_url:
             "https://huggingface.co/lmz/candle-quantized-phi/resolve/main/",
           model: "model-q4k.gguf",
@@ -49,7 +49,7 @@
           seq_len: 2048,
           size: "800 MB",
         },
-        phi_1_5_quantized_2: {
+        phi_1_5_q80: {
           base_url:
             "https://huggingface.co/lmz/candle-quantized-phi/resolve/main/",
           model: "model-q80.gguf",
@@ -59,7 +59,21 @@
           seq_len: 2048,
           size: "1.51 GB",
         },
-        puffin_phi_v2_quantized: {
+        phi_2_0_q4k: {
+          base_url:
+            "https://huggingface.co/radames/phi-2-quantized/resolve/main/",
+          model: [
+            "model-v2-q4k.gguf_aa.part",
+            "model-v2-q4k.gguf_ab.part",
+            "model-v2-q4k.gguf_ac.part",
+          ],
+          tokenizer: "tokenizer.json",
+          config: "config.json",
+          quantized: true,
+          seq_len: 2048,
+          size: "1.57GB",
+        },
+        puffin_phi_v2_q4k: {
           base_url:
             "https://huggingface.co/lmz/candle-quantized-phi/resolve/main/",
           model: "model-puffin-phi-v2-q4k.gguf",
@@ -69,7 +83,7 @@
           seq_len: 2048,
           size: "798 MB",
         },
-        puffin_phi_v2_quantized_2: {
+        puffin_phi_v2_q80: {
           base_url:
             "https://huggingface.co/lmz/candle-quantized-phi/resolve/main/",
           model: "model-puffin-phi-v2-q80.gguf",
@@ -106,8 +120,8 @@ Let’s think step by step.`,
         },
         {
           title: "Question answering",
-          prompt: `What is the capital of France?
-Answer:`,
+          prompt: `Instruct: What is the capital of France?
+Output:`,
         },
         {
           title: "Chat mode",
@@ -148,7 +162,10 @@ Very polite review:`,
        const getValue = (id) => document.querySelector(`#${id}`).value;
        const modelID = getValue("model");
        const model = MODELS[modelID];
-       const weightsURL = model.base_url + model.model;
+       const weightsURL =
+         model.model instanceof Array
+           ? model.model.map((m) => model.base_url + m)
+           : model.base_url + model.model;
        const tokenizerURL = model.base_url + model.tokenizer;
        const configURL = model.base_url + model.config;
@@ -246,6 +263,13 @@ Very polite review:`,
          option.innerText = `${id} (${model.size})`;
          modelSelect.appendChild(option);
        }
+       const query = new URLSearchParams(window.location.search);
+       const modelID = query.get("model");
+       if (modelID) {
+         modelSelect.value = modelID;
+       } else {
+         modelSelect.value = "phi_1_5_q4k";
+       }
        for (const [i, { title, prompt }] of TEMPLATES.entries()) {
          const div = document.createElement("div");
@@ -270,8 +294,18 @@ Very polite review:`,
          prompt.value = template;
          prompt.style.height = "auto";
          prompt.style.height = prompt.scrollHeight + "px";
+         runBtn.disabled = false;
+         clearBtn.classList.remove("invisible");
        });
        modelSelect.addEventListener("change", (e) => {
+         const query = new URLSearchParams(window.location.search);
+         query.set("model", e.target.value);
+         window.history.replaceState(
+           {},
+           "",
+           `${window.location.pathname}?${query}`
+         );
+         window.parent.postMessage({ queryString: "?" + query }, "*");
          const model = MODELS[e.target.value];
          document.querySelector("#max-seq").max = model.seq_len;
          document.querySelector("#max-seq").nextElementSibling.value = 200;
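Note: the phi_2_0_q4k entry above lists its weights as three .gguf part files, and the diffstat shows phiWorker.js growing by 17 lines to accept an array of URLs, but that hunk is truncated from this patch text. A minimal sketch of the idea, assuming illustrative helper names (fetchArrayBuffer, concatenateArrayBuffers) rather than the worker's actual ones: fetch every part, then copy the bytes back-to-back into one contiguous buffer before parsing the .gguf.

// Sketch only: loading split .gguf weights in a worker.
async function fetchArrayBuffer(url) {
  const res = await fetch(url);
  if (!res.ok) throw new Error(`failed to fetch ${url}`);
  return new Uint8Array(await res.arrayBuffer());
}

// Download all parts in parallel, then splice them into a single buffer
// so the .gguf file is contiguous in memory.
async function concatenateArrayBuffers(urls) {
  const parts = await Promise.all(urls.map((u) => fetchArrayBuffer(u)));
  const total = parts.reduce((sum, p) => sum + p.length, 0);
  const out = new Uint8Array(total);
  let offset = 0;
  for (const part of parts) {
    out.set(part, offset); // copy each part at its running offset
    offset += part.length;
  }
  return out;
}

// Usage: `weightsURL` is a plain string for single-file models and an
// array of part URLs for phi_2_0_q4k, matching the ternary above.
// const weightsU8 = Array.isArray(weightsURL)
//   ? await concatenateArrayBuffers(weightsURL)
//   : await fetchArrayBuffer(weightsURL);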
@@ -320,7 +354,7 @@ Very polite review:`,
       🕯️
       <div>
-        <h1 class="text-3xl font-bold">Candle Phi 1.5</h1>
+        <h1 class="text-3xl font-bold">Candle Phi 1.5 / Phi 2.0</h1>
         <h2 class="text-2xl font-semibold">Rust/WASM Demo</h2>
         <p class="max-w-lg">
           The
@@ -330,10 +364,17 @@ Very polite review:`,
             target="_blank"
             >Phi-1.5</a
           >
-          model achieves state-of-the-art performance with only 1.3 billion
-          parameters, compared to models with up to 10 billion. You can try the
-          quantized version of the model here. Additional prompt examples are
-          available in the
+          and
+          <a
+            href="https://huggingface.co/microsoft/phi-2"
+            target="_blank"
+            >Phi-2</a
+          >
+          models achieve state-of-the-art performance with only 1.3 billion and
+          2.7 billion parameters, compared to larger models with up to 13
+          billion parameters. Here you can try the quantized versions.
+          Additional prompt examples are available in the
@@ ... @@
           >Puffin-Phi V2</a
           >
-          quantized version model, a fine-tuned version of Phi-1.5 on the
+          quantized version, a fine-tuned version of Phi-1.5 on the
@@ ... @@
           Note: When first run, the app will download and cache the model,
           which could
-          take a few minutes. The models are ~800MB or ~1.51GB in
+          take a few minutes. The models are ~800MB or ~1.57GB in
           size.
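The note above promises that weights are downloaded once and then cached; the caching code itself predates this patch and is not shown here. A minimal sketch of the approach using the browser Cache API, with an illustrative cache name:

// Sketch: serve weights from the Cache API so a page reload doesn't
// re-download ~800MB-1.57GB of model files.
async function fetchWithCache(url, cacheName = "phi-model-cache") {
  const cache = await caches.open(cacheName);
  const cached = await cache.match(url);
  if (cached) return new Uint8Array(await cached.arrayBuffer());
  const res = await fetch(url);
  await cache.put(url, res.clone()); // keep a copy for the next load
  return new Uint8Array(await res.arrayBuffer());
}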
@@ -375,8 +416,13 @@ Very polite review:`,
-          Prompt Templates
-
+
+            Prompt Templates
+
-Write a detailed analogy between mathematics and a lighthouse.
-Answer:
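Two of the additions above reach outside the page itself: the model-select change handler writes the selection into the query string (so index.html?model=phi_2_0_q4k deep-links to Phi-2) and posts the same query string to window.parent for when the demo runs inside an iframe. The parent side is not part of this patch; a sketch of what a host-page listener might look like, with the message handling kept illustrative:

// Sketch: host-page listener for the { queryString } message posted by the
// embedded demo, mirroring the iframe's model selection into the host URL.
window.addEventListener("message", (event) => {
  const queryString = event.data && event.data.queryString;
  if (typeof queryString === "string") {
    window.history.replaceState(
      {},
      "",
      window.location.pathname + queryString
    );
  }
});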