diff options
Diffstat (limited to 'candle-wasm-examples/whisper/lib-example.html')
-rw-r--r-- | candle-wasm-examples/whisper/lib-example.html | 313 |
1 files changed, 313 insertions, 0 deletions
diff --git a/candle-wasm-examples/whisper/lib-example.html b/candle-wasm-examples/whisper/lib-example.html new file mode 100644 index 00000000..a8c49785 --- /dev/null +++ b/candle-wasm-examples/whisper/lib-example.html @@ -0,0 +1,313 @@ +<html> + <head> + <meta content="text/html;charset=utf-8" http-equiv="Content-Type" /> + <title>Candle Whisper Rust/WASM</title> + </head> + <body></body> +</html> + +<!doctype html> +<html> + <head> + <meta charset="UTF-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <style> + @import url("https://fonts.googleapis.com/css2?family=Source+Code+Pro:wght@200;300;400&family=Source+Sans+3:wght@100;200;300;400;500;600;700;800;900&display=swap"); + html, + body { + font-family: "Source Sans 3", sans-serif; + } + </style> + <script src="https://cdn.tailwindcss.com"></script> + <script type="module"> + // base url for audio examples + const AUDIO_BASE_URL = + "https://huggingface.co/datasets/Narsil/candle-examples/resolve/main/"; + + // models base url + const MODELS = { + tiny_en: { + base_url: + "https://huggingface.co/openai/whisper-tiny.en/resolve/refs%2Fpr%2F17/", + }, + }; + const whisperWorker = new Worker("./whisperWorker.js", { + type: "module", + }); + + async function classifyAudio( + weightsURL, // URL to the weights file + modelID, // model ID + tokenizerURL, // URL to the tokenizer file + mel_filtersURL, // URL to the mel filters file + audioURL, // URL to the audio file + updateStatus // function to update the status + ) { + return new Promise((resolve, reject) => { + whisperWorker.postMessage({ + weightsURL, + modelID, + tokenizerURL, + mel_filtersURL, + audioURL, + }); + whisperWorker.addEventListener("message", (event) => { + console.log(event.data); + if ("status" in event.data) { + updateStatus(event.data); + } + if ("error" in event.data) { + reject(new Error(event.data.error)); + } + if (event.data.status === "complete") { + resolve(event.data); + } + }); + }); + } + + // keep track of the audio URL + let audioURL = null; + function setAudio(src) { + const audio = document.querySelector("#audio"); + audio.src = src; + audio.controls = true; + audio.hidden = false; + document.querySelector("#detect").disabled = false; + audioURL = src; + } + // add event listener to audio buttons + document.querySelectorAll("#audios-select > button").forEach((target) => { + target.addEventListener("click", (e) => { + const value = target.dataset.value; + const href = AUDIO_BASE_URL + value; + setAudio(href); + }); + }); + //add event listener to file input + document.querySelector("#file-upload").addEventListener("change", (e) => { + const target = e.target; + if (target.files.length > 0) { + const href = URL.createObjectURL(target.files[0]); + setAudio(href); + } + }); + // add event listener to drop-area + const dropArea = document.querySelector("#drop-area"); + dropArea.addEventListener("dragenter", (e) => { + e.preventDefault(); + dropArea.classList.add("border-blue-700"); + }); + dropArea.addEventListener("dragleave", (e) => { + e.preventDefault(); + dropArea.classList.remove("border-blue-700"); + }); + dropArea.addEventListener("dragover", (e) => { + e.preventDefault(); + dropArea.classList.add("border-blue-700"); + }); + dropArea.addEventListener("drop", (e) => { + e.preventDefault(); + dropArea.classList.remove("border-blue-700"); + const url = e.dataTransfer.getData("text/uri-list"); + const files = e.dataTransfer.files; + if (files.length > 0) { + const href = URL.createObjectURL(files[0]); + setAudio(href); + } else if (url) { + setAudio(url); + } + }); + + // add event listener to detect button + document.querySelector("#detect").addEventListener("click", async () => { + if (audioURL === null) { + return; + } + const modelID = document.querySelector("#model").value; + const modelURL = MODELS[modelID].base_url + "model.safetensors"; + const tokenizerURL = MODELS[modelID].base_url + "tokenizer.json"; + + classifyAudio( + modelURL, + modelID, + tokenizerURL, + "mel_filters.safetensors", + audioURL, + updateStatus + ) + .then((result) => { + console.log("RESULT", result); + const { output } = result; + const text = output.map((segment) => segment.dr.text).join(" "); + console.log(text); + document.getElementById("output").textContent = text; + }) + .catch((error) => { + console.error(error); + }); + }); + + function updateStatus(data) { + const { status, message } = data; + const button = document.querySelector("#detect"); + if (status === "decoding" || status === "loading") { + button.disabled = true; + button.textContent = message; + } else if (status === "complete") { + button.disabled = false; + button.textContent = "Transcribe Audio"; + } + } + </script> + </head> + <body class="container max-w-4xl mx-auto p-4"> + <main class="grid grid-cols-1 gap-8 relative"> + <span class="absolute text-5xl -ml-[1em]"> 🕯️ </span> + <div> + <h1 class="text-5xl font-bold">Candle Whisper</h1> + <h2 class="text-2xl font-bold">Rust/WASM Demo</h2> + <p class="max-w-lg"> + Transcribe audio in the browser using rust/wasm with an audio file. + This demo uses the + <a + href="https://huggingface.co/openai/" + target="_blank" + class="underline hover:text-blue-500 hover:no-underline" + > + OpenAI Whisper models + </a> + and WASM runtime built with + <a + href="https://github.com/huggingface/candle/" + target="_blank" + class="underline hover:text-blue-500 hover:no-underline" + >Candle + </a> + </p> + </div> + + <div> + <label for="model" class="font-medium">Models Options: </label> + <select + id="model" + class="border-2 border-gray-500 rounded-md font-light" + > + <option value="tiny_en" selected>tiny.en (151 MB)</option> + </select> + </div> + <!-- drag and drop area --> + <div class="relative"> + <div + id="drop-area" + class="flex flex-col items-center justify-center border-2 border-gray-300 border-dashed rounded-xl relative h-48 w-full overflow-hidden" + > + <div + class="flex flex-col items-center justify-center space-y-1 text-center" + > + <svg + width="25" + height="25" + viewBox="0 0 25 25" + fill="none" + xmlns="http://www.w3.org/2000/svg" + > + <path + d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z" + fill="#000" + /> + </svg> + <div class="flex text-sm text-gray-600"> + <label + for="file-upload" + class="relative cursor-pointer bg-white rounded-md font-medium text-blue-950 hover:text-blue-700" + > + <span>Drag and drop your audio here</span> + <span class="block text-xs">or</span> + <span class="block text-xs">Click to upload</span> + </label> + </div> + <input + id="file-upload" + name="file-upload" + type="file" + accept="audio/*" + class="sr-only" + /> + </div> + <audio + id="audio" + hidden + controls + class="w-full p-2 select-none" + ></audio> + </div> + </div> + <div> + <div class="flex flex-wrap gap-3 items-center" id="audios-select"> + <h3 class="font-medium">Examples:</h3> + <button + data-value="samples_jfk.wav" + class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline" + > + <span>jfk.wav</span> + <span class="text-xs block"> (352 kB)</span> + </button> + <button + data-value="samples_a13.wav" + class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline" + > + <span>a13.wav</span> + <span class="text-xs block"> (960 kB)</span> + </button> + <button + data-value="samples_mm0.wav" + class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline" + > + <span>mm0.wav</span> + <span class="text-xs block new"> (957 kB)</span> + </button> + <button + data-value="samples_gb0.wav" + class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline" + > + <span>gb0.wav </span> + <span class="text-xs block">(4.08 MB)</span> + </button> + <button + data-value="samples_gb1.wav" + class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline" + > + <span>gb1.wav </span> + <span class="text-xs block">(6.36 MB)</span> + </button> + <button + data-value="samples_hp0.wav" + class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline" + > + <span>hp0.wav </span> + <span class="text-xs block">(8.75 MB)</span> + </button> + </div> + </div> + + <div> + <button + id="detect" + disabled + class="bg-orange-900 hover:bg-orange-800 text-white font-normal py-2 px-4 rounded disabled:opacity-75 disabled:cursor-not-allowed" + > + Transcribe Audio + </button> + </div> + <div> + <h3 class="font-medium">Transcription:</h3> + + <div + id="output" + class="min-h-[100px] bg-slate-500 text-white p-4 rounded-md" + ></div> + </div> + </main> + </body> +</html> |