summary refs log tree commit diff
path: root/candle-wasm-examples/whisper/lib-example.html
diff options
context:
space:
mode:
Diffstat (limited to 'candle-wasm-examples/whisper/lib-example.html')
-rw-r--r-- candle-wasm-examples/whisper/lib-example.html 313
1 files changed, 313 insertions, 0 deletions
diff --git a/candle-wasm-examples/whisper/lib-example.html b/candle-wasm-examples/whisper/lib-example.html
new file mode 100644
index 00000000..a8c49785
--- /dev/null
+++ b/candle-wasm-examples/whisper/lib-example.html
@@ -0,0 +1,313 @@
+<!doctype html>
+<html>
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Candle Whisper Rust/WASM</title>
+ <style>
+ @import url("https://fonts.googleapis.com/css2?family=Source+Code+Pro:wght@200;300;400&family=Source+Sans+3:wght@100;200;300;400;500;600;700;800;900&display=swap");
+ html,
+ body {
+ font-family: "Source Sans 3", sans-serif;
+ }
+ </style>
+ <script src="https://cdn.tailwindcss.com"></script>
+ <script type="module">
+ // base url for audio examples
+ const AUDIO_BASE_URL =
+ "https://huggingface.co/datasets/Narsil/candle-examples/resolve/main/";
+
+ // models base url
+ const MODELS = {
+ tiny_en: {
+ base_url:
+ "https://huggingface.co/openai/whisper-tiny.en/resolve/refs%2Fpr%2F17/",
+ },
+ };
+ const whisperWorker = new Worker("./whisperWorker.js", {
+ type: "module",
+ });
+
+ async function classifyAudio(
+ weightsURL, // URL to the weights file
+ modelID, // model ID
+ tokenizerURL, // URL to the tokenizer file
+ mel_filtersURL, // URL to the mel filters file
+ audioURL, // URL to the audio file
+ updateStatus // function to update the status
+ ) {
+ return new Promise((resolve, reject) => {
+ whisperWorker.postMessage({
+ weightsURL,
+ modelID,
+ tokenizerURL,
+ mel_filtersURL,
+ audioURL,
+ });
+ whisperWorker.addEventListener("message", (event) => {
+ console.log(event.data);
+ if ("status" in event.data) {
+ updateStatus(event.data);
+ }
+ if ("error" in event.data) {
+ reject(new Error(event.data.error));
+ }
+ if (event.data.status === "complete") {
+ resolve(event.data);
+ }
+ });
+ });
+ }
+
+ // keep track of the audio URL
+ let audioURL = null;
+ function setAudio(src) {
+ const audio = document.querySelector("#audio");
+ audio.src = src;
+ audio.controls = true;
+ audio.hidden = false;
+ document.querySelector("#detect").disabled = false;
+ audioURL = src;
+ }
+ // add event listener to audio buttons
+ document.querySelectorAll("#audios-select > button").forEach((target) => {
+ target.addEventListener("click", (e) => {
+ const value = target.dataset.value;
+ const href = AUDIO_BASE_URL + value;
+ setAudio(href);
+ });
+ });
+ //add event listener to file input
+ document.querySelector("#file-upload").addEventListener("change", (e) => {
+ const target = e.target;
+ if (target.files.length > 0) {
+ const href = URL.createObjectURL(target.files[0]);
+ setAudio(href);
+ }
+ });
+ // add event listener to drop-area
+ const dropArea = document.querySelector("#drop-area");
+ dropArea.addEventListener("dragenter", (e) => {
+ e.preventDefault();
+ dropArea.classList.add("border-blue-700");
+ });
+ dropArea.addEventListener("dragleave", (e) => {
+ e.preventDefault();
+ dropArea.classList.remove("border-blue-700");
+ });
+ dropArea.addEventListener("dragover", (e) => {
+ e.preventDefault();
+ dropArea.classList.add("border-blue-700");
+ });
+ dropArea.addEventListener("drop", (e) => {
+ e.preventDefault();
+ dropArea.classList.remove("border-blue-700");
+ const url = e.dataTransfer.getData("text/uri-list");
+ const files = e.dataTransfer.files;
+ if (files.length > 0) {
+ const href = URL.createObjectURL(files[0]);
+ setAudio(href);
+ } else if (url) {
+ setAudio(url);
+ }
+ });
+
+ // add event listener to detect button
+ document.querySelector("#detect").addEventListener("click", async () => {
+ if (audioURL === null) {
+ return;
+ }
+ const modelID = document.querySelector("#model").value;
+ const modelURL = MODELS[modelID].base_url + "model.safetensors";
+ const tokenizerURL = MODELS[modelID].base_url + "tokenizer.json";
+
+ classifyAudio(
+ modelURL,
+ modelID,
+ tokenizerURL,
+ "mel_filters.safetensors",
+ audioURL,
+ updateStatus
+ )
+ .then((result) => {
+ console.log("RESULT", result);
+ const { output } = result;
+ const text = output.map((segment) => segment.dr.text).join(" ");
+ console.log(text);
+ document.getElementById("output").textContent = text;
+ })
+ .catch((error) => {
+ console.error(error);
+ });
+ });
+
+ function updateStatus(data) {
+ const { status, message } = data;
+ const button = document.querySelector("#detect");
+ if (status === "decoding" || status === "loading") {
+ button.disabled = true;
+ button.textContent = message;
+ } else if (status === "complete") {
+ button.disabled = false;
+ button.textContent = "Transcribe Audio";
+ }
+ }
+ </script>
+ </head>
+ <body class="container max-w-4xl mx-auto p-4">
+ <main class="grid grid-cols-1 gap-8 relative">
+ <span class="absolute text-5xl -ml-[1em]"> 🕯️ </span>
+ <div>
+ <h1 class="text-5xl font-bold">Candle Whisper</h1>
+ <h2 class="text-2xl font-bold">Rust/WASM Demo</h2>
+ <p class="max-w-lg">
+   Transcribe audio in the browser using rust/wasm with an audio file.
+   This demo uses the
+   <a
+     href="https://huggingface.co/openai/"
+     target="_blank"
+     rel="noopener noreferrer"
+     class="underline hover:text-blue-500 hover:no-underline"
+   >
+     OpenAI Whisper models
+   </a>
+   and WASM runtime built with
+   <a
+     href="https://github.com/huggingface/candle/"
+     target="_blank"
+     rel="noopener noreferrer"
+     class="underline hover:text-blue-500 hover:no-underline"
+     >Candle
+   </a>
+ </p>
+ </div>
+
+ <div>
+ <label for="model" class="font-medium">Models Options: </label>
+ <select
+ id="model"
+ class="border-2 border-gray-500 rounded-md font-light"
+ >
+ <option value="tiny_en" selected>tiny.en (151 MB)</option>
+ </select>
+ </div>
+ <!-- drag and drop area -->
+ <div class="relative">
+ <div
+ id="drop-area"
+ class="flex flex-col items-center justify-center border-2 border-gray-300 border-dashed rounded-xl relative h-48 w-full overflow-hidden"
+ >
+ <div
+ class="flex flex-col items-center justify-center space-y-1 text-center"
+ >
+ <svg
+ width="25"
+ height="25"
+ viewBox="0 0 25 25"
+ fill="none"
+ xmlns="http://www.w3.org/2000/svg"
+ >
+ <path
+ d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z"
+ fill="#000"
+ />
+ </svg>
+ <div class="flex text-sm text-gray-600">
+ <label
+ for="file-upload"
+ class="relative cursor-pointer bg-white rounded-md font-medium text-blue-950 hover:text-blue-700"
+ >
+ <span>Drag and drop your audio here</span>
+ <span class="block text-xs">or</span>
+ <span class="block text-xs">Click to upload</span>
+ </label>
+ </div>
+ <input
+ id="file-upload"
+ name="file-upload"
+ type="file"
+ accept="audio/*"
+ class="sr-only"
+ />
+ </div>
+ <audio
+ id="audio"
+ hidden
+ controls
+ class="w-full p-2 select-none"
+ ></audio>
+ </div>
+ </div>
+ <div>
+ <div class="flex flex-wrap gap-3 items-center" id="audios-select">
+ <h3 class="font-medium">Examples:</h3>
+ <button
+ data-value="samples_jfk.wav"
+ class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline"
+ >
+ <span>jfk.wav</span>
+ <span class="text-xs block"> (352 kB)</span>
+ </button>
+ <button
+ data-value="samples_a13.wav"
+ class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline"
+ >
+ <span>a13.wav</span>
+ <span class="text-xs block"> (960 kB)</span>
+ </button>
+ <button
+   data-value="samples_mm0.wav"
+   class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline"
+ >
+   <span>mm0.wav</span>
+   <span class="text-xs block"> (957 kB)</span>
+ </button>
+ <button
+ data-value="samples_gb0.wav"
+ class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline"
+ >
+ <span>gb0.wav </span>
+ <span class="text-xs block">(4.08 MB)</span>
+ </button>
+ <button
+ data-value="samples_gb1.wav"
+ class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline"
+ >
+ <span>gb1.wav </span>
+ <span class="text-xs block">(6.36 MB)</span>
+ </button>
+ <button
+ data-value="samples_hp0.wav"
+ class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline"
+ >
+ <span>hp0.wav </span>
+ <span class="text-xs block">(8.75 MB)</span>
+ </button>
+ </div>
+ </div>
+
+ <div>
+ <button
+ id="detect"
+ disabled
+ class="bg-orange-900 hover:bg-orange-800 text-white font-normal py-2 px-4 rounded disabled:opacity-75 disabled:cursor-not-allowed"
+ >
+ Transcribe Audio
+ </button>
+ </div>
+ <div>
+ <h3 class="font-medium">Transcription:</h3>
+
+ <div
+ id="output"
+ class="min-h-[100px] bg-slate-500 text-white p-4 rounded-md"
+ ></div>
+ </div>
+ </main>
+ </body>
+</html>