Spaces:
Running
Running
| <html> | |||
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width" />
  <title>Parquet Visualization Studio</title>
  <link rel="stylesheet" href="style.css" />
  <!--
    DuckDB-Wasm is intentionally NOT loaded here. The inline module script at the
    end of <body> imports it as an ES module and starts the DuckDB worker itself
    from a blob URL, so loading the worker/wasm bundles as classic scripts on the
    main thread is redundant and would execute worker-only code in the page.
  -->
  <!-- Vega stack: exposes the global vegaEmbed() used to render generated specs -->
  <script src="https://cdn.jsdelivr.net/npm/vega@5"></script>
  <script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>
  <script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>
</head>
| <body> | |||
| <div class="container"> | |||
| <h1>📊 Parquet Visualization Studio</h1> | |||
| <p class="subtitle">Visualize parquet files with interactive charts</p> | |||
| <form id="queryForm"> | |||
| <div class="form-group"> | |||
| <label for="urlSelect">Select Example Dataset</label> | |||
| <select id="urlSelect"> | |||
| <option value="">-- Choose a dataset or enter custom URL below --</option> | |||
| <option value="https://huggingface.co/datasets/PleIAs/SYNTH/resolve/refs%2Fconvert%2Fparquet/default/partial-train/0000.parquet">PleIAs/SYNTH</option> | |||
| <option value="https://huggingface.co/datasets/facebook/omnilingual-asr-corpus/resolve/refs%2Fconvert%2Fparquet/gby_Latn/train/0000.parquet">facebook/omnilingual-asr-corpus</option> | |||
| <option value="https://example.com/dataset3.parquet">Dataset 3</option> | |||
| <option value="https://example.com/dataset4.parquet">Dataset 4</option> | |||
| <option value="https://example.com/dataset5.parquet">Dataset 5</option> | |||
| <option value="https://example.com/dataset6.parquet">Dataset 6</option> | |||
| <option value="https://example.com/dataset7.parquet">Dataset 7</option> | |||
| <option value="https://example.com/dataset8.parquet">Dataset 8</option> | |||
| <option value="https://example.com/dataset9.parquet">Dataset 9</option> | |||
| <option value="https://example.com/dataset10.parquet">Dataset 10</option> | |||
| </select> | |||
| </div> | |||
| <div class="form-group"> | |||
| <label for="parquetUrl">Parquet File URL</label> | |||
| <input | |||
| type="text" | |||
| id="parquetUrl" | |||
| placeholder="https://example.com/data.parquet" | |||
| required | |||
| /> | |||
| </div> | |||
| <button type="submit" id="submitBtn">Load Dataset</button> | |||
| </form> | |||
| <div id="status" class="status"></div> | |||
| <div id="visualizationSection" class="visualization-section" style="display: none;"> | |||
| <h2>Create Visualization</h2> | |||
| <div class="form-group"> | |||
| <label for="hfToken">Hugging Face Token (required for LLM)</label> | |||
| <input | |||
| type="password" | |||
| id="hfToken" | |||
| placeholder="Enter your HF token with Inference Providers permission" | |||
| /> | |||
| <small>Get a token from <a href="https://huggingface.co/settings/tokens" target="_blank">HF Settings</a> with "Make calls to Inference Providers" permission</small> | |||
| </div> | |||
| <div class="form-group"> | |||
| <label for="vizPrompt">Describe the visualization you want</label> | |||
| <textarea | |||
| id="vizPrompt" | |||
| rows="3" | |||
| placeholder="e.g., Show a scatter plot of price vs quantity, Create a bar chart showing count by category..." | |||
| ></textarea> | |||
| </div> | |||
| <button type="button" id="generateVizBtn">Generate Visualization</button> | |||
| <div id="vizContainer" class="viz-container"></div> | |||
| </div> | |||
| </div> | |||
| <script type="module"> | |||
| import * as duckdb from 'https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@latest/+esm'; | |||
| let db = null; | |||
| let conn = null; | |||
| let currentDatasetUrl = null; | |||
| let columnInfo = []; | |||
/**
 * Boot DuckDB-Wasm and open the shared connection.
 *
 * The module-level `db` and `conn` are only assigned once BOTH instantiation
 * and connection succeeded. Callers use `db` as the "already initialised"
 * flag, so a half-initialised state (db set, conn still null) would prevent
 * any retry after a failed boot.
 *
 * @returns {Promise<void>}
 * @throws Rethrows any error raised while selecting the bundle, starting the
 *         worker, instantiating the database or connecting.
 */
async function initDuckDB() {
  const bundle = await duckdb.selectBundle(duckdb.getJsDelivrBundles());

  // The worker script lives on another origin, so it is pulled in through a
  // same-origin blob that simply importScripts() the real worker file.
  const workerUrl = URL.createObjectURL(
    new Blob([`importScripts("${bundle.mainWorker}");`], { type: 'text/javascript' })
  );

  let worker = null;
  try {
    worker = new Worker(workerUrl);
    const database = new duckdb.AsyncDuckDB(new duckdb.ConsoleLogger(), worker);
    await database.instantiate(bundle.mainModule, bundle.pthreadWorker);
    const connection = await database.connect();

    // Publish the shared state only after everything worked.
    db = database;
    conn = connection;
  } catch (error) {
    // Do not leave an orphaned worker behind when the boot fails.
    worker?.terminate();
    throw error;
  } finally {
    // The blob URL is only needed until the worker has loaded its script.
    URL.revokeObjectURL(workerUrl);
  }
}
/**
 * Show a message in the status banner.
 *
 * @param {string} message - Text to display.
 * @param {string} [type='info'] - Suffix of the `status-*` CSS class applied to the banner.
 */
function setStatus(message, type = 'info') {
  const banner = document.getElementById('status');
  Object.assign(banner, {
    textContent: message,
    className: `status status-${type}`,
  });
  // Make the banner visible regardless of its previous display state
  banner.style.display = 'block';
}
/**
 * Tell whether a DuckDB type name denotes a nested/complex type.
 *
 * Besides the keyword-prefixed forms (`STRUCT(...)`, `MAP(...)`, `UNION(...)`,
 * ...), DuckDB prints lists as `T[]` and fixed-size arrays as `T[n]`, so a
 * trailing `]` is treated as complex as well. Without that check a column of
 * type `INTEGER[]` would look like a plain `INTEGER`.
 *
 * @param {string} type - Type name, e.g. as reported by DESCRIBE.
 * @returns {boolean} true for struct, list, map, union and array types.
 */
function isComplexType(type) {
  const normalized = String(type ?? '').trim().toUpperCase();
  if (normalized.endsWith(']')) {
    return true;
  }
  const complexPrefixes = ['STRUCT', 'LIST', 'MAP', 'UNION', 'ARRAY'];
  return complexPrefixes.some((prefix) => normalized.startsWith(prefix));
}
/**
 * Tell whether a DuckDB type name denotes a scalar numeric type.
 *
 * Matching is by prefix so parameterised names such as `DECIMAL(18,3)` are
 * recognised. Unsigned integer types are included; they do not share a prefix
 * with their signed counterparts (`UBIGINT` does not start with `BIGINT`).
 *
 * @param {string} type - Type name, e.g. as reported by DESCRIBE.
 * @returns {boolean} true for integer, floating point and decimal types;
 *                    false for complex types and everything else.
 */
function isNumericType(type) {
  // Nested types are never numeric, whatever their element type is
  if (isComplexType(type)) return false;
  const numericTypes = [
    'TINYINT', 'SMALLINT', 'INTEGER', 'BIGINT', 'HUGEINT',
    'UTINYINT', 'USMALLINT', 'UINTEGER', 'UBIGINT', 'UHUGEINT',
    'FLOAT', 'DOUBLE', 'DECIMAL', 'NUMERIC', 'REAL',
  ];
  const normalized = type.toUpperCase();
  return numericTypes.some((name) => normalized.startsWith(name));
}
/**
 * Tell whether a DuckDB type name denotes a scalar text type.
 *
 * @param {string} type - Type name, e.g. as reported by DESCRIBE.
 * @returns {boolean} true when the name starts with VARCHAR, CHAR, TEXT or
 *                    STRING (case-insensitive) and is not a complex type.
 */
function isTextType(type) {
  // Complex types are rejected up front, before any prefix matching
  if (isComplexType(type)) {
    return false;
  }
  const upper = type.toUpperCase();
  for (const prefix of ['VARCHAR', 'CHAR', 'TEXT', 'STRING']) {
    if (upper.startsWith(prefix)) {
      return true;
    }
  }
  return false;
}
/**
 * Make the parquet file at `url` available to DuckDB under the fixed name
 * 'data.parquet', initialising DuckDB first when necessary.
 *
 * Any previous registration under that name is dropped first so the name
 * always refers to the most recently loaded dataset.
 *
 * @param {string} url - HTTP(S) URL of the parquet file.
 * @returns {Promise<void>}
 * @throws Rethrows errors from DuckDB initialisation or file registration.
 */
async function loadDataset(url) {
  // Lazily boot DuckDB on first use
  if (!db) {
    await initDuckDB();
  }

  try {
    await db.dropFile('data.parquet');
  } catch (error) {
    // Best effort: a failed drop must not block loading the new file,
    // but the failure is logged instead of vanishing silently.
    console.debug('Could not drop previous data.parquet registration:', error);
  }

  await db.registerFileURL(
    'data.parquet',
    url,
    duckdb.DuckDBDataProtocol.HTTP,
    false
  );
}
// Tail of the chain of detection runs. Every new run is appended to it so two
// runs never overlap: they all drop/register the same 'data.parquet' name, and
// several UI events (select change, input blur, form submit) can request a
// detection at almost the same time.
let detectColumnsQueue = Promise.resolve();

/**
 * Load the dataset at `url` and record its columns in `columnInfo`.
 *
 * Runs are serialised: a call made while another detection is in flight
 * starts only after the earlier one has finished.
 *
 * @param {string} url - URL of the parquet file to inspect.
 * @returns {Promise<void>} Settles when the detection for `url` has finished.
 *          Load/query failures are reported through the status banner instead
 *          of rejecting.
 */
function detectColumns(url) {
  const run = detectColumnsQueue.then(() => runColumnDetection(url));
  // Keep the queue usable even if a run unexpectedly rejects
  detectColumnsQueue = run.catch(() => {});
  return run;
}

/**
 * Perform one column detection: load the file, DESCRIBE it, publish the result.
 *
 * @param {string} url - URL of the parquet file to inspect.
 * @returns {Promise<void>}
 */
async function runColumnDetection(url) {
  try {
    setStatus('Detecting column types...', 'info');
    await loadDataset(url);

    const result = await conn.query("DESCRIBE 'data.parquet'");
    columnInfo = result.toArray().map((row) => ({
      name: row.column_name,
      type: row.column_type,
    }));

    setStatus(`Detected ${columnInfo.length} columns`, 'success');
    showVisualizationSection();
  } catch (error) {
    console.error('Error detecting columns:', error);
    setStatus(`Error detecting columns: ${error.message}`, 'error');
    columnInfo = [];
    // Hide the panel left over from a previously loaded dataset; it would
    // otherwise stay usable although no columns are known. The element is
    // hidden directly so the error message above is not overwritten.
    const vizSection = document.getElementById('visualizationSection');
    if (vizSection) {
      vizSection.style.display = 'none';
    }
  }
}
/**
 * Reveal the visualization panel when columns are known; otherwise hide it
 * and report that the dataset has no columns.
 */
function showVisualizationSection() {
  const panel = document.getElementById('visualizationSection');
  const hasColumns = columnInfo.length > 0;
  panel.style.display = hasColumns ? 'block' : 'none';
  if (!hasColumns) {
    setStatus('No columns found in dataset', 'error');
  }
}
/**
 * Submit handler of the dataset form: loads the URL typed into the input and
 * detects its columns, keeping the submit button disabled while that runs.
 *
 * @param {Event} e - The form's submit event; its default action is cancelled.
 * @returns {Promise<void>}
 */
async function handleSubmit(e) {
  e.preventDefault();

  const parquetUrl = document.getElementById('parquetUrl').value.trim();
  const button = document.getElementById('submitBtn');

  // Switch the button between its busy and idle presentation
  const markBusy = (busy) => {
    button.disabled = busy;
    button.textContent = busy ? 'Loading...' : 'Load Dataset';
  };

  if (parquetUrl === '') {
    setStatus('Please provide a parquet URL.', 'error');
    return;
  }

  try {
    markBusy(true);
    // Remember the URL before loading so other handlers see it as current
    currentDatasetUrl = parquetUrl;
    await detectColumns(parquetUrl);
  } catch (error) {
    console.error('Error:', error);
    setStatus(`Error: ${error.message}`, 'error');
  } finally {
    markBusy(false);
  }
}
// Picking an example dataset copies its URL into the text input and loads it
// right away; the empty placeholder option is ignored.
document.getElementById('urlSelect').addEventListener('change', async ({ target }) => {
  const chosenUrl = target.value;
  if (!chosenUrl) {
    return;
  }
  document.getElementById('parquetUrl').value = chosenUrl;
  currentDatasetUrl = chosenUrl;
  await detectColumns(chosenUrl);
});
// When the URL input loses focus, load the typed URL unless it is empty or
// already the current dataset.
document.getElementById('parquetUrl').addEventListener('blur', async ({ target }) => {
  const typedUrl = target.value.trim();
  const isNewUrl = typedUrl !== currentDatasetUrl;
  if (!typedUrl || !isNewUrl) {
    return;
  }
  currentDatasetUrl = typedUrl;
  await detectColumns(typedUrl);
});
// OpenAI-compatible chat-completions endpoint of the Hugging Face router.
const HF_CHAT_COMPLETIONS_URL = 'https://router.huggingface.co/v1/chat/completions';

/**
 * Pull the Vega-Lite JSON object out of the raw LLM answer.
 *
 * Reasoning models may prepend a `<think>...</think>` block, and models often
 * wrap JSON in markdown fences or add a sentence around it. The reasoning
 * block is removed first (it may itself contain braces), then everything from
 * the first `{` to the last `}` is parsed.
 *
 * @param {string} llmText - Message content returned by the model.
 * @returns {object} The parsed specification.
 * @throws {Error} When no parsable JSON object is found.
 */
function extractVegaLiteSpec(llmText) {
  const withoutReasoning = String(llmText ?? '')
    .replace(/<think>[\s\S]*?<\/think>/gi, '')
    .trim();
  const start = withoutReasoning.indexOf('{');
  const end = withoutReasoning.lastIndexOf('}');
  if (start === -1 || end <= start) {
    throw new Error('no JSON object found in the response');
  }
  return JSON.parse(withoutReasoning.slice(start, end + 1));
}

/**
 * Convert query result rows into plain objects Vega can consume.
 *
 * Rows exposing `toJSON()` (Arrow row proxies) are materialised through it,
 * and top-level BigInt values (64-bit integer columns) become Numbers, since
 * BigInt cannot be mixed with Numbers or serialised to JSON.
 *
 * @param {Array<object>} rows - Rows as returned by `result.toArray()`.
 * @returns {Array<object>} One plain object per row.
 */
function toPlainRows(rows) {
  return rows.map((row) => {
    const record = typeof row?.toJSON === 'function' ? row.toJSON() : { ...row };
    for (const [key, value] of Object.entries(record)) {
      if (typeof value === 'bigint') {
        record[key] = Number(value);
      }
    }
    return record;
  });
}

/**
 * Ask the LLM for a Vega-Lite spec matching `prompt`, attach up to 1000 rows
 * of the loaded dataset and render the chart into `#vizContainer`.
 *
 * Failures are logged and shown in the status banner; the returned promise
 * does not reject for them.
 *
 * @param {string} prompt - The user's description of the desired chart.
 * @param {string} hfToken - Hugging Face token sent as Bearer credential.
 * @returns {Promise<void>}
 */
async function generateVisualization(prompt, hfToken) {
  const vizContainer = document.getElementById('vizContainer');
  // Clear any chart left over from a previous run
  vizContainer.innerHTML = '';

  try {
    if (!conn || columnInfo.length === 0) {
      throw new Error('Load a dataset before generating a visualization');
    }

    setStatus('Generating visualization with LLM...', 'info');

    // Column list handed to the model so it only references existing fields
    const columnDescriptions = columnInfo
      .map((col) => `- ${col.name}: ${col.type}`)
      .join('\n');

    const systemPrompt = `You are a data visualization assistant that generates Vega-Lite specifications.
Available dataset columns:
${columnDescriptions}
Instructions:
1. Generate a valid Vega-Lite v5 specification based on the user's request
2. Use ONLY columns that exist in the dataset above
3. The data will be provided as an array of objects in the "data.values" field
4. Output ONLY the JSON specification, no explanations or markdown
5. Do not include the data itself, just reference fields by name
6. Include appropriate width and height (e.g., 600x400)
7. Make sure the spec is complete and valid
Output only the JSON spec starting with { and ending with }.`;

    const response = await fetch(HF_CHAT_COMPLETIONS_URL, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${hfToken}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: 'deepseek-ai/DeepSeek-R1',
        messages: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: prompt },
        ],
        temperature: 0.7,
        max_tokens: 2000,
      }),
    });
    if (!response.ok) {
      throw new Error(`API request failed: ${response.status} ${response.statusText}`);
    }

    const data = await response.json();
    const vegaSpec = data?.choices?.[0]?.message?.content;
    if (typeof vegaSpec !== 'string' || vegaSpec.trim() === '') {
      throw new Error('LLM response did not contain any message content');
    }

    let spec;
    try {
      spec = extractVegaLiteSpec(vegaSpec);
    } catch (e) {
      throw new Error(`Failed to parse LLM response as JSON: ${e.message}`);
    }

    setStatus('Fetching data for visualization...', 'info');
    const result = await conn.query(`SELECT * FROM 'data.parquet' LIMIT 1000`);

    // The model was told not to embed data; the rows are injected here
    spec.data = { values: toPlainRows(result.toArray()) };

    setStatus('Rendering visualization...', 'info');
    await vegaEmbed('#vizContainer', spec);
    setStatus('Visualization generated successfully!', 'success');
  } catch (error) {
    console.error('Error generating visualization:', error);
    setStatus(`Error: ${error.message}`, 'error');
  }
}
// Click handler of the "Generate Visualization" button: validates the prompt
// and token, then runs the generation. The button is disabled while a
// generation is in flight so repeated clicks cannot start concurrent LLM
// requests (same approach as the submit button in handleSubmit).
document.getElementById('generateVizBtn').addEventListener('click', async (event) => {
  // currentTarget is reset once dispatch ends, so keep the button reference
  const button = event.currentTarget;
  const prompt = document.getElementById('vizPrompt').value.trim();
  const hfToken = document.getElementById('hfToken').value.trim();

  if (!prompt) {
    setStatus('Please enter a visualization prompt', 'error');
    return;
  }
  if (!hfToken) {
    setStatus('Please enter your Hugging Face token', 'error');
    return;
  }

  const idleLabel = button.textContent;
  button.disabled = true;
  button.textContent = 'Generating...';
  try {
    await generateVisualization(prompt, hfToken);
  } finally {
    button.disabled = false;
    button.textContent = idleLabel;
  }
});
// Route submissions of the dataset form to handleSubmit
const queryFormEl = document.getElementById('queryForm');
queryFormEl.addEventListener('submit', handleSubmit);

// Initial banner shown once the module has been evaluated
setStatus('Ready to query parquet files!', 'success');
| </script> | |||
| </body> | |||
| </html> | |||