update app
Browse files
app.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
|
| 2 |
import os
|
| 3 |
import gc
|
| 4 |
import json
|
|
@@ -21,9 +20,6 @@ from transformers import (
|
|
| 21 |
AutoTokenizer,
|
| 22 |
)
|
| 23 |
|
| 24 |
-
# =========================
|
| 25 |
-
# Config
|
| 26 |
-
# =========================
|
| 27 |
MAX_MAX_NEW_TOKENS = 4096
|
| 28 |
DEFAULT_MAX_NEW_TOKENS = 1024
|
| 29 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
|
@@ -42,9 +38,6 @@ if torch.cuda.is_available():
|
|
| 42 |
print("device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
|
| 43 |
print("Using device:", device)
|
| 44 |
|
| 45 |
-
# =========================
|
| 46 |
-
# Models
|
| 47 |
-
# =========================
|
| 48 |
MODEL_ID_X = "Senqiao/VisionThink-Efficient"
|
| 49 |
processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True, use_fast=False)
|
| 50 |
model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
|
@@ -104,9 +97,7 @@ MODELS = {
|
|
| 104 |
|
| 105 |
MODEL_CHOICES = list(MODELS.keys())
|
| 106 |
|
| 107 |
-
|
| 108 |
-
# Examples
|
| 109 |
-
# =========================
|
| 110 |
image_examples = [
|
| 111 |
{"query": "Describe the safety measures in the image. Conclude (Safe / Unsafe).", "image": "images/5.jpg", "model": "Lumian-VLR-7B-Thinking"},
|
| 112 |
{"query": "Convert this page to doc [markdown] precisely.", "image": "images/3.png", "model": "Typhoon-OCR-3B"},
|
|
@@ -116,9 +107,7 @@ image_examples = [
|
|
| 116 |
{"query": "Convert chart to OTSL.", "image": "images/2.png", "model": "openbmb/MiniCPM-V-4"},
|
| 117 |
]
|
| 118 |
|
| 119 |
-
|
| 120 |
-
# Helpers
|
| 121 |
-
# =========================
|
| 122 |
def pil_to_data_url(img: Image.Image, fmt="PNG"):
|
| 123 |
buf = BytesIO()
|
| 124 |
img.save(buf, format=fmt)
|
|
@@ -213,9 +202,6 @@ def calc_timeout_image(*args, **kwargs):
|
|
| 213 |
except Exception:
|
| 214 |
return 60
|
| 215 |
|
| 216 |
-
# =========================
|
| 217 |
-
# Inference
|
| 218 |
-
# =========================
|
| 219 |
@spaces.GPU(duration=calc_timeout_image)
|
| 220 |
def generate_image(
|
| 221 |
model_name,
|
|
@@ -364,9 +350,6 @@ def run_image(model_name, text, image_b64, max_new_tokens_v, temperature_v, top_
|
|
| 364 |
def noop():
|
| 365 |
return None
|
| 366 |
|
| 367 |
-
# =========================
|
| 368 |
-
# SVGs
|
| 369 |
-
# =========================
|
| 370 |
THUNDER_SVG = f"""
|
| 371 |
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
|
| 372 |
<path fill="white" d="M13.2 2L5 13h5l-1.2 9L19 10h-5l-.8-8Z"/>
|
|
@@ -389,9 +372,7 @@ MODEL_TABS_HTML = "".join([
|
|
| 389 |
for m in MODEL_CHOICES
|
| 390 |
])
|
| 391 |
|
| 392 |
-
|
| 393 |
-
# CSS
|
| 394 |
-
# =========================
|
| 395 |
css = f"""
|
| 396 |
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500;600&display=swap');
|
| 397 |
*{{box-sizing:border-box;margin:0;padding:0}}
|
|
@@ -417,7 +398,7 @@ footer{{display:none!important}}
|
|
| 417 |
box-shadow:0 25px 50px -12px rgba(0,0,0,.6),0 0 0 1px rgba(255,255,255,.03);
|
| 418 |
}}
|
| 419 |
.app-header{{
|
| 420 |
-
background:linear-gradient(135deg,#18181b,#
|
| 421 |
padding:14px 24px;display:flex;align-items:center;justify-content:space-between;flex-wrap:wrap;gap:12px;
|
| 422 |
}}
|
| 423 |
.app-header-left{{display:flex;align-items:center;gap:12px}}
|
|
@@ -701,9 +682,6 @@ footer{{display:none!important}}
|
|
| 701 |
}}
|
| 702 |
"""
|
| 703 |
|
| 704 |
-
# =========================
|
| 705 |
-
# JS
|
| 706 |
-
# =========================
|
| 707 |
gallery_js = r"""
|
| 708 |
() => {
|
| 709 |
function init() {
|
|
@@ -1163,9 +1141,6 @@ watchOutputs();
|
|
| 1163 |
}
|
| 1164 |
"""
|
| 1165 |
|
| 1166 |
-
# =========================
|
| 1167 |
-
# App
|
| 1168 |
-
# =========================
|
| 1169 |
with gr.Blocks() as demo:
|
| 1170 |
hidden_image_b64 = gr.Textbox(value="", elem_id="hidden-image-b64", elem_classes="hidden-input", container=False)
|
| 1171 |
prompt = gr.Textbox(value="", elem_id="prompt-gradio-input", elem_classes="hidden-input", container=False)
|
|
@@ -1311,7 +1286,7 @@ with gr.Blocks() as demo:
|
|
| 1311 |
</div>
|
| 1312 |
|
| 1313 |
<div class="exp-note">
|
| 1314 |
-
Experimental VLM Suite
|
| 1315 |
</div>
|
| 1316 |
|
| 1317 |
<div class="app-statusbar">
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import gc
|
| 3 |
import json
|
|
|
|
| 20 |
AutoTokenizer,
|
| 21 |
)
|
| 22 |
|
|
|
|
|
|
|
|
|
|
| 23 |
MAX_MAX_NEW_TOKENS = 4096
|
| 24 |
DEFAULT_MAX_NEW_TOKENS = 1024
|
| 25 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
|
|
|
| 38 |
print("device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
|
| 39 |
print("Using device:", device)
|
| 40 |
|
|
|
|
|
|
|
|
|
|
| 41 |
MODEL_ID_X = "Senqiao/VisionThink-Efficient"
|
| 42 |
processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True, use_fast=False)
|
| 43 |
model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
|
|
|
| 97 |
|
| 98 |
MODEL_CHOICES = list(MODELS.keys())
|
| 99 |
|
| 100 |
+
|
|
|
|
|
|
|
| 101 |
image_examples = [
|
| 102 |
{"query": "Describe the safety measures in the image. Conclude (Safe / Unsafe).", "image": "images/5.jpg", "model": "Lumian-VLR-7B-Thinking"},
|
| 103 |
{"query": "Convert this page to doc [markdown] precisely.", "image": "images/3.png", "model": "Typhoon-OCR-3B"},
|
|
|
|
| 107 |
{"query": "Convert chart to OTSL.", "image": "images/2.png", "model": "openbmb/MiniCPM-V-4"},
|
| 108 |
]
|
| 109 |
|
| 110 |
+
|
|
|
|
|
|
|
| 111 |
def pil_to_data_url(img: Image.Image, fmt="PNG"):
|
| 112 |
buf = BytesIO()
|
| 113 |
img.save(buf, format=fmt)
|
|
|
|
| 202 |
except Exception:
|
| 203 |
return 60
|
| 204 |
|
|
|
|
|
|
|
|
|
|
| 205 |
@spaces.GPU(duration=calc_timeout_image)
|
| 206 |
def generate_image(
|
| 207 |
model_name,
|
|
|
|
| 350 |
def noop():
|
| 351 |
return None
|
| 352 |
|
|
|
|
|
|
|
|
|
|
| 353 |
THUNDER_SVG = f"""
|
| 354 |
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
|
| 355 |
<path fill="white" d="M13.2 2L5 13h5l-1.2 9L19 10h-5l-.8-8Z"/>
|
|
|
|
| 372 |
for m in MODEL_CHOICES
|
| 373 |
])
|
| 374 |
|
| 375 |
+
|
|
|
|
|
|
|
| 376 |
css = f"""
|
| 377 |
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500;600&display=swap');
|
| 378 |
*{{box-sizing:border-box;margin:0;padding:0}}
|
|
|
|
| 398 |
box-shadow:0 25px 50px -12px rgba(0,0,0,.6),0 0 0 1px rgba(255,255,255,.03);
|
| 399 |
}}
|
| 400 |
.app-header{{
|
| 401 |
+
background:linear-gradient(135deg,#18181b,#1e1e24);border-bottom:1px solid #27272a;
|
| 402 |
padding:14px 24px;display:flex;align-items:center;justify-content:space-between;flex-wrap:wrap;gap:12px;
|
| 403 |
}}
|
| 404 |
.app-header-left{{display:flex;align-items:center;gap:12px}}
|
|
|
|
| 682 |
}}
|
| 683 |
"""
|
| 684 |
|
|
|
|
|
|
|
|
|
|
| 685 |
gallery_js = r"""
|
| 686 |
() => {
|
| 687 |
function init() {
|
|
|
|
| 1141 |
}
|
| 1142 |
"""
|
| 1143 |
|
|
|
|
|
|
|
|
|
|
| 1144 |
with gr.Blocks() as demo:
|
| 1145 |
hidden_image_b64 = gr.Textbox(value="", elem_id="hidden-image-b64", elem_classes="hidden-input", container=False)
|
| 1146 |
prompt = gr.Textbox(value="", elem_id="prompt-gradio-input", elem_classes="hidden-input", container=False)
|
|
|
|
| 1286 |
</div>
|
| 1287 |
|
| 1288 |
<div class="exp-note">
|
| 1289 |
+
Experimental VLM Suite
|
| 1290 |
</div>
|
| 1291 |
|
| 1292 |
<div class="app-statusbar">
|