prithivMLmods committed on
Commit
ee275ec
·
verified ·
1 Parent(s): 8b09592

update app

Browse files
Files changed (1) hide show
  1. app.py +5 -30
app.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import os
3
  import gc
4
  import json
@@ -21,9 +20,6 @@ from transformers import (
21
  AutoTokenizer,
22
  )
23
 
24
- # =========================
25
- # Config
26
- # =========================
27
  MAX_MAX_NEW_TOKENS = 4096
28
  DEFAULT_MAX_NEW_TOKENS = 1024
29
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
@@ -42,9 +38,6 @@ if torch.cuda.is_available():
42
  print("device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
43
  print("Using device:", device)
44
 
45
- # =========================
46
- # Models
47
- # =========================
48
  MODEL_ID_X = "Senqiao/VisionThink-Efficient"
49
  processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True, use_fast=False)
50
  model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
@@ -104,9 +97,7 @@ MODELS = {
104
 
105
  MODEL_CHOICES = list(MODELS.keys())
106
 
107
- # =========================
108
- # Examples
109
- # =========================
110
  image_examples = [
111
  {"query": "Describe the safety measures in the image. Conclude (Safe / Unsafe).", "image": "images/5.jpg", "model": "Lumian-VLR-7B-Thinking"},
112
  {"query": "Convert this page to doc [markdown] precisely.", "image": "images/3.png", "model": "Typhoon-OCR-3B"},
@@ -116,9 +107,7 @@ image_examples = [
116
  {"query": "Convert chart to OTSL.", "image": "images/2.png", "model": "openbmb/MiniCPM-V-4"},
117
  ]
118
 
119
- # =========================
120
- # Helpers
121
- # =========================
122
  def pil_to_data_url(img: Image.Image, fmt="PNG"):
123
  buf = BytesIO()
124
  img.save(buf, format=fmt)
@@ -213,9 +202,6 @@ def calc_timeout_image(*args, **kwargs):
213
  except Exception:
214
  return 60
215
 
216
- # =========================
217
- # Inference
218
- # =========================
219
  @spaces.GPU(duration=calc_timeout_image)
220
  def generate_image(
221
  model_name,
@@ -364,9 +350,6 @@ def run_image(model_name, text, image_b64, max_new_tokens_v, temperature_v, top_
364
  def noop():
365
  return None
366
 
367
- # =========================
368
- # SVGs
369
- # =========================
370
  THUNDER_SVG = f"""
371
  <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
372
  <path fill="white" d="M13.2 2L5 13h5l-1.2 9L19 10h-5l-.8-8Z"/>
@@ -389,9 +372,7 @@ MODEL_TABS_HTML = "".join([
389
  for m in MODEL_CHOICES
390
  ])
391
 
392
- # =========================
393
- # CSS
394
- # =========================
395
  css = f"""
396
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500;600&display=swap');
397
  *{{box-sizing:border-box;margin:0;padding:0}}
@@ -417,7 +398,7 @@ footer{{display:none!important}}
417
  box-shadow:0 25px 50px -12px rgba(0,0,0,.6),0 0 0 1px rgba(255,255,255,.03);
418
  }}
419
  .app-header{{
420
- background:linear-gradient(135deg,#18181b,#132013);border-bottom:1px solid #27272a;
421
  padding:14px 24px;display:flex;align-items:center;justify-content:space-between;flex-wrap:wrap;gap:12px;
422
  }}
423
  .app-header-left{{display:flex;align-items:center;gap:12px}}
@@ -701,9 +682,6 @@ footer{{display:none!important}}
701
  }}
702
  """
703
 
704
- # =========================
705
- # JS
706
- # =========================
707
  gallery_js = r"""
708
  () => {
709
  function init() {
@@ -1163,9 +1141,6 @@ watchOutputs();
1163
  }
1164
  """
1165
 
1166
- # =========================
1167
- # App
1168
- # =========================
1169
  with gr.Blocks() as demo:
1170
  hidden_image_b64 = gr.Textbox(value="", elem_id="hidden-image-b64", elem_classes="hidden-input", container=False)
1171
  prompt = gr.Textbox(value="", elem_id="prompt-gradio-input", elem_classes="hidden-input", container=False)
@@ -1311,7 +1286,7 @@ with gr.Blocks() as demo:
1311
  </div>
1312
 
1313
  <div class="exp-note">
1314
- Experimental VLM Suite &middot; Video inference removed as requested
1315
  </div>
1316
 
1317
  <div class="app-statusbar">
 
 
1
  import os
2
  import gc
3
  import json
 
20
  AutoTokenizer,
21
  )
22
 
 
 
 
23
  MAX_MAX_NEW_TOKENS = 4096
24
  DEFAULT_MAX_NEW_TOKENS = 1024
25
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
38
  print("device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
39
  print("Using device:", device)
40
 
 
 
 
41
  MODEL_ID_X = "Senqiao/VisionThink-Efficient"
42
  processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True, use_fast=False)
43
  model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
 
97
 
98
  MODEL_CHOICES = list(MODELS.keys())
99
 
100
+
 
 
101
  image_examples = [
102
  {"query": "Describe the safety measures in the image. Conclude (Safe / Unsafe).", "image": "images/5.jpg", "model": "Lumian-VLR-7B-Thinking"},
103
  {"query": "Convert this page to doc [markdown] precisely.", "image": "images/3.png", "model": "Typhoon-OCR-3B"},
 
107
  {"query": "Convert chart to OTSL.", "image": "images/2.png", "model": "openbmb/MiniCPM-V-4"},
108
  ]
109
 
110
+
 
 
111
  def pil_to_data_url(img: Image.Image, fmt="PNG"):
112
  buf = BytesIO()
113
  img.save(buf, format=fmt)
 
202
  except Exception:
203
  return 60
204
 
 
 
 
205
  @spaces.GPU(duration=calc_timeout_image)
206
  def generate_image(
207
  model_name,
 
350
  def noop():
351
  return None
352
 
 
 
 
353
  THUNDER_SVG = f"""
354
  <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
355
  <path fill="white" d="M13.2 2L5 13h5l-1.2 9L19 10h-5l-.8-8Z"/>
 
372
  for m in MODEL_CHOICES
373
  ])
374
 
375
+
 
 
376
  css = f"""
377
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500;600&display=swap');
378
  *{{box-sizing:border-box;margin:0;padding:0}}
 
398
  box-shadow:0 25px 50px -12px rgba(0,0,0,.6),0 0 0 1px rgba(255,255,255,.03);
399
  }}
400
  .app-header{{
401
+ background:linear-gradient(135deg,#18181b,#1e1e24);border-bottom:1px solid #27272a;
402
  padding:14px 24px;display:flex;align-items:center;justify-content:space-between;flex-wrap:wrap;gap:12px;
403
  }}
404
  .app-header-left{{display:flex;align-items:center;gap:12px}}
 
682
  }}
683
  """
684
 
 
 
 
685
  gallery_js = r"""
686
  () => {
687
  function init() {
 
1141
  }
1142
  """
1143
 
 
 
 
1144
  with gr.Blocks() as demo:
1145
  hidden_image_b64 = gr.Textbox(value="", elem_id="hidden-image-b64", elem_classes="hidden-input", container=False)
1146
  prompt = gr.Textbox(value="", elem_id="prompt-gradio-input", elem_classes="hidden-input", container=False)
 
1286
  </div>
1287
 
1288
  <div class="exp-note">
1289
+ Experimental VLM Suite
1290
  </div>
1291
 
1292
  <div class="app-statusbar">