"""QualityTagger: Gradio app that tags issue text with a software quality attribute.

The text is scored by seven binary sequence classifiers (one per quality
attribute). The highest-scoring attribute above the confidence threshold is
shown, and an Azure OpenAI deployment is asked to justify the prediction.

NOTE(review): the reviewed copy of this file had lost its line structure and
every HTML tag inside string literals. Indentation, UI nesting and HTML markup
below were reconstructed; spots that could not be recovered with certainty are
marked with NOTE(review) comments and should be checked against the deployed
version.
"""

import csv
import html
import logging
import os
import random
import re

import gradio as gr
import torch
import numpy as np
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import spaces
from openai import AzureOpenAI

# Login to Hugging Face
token = os.getenv("hf_token")
if token:
    login(token=token)

csv.field_size_limit(1000000)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

# Seed every RNG in use so repeated runs behave the same.
seed = 42
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

# Minimum positive-class probability for a quality attribute to be reported.
CONFIDENCE_THRESHOLD = 0.80
# Minimum number of non-whitespace-trimmed characters accepted as input.
MIN_TEXT_LENGTH = 30

model_paths = [
    'karths/binary_classification_train_port',
    'karths/binary_classification_train_perf',
    "karths/binary_classification_train_main",
    "karths/binary_classification_train_secu",
    "karths/binary_classification_train_reli",
    "karths/binary_classification_train_usab",
    "karths/binary_classification_train_comp",
]

# Maps the repository name (last path component) to a display name.
quality_mapping = {
    'binary_classification_train_port': 'Portability',
    'binary_classification_train_main': 'Maintainability',
    'binary_classification_train_secu': 'Security',
    'binary_classification_train_reli': 'Reliability',
    'binary_classification_train_usab': 'Usability',
    'binary_classification_train_perf': 'Performance',
    'binary_classification_train_comp': 'Compatibility',
}

# One tokenizer is shared by all classifiers.
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilroberta-base")
models_dict = {
    path: AutoModelForSequenceClassification.from_pretrained(path)
    for path in model_paths
}


def get_quality_name(model_name):
    """Return the display name for a model path, or "Unknown Quality".

    Only the part after the last ``/`` is looked up in ``quality_mapping``.
    """
    return quality_mapping.get(model_name.split('/')[-1], "Unknown Quality")


azure_api_key = os.getenv("AZURE_OPENAI_API_KEY")
azure_client = AzureOpenAI(
    azure_endpoint="https://gpt-ifi-prog-eksperimenter-swe1.openai.azure.com/",
    api_key=azure_api_key,
    api_version="2025-04-01-preview",
)
azure_deployment_name = "gpt-5.4-nano-AM-karthik-prod"

# Matches a numbered-list line such as "1. item"; group 1 is the item text.
_NUMBERED_ITEM_RE = re.compile(r'^\d+\.\s+(.*)')
# Matches chunks that already start with a block-level tag and so must not
# be wrapped in a paragraph.
_BLOCK_TAG_RE = re.compile(r'^<(h[2-4]|ol|li|ul)')


def md_to_html(text):
    """Convert a small Markdown subset to HTML using only the stdlib.

    Supported constructs: ``**bold**``, ``*italic*``, ``#``/``##``/``###``
    headings (rendered as ``<h2>``/``<h3>``/``<h4>``), numbered lists and
    blank-line-separated paragraphs (single newlines become ``<br>``).

    The input is HTML-escaped first, so markup characters contained in the
    text are displayed literally instead of being interpreted by the browser.

    Args:
        text: Markdown source.

    Returns:
        The HTML fragment as a string.
    """
    # quote=False: only &, < and > are escaped; none of them is Markdown
    # syntax handled below, so the conversions are unaffected.
    text = html.escape(text, quote=False)

    # Bold must be handled before italic, otherwise "**x**" would be
    # consumed by the single-asterisk pattern.
    text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text)
    text = re.sub(r'\*(.+?)\*', r'<em>\1</em>', text)

    # Longest heading prefix first so "###" is not matched by the "#" rule.
    text = re.sub(r'^### (.+)$', r'<h4>\1</h4>', text, flags=re.MULTILINE)
    text = re.sub(r'^## (.+)$', r'<h3>\1</h3>', text, flags=re.MULTILINE)
    text = re.sub(r'^# (.+)$', r'<h2>\1</h2>', text, flags=re.MULTILINE)

    # Numbered lists: consecutive "N. item" lines are grouped into one <ol>.
    out, in_list = [], False
    for line in text.split('\n'):
        item = _NUMBERED_ITEM_RE.match(line)
        if item:
            if not in_list:
                out.append('<ol>')
                in_list = True
            out.append(f'<li>{item.group(1)}</li>')
        else:
            if in_list:
                out.append('</ol>')
                in_list = False
            out.append(line)
    if in_list:
        out.append('</ol>')
    text = '\n'.join(out)

    # Paragraphs: chunks separated by blank lines; chunks that already start
    # with a block-level tag are passed through unchanged.
    result = []
    for part in re.split(r'\n{2,}', text.strip()):
        part = part.strip()
        if part and not _BLOCK_TAG_RE.match(part):
            part = '<p>' + part.replace('\n', '<br>') + '</p>'
        result.append(part)
    return '\n'.join(result)


def generate_explanation(issue_text, quality_name):
    """Ask the Azure OpenAI deployment why the issue relates to a quality.

    Args:
        issue_text: The issue description entered by the user.
        quality_name: Display name of the predicted quality attribute.

    Returns:
        The model's reply with surrounding whitespace removed, or an empty
        string when the reply has no content.

    Exceptions raised by the client call are not caught here.
    """
    prompt = (
        f"Analyze the following issue description based on the quality dimension: {quality_name}.\n\n"
        f"Issue Description:\n---\n{issue_text}\n---\n\n"
        f"1. **Justification**: Briefly explain why this issue relates to {quality_name}.\n"
        f"2. **Improved Version**: Suggest a rewrite to better meet this quality standard.\n\n"
        f"Be concise and direct."
    )
    response = azure_client.chat.completions.create(
        model=azure_deployment_name,
        messages=[
            {
                "role": "system",
                "content": "You are an expert software engineering assistant specializing in software quality analysis.",
            },
            {"role": "user", "content": prompt},
        ],
        max_completion_tokens=300,
        temperature=0.9,
        top_p=0.9,
    )
    # Normalise a missing reply to "" *before* logging: len()/slicing on
    # None would raise TypeError.
    raw = response.choices[0].message.content or ""
    logging.info("[EXPLANATION RAW] length=%d, preview=%r", len(raw), raw[:120])
    return raw.strip()


@spaces.GPU(duration=60)
def run_classification_models(text):
    """Score ``text`` with every classifier and keep the confident ones.

    Args:
        text: Issue description to classify.

    Returns:
        A list of ``(quality_name, probability)`` tuples, in ``models_dict``
        order, for each model whose class-1 probability is at least
        ``CONFIDENCE_THRESHOLD``. The list may be empty.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Every model uses the same tokenizer, so encode the text once instead
    # of once per model.
    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    results = []
    for model_path, model in models_dict.items():
        model.to(device)
        model.eval()
        with torch.inference_mode():
            outputs = model(**encoded)
        probs = softmax(outputs.logits.cpu().numpy(), axis=1)
        # Column 1 is used as the positive class.
        avg_prob = np.mean(probs[:, 1])
        if avg_prob >= CONFIDENCE_THRESHOLD:
            results.append((get_quality_name(model_path), float(avg_prob)))
    return results


def _ui_updates(prediction_html, title_html="", body_html=""):
    """Build the update triple for (prediction, explanation title, body)."""
    return (
        gr.update(value=prediction_html),
        gr.update(value=title_html),
        gr.update(value=body_html),
    )


def main_interface(text):
    """Classify an issue description and explain the top prediction.

    Args:
        text: Raw content of the input textbox.

    Returns:
        A 3-tuple of ``gr.update`` objects for the prediction, explanation
        title and explanation body components. For rejected input or when no
        model is confident enough, the first element carries the message and
        the other two are cleared.
    """
    # NOTE(review): the wrapper markup of the status messages below was
    # missing in the reviewed copy; a plain <p> is used. Confirm styling.
    if not text or not text.strip():
        return _ui_updates("<p>Please enter an issue description.</p>")

    if len(text.strip()) < MIN_TEXT_LENGTH:
        return _ui_updates(
            f"<p>Text too short (minimum {MIN_TEXT_LENGTH} characters).</p>"
        )

    # GPU classification
    results = run_classification_models(text)
    if not results:
        return _ui_updates(
            f"<p>No prediction above the {CONFIDENCE_THRESHOLD:.2f} threshold, "
            "Try making the issue more descriptive and verbose.</p>"
        )

    # max() returns the first of several equal maxima, the same element a
    # stable descending sort would put first.
    quality_name = max(results, key=lambda pair: pair[1])[0]

    # Prediction badge HTML
    # NOTE(review): original badge markup/styling was missing in the
    # reviewed copy; minimal markup reconstructed.
    prediction_html = f"""
<div><span>Top Prediction</span> <strong>{quality_name}</strong></div>
"""

    # Azure explanation
    try:
        raw_text = generate_explanation(text, quality_name)
        if raw_text:
            explanation_title = f"Why this is a {quality_name} issue:"
            explanation_body = md_to_html(raw_text)
        else:
            explanation_title = ""
            explanation_body = "<p>The model returned an empty response.</p>"
    except Exception as e:
        # UI boundary: report the failure in the page instead of crashing.
        logging.exception("Azure error: %s", e)
        explanation_title = ""
        # Escape the message: exception text is arbitrary and is rendered
        # as HTML.
        explanation_body = f"<p>API Error: {html.escape(str(e))}</p>"

    return _ui_updates(prediction_html, explanation_title, explanation_body)


css = """
.expl-title {
    font-size:15px;
    font-weight:bold;
    padding:8px 12px 0;
}
.expl-body {
    padding: 8px 12px 12px;
    line-height: 1.7;
    border: 1px solid var(--border-color-primary, #ccc);
    border-radius: 8px;
    background: var(--background-fill-primary, #fff);
    color: var(--body-text-color, #111);
    min-height: 80px;
}
.dark .expl-body {
    background: #1f2937 !important;
    border-color: #374151 !important;
    color: #f3f4f6 !important;
}
"""

example_texts = [
    [
        "Title: Classification Inaccuracy in Edge Case Scenarios\n\n"
        "Detailed Description: The current machine learning algorithm demonstrates a significant failure to "
        "accurately categorize data into positive and negative classes when encountering edge cases. This "
        "suggests a lack of robustness in the decision boundary at the extremes of the feature space.\n"
        "Environment: Live Production Environment\n"
        "Step-by-Step Reproduction: Execute the primary classifier against the validated test dataset, "
        "specifically filtering for known boundary conditions and edge case parameters."
    ],
    [
        "Title: Regression Suite Coverage Gap for Concurrent Sessions\n\n"
        "Detailed Description: Analysis of the current regression testing framework reveals a critical omission "
        "regarding multi-user concurrency. The suite currently validates single-user workflows but fails to "
        "simulate race conditions or resource locking issues inherent in simultaneous sessions.\n"
        "Environment: CI/CD Test Automation Pipeline\n"
        "Step-by-Step Reproduction: Modify existing automation scripts to initialize multiple parallel user "
        "sessions and monitor for state synchronization errors."
    ],
    [
        "Title: Systematic Communication Breakdown Between Dev and QA\n\n"
        "Detailed Description: There is a recurring discrepancy between technical implementation and quality "
        "assurance validation due to ambiguous feature specifications. This misalignment leads to delayed "
        "releases and frequent rework of features that do not meet the intended design criteria.\n"
        "Environment: Inter-departmental Stakeholder Meetings\n"
        "Step-by-Step Reproduction: Conduct a formal audit of Jira ticket comments, Slack communication logs, "
        "and internal documentation from the past three sprint cycles to identify specific points of divergence."
    ],
    [
        "Title: Lack of Fault Isolation in Service-Oriented Architecture\n\n"
        "Detailed Description: The microservices architecture currently lacks robust circuit-breaking and "
        "isolation mechanisms. Consequently, a localized failure in a single downstream service propagates "
        "unhindered, triggering a cascading failure across the entire system ecosystem.\n"
        "Environment: Distributed Microservices Infrastructure\n"
        "Step-by-Step Reproduction: Introduce a manual failure or latency injection into a non-critical "
        "dependency and document the resulting performance degradation and crash reports across the service mesh."
    ],
]

# NOTE(review): component nesting below is inferred from the order of the
# calls; the reviewed copy had no indentation. Confirm the layout.
with gr.Blocks(css=css, title="QualityTagger") as interface:
    gr.Markdown("# QualityTagger")
    gr.Markdown(
        "Classifies issue text into quality domains (Security, Usability, Maintainability, "
        "Reliability, etc.) and explains why."
    )

    with gr.Row():
        with gr.Column(scale=1):
            text_input = gr.Textbox(
                lines=7,
                label="Issue Description",
                placeholder="Enter your issue text here...",
            )
            with gr.Row():
                clear_btn = gr.Button("Clear", variant="secondary")
                submit_btn = gr.Button("Submit", variant="primary")

        with gr.Column(scale=1):
            prediction_output = gr.HTML(label="Prediction")
            # Split explanation into TWO HTML components so Gradio 4.26 updates both reliably
            explanation_title = gr.HTML(elem_classes="expl-title")
            explanation_body = gr.HTML(
                elem_classes="expl-body",
                value="Explanation will appear here after submission.",
            )

    gr.Examples(
        examples=example_texts,
        inputs=text_input,
        outputs=[prediction_output, explanation_title, explanation_body],
        fn=main_interface,
        cache_examples=False,
        label="Examples",
    )

    submit_btn.click(
        fn=main_interface,
        inputs=text_input,
        outputs=[prediction_output, explanation_title, explanation_body],
    )

    # Resets the three output components; the input textbox is left as is.
    clear_btn.click(
        fn=lambda: (
            gr.update(value=""),
            gr.update(value=""),
            gr.update(value="Explanation will appear here after submission."),
        ),
        inputs=[],
        outputs=[prediction_output, explanation_title, explanation_body],
    )

if __name__ == "__main__":
    interface.launch()