import gradio as gr import os import torch import numpy as np import random from huggingface_hub import login from transformers import AutoTokenizer, AutoModelForSequenceClassification from scipy.special import softmax import logging import spaces import csv from openai import AzureOpenAI import re # Login to Hugging Face token = os.getenv("hf_token") if token: login(token=token) csv.field_size_limit(1000000) logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s') seed = 42 np.random.seed(seed) random.seed(seed) torch.manual_seed(seed) if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) model_paths = [ 'karths/binary_classification_train_port', 'karths/binary_classification_train_perf', "karths/binary_classification_train_main", "karths/binary_classification_train_secu", "karths/binary_classification_train_reli", "karths/binary_classification_train_usab", "karths/binary_classification_train_comp" ] quality_mapping = { 'binary_classification_train_port': 'Portability', 'binary_classification_train_main': 'Maintainability', 'binary_classification_train_secu': 'Security', 'binary_classification_train_reli': 'Reliability', 'binary_classification_train_usab': 'Usability', 'binary_classification_train_perf': 'Performance', 'binary_classification_train_comp': 'Compatibility' } tokenizer = AutoTokenizer.from_pretrained("distilbert/distilroberta-base") models_dict = {path: AutoModelForSequenceClassification.from_pretrained(path) for path in model_paths} def get_quality_name(model_name): return quality_mapping.get(model_name.split('/')[-1], "Unknown Quality") azure_api_key = os.getenv("AZURE_OPENAI_API_KEY") azure_client = AzureOpenAI( azure_endpoint="https://gpt-ifi-prog-eksperimenter-swe1.openai.azure.com/", api_key=azure_api_key, api_version="2025-04-01-preview" ) azure_deployment_name = "gpt-5.4-nano-AM-karthik-prod" def md_to_html(text): """Convert markdown to HTML using only stdlib re.""" text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) text = re.sub(r'\*(.+?)\*', r'\1', text) text = re.sub(r'^### (.+)$', r'
' + part.replace('\n', '
') + '
Please enter an issue description.
"), gr.update(value=""), gr.update(value="") ) if len(text.strip()) < 30: return ( gr.update(value="Text too short (minimum 30 characters).
"), gr.update(value=""), gr.update(value="") ) # GPU classification results = run_classification_models(text) if not results: return ( gr.update(value="No prediction above the 0.80 threshold, Try making the issue more descriptive and verbose.
"), gr.update(value=""), gr.update(value="") ) top_result = sorted(results, key=lambda x: x[1], reverse=True) quality_name = top_result[0][0] # Prediction badge HTML prediction_html = f"""The model returned an empty response.
" except Exception as e: logging.error(f"Azure error: {e}", exc_info=True) explanation_title = "" explanation_body = f"API Error: {e}
" return ( gr.update(value=prediction_html), gr.update(value=explanation_title), gr.update(value=explanation_body) ) css = """ .expl-title { font-size:15px; font-weight:bold; padding:8px 12px 0; } .expl-body { padding: 8px 12px 12px; line-height: 1.7; border: 1px solid var(--border-color-primary, #ccc); border-radius: 8px; background: var(--background-fill-primary, #fff); color: var(--body-text-color, #111); min-height: 80px; } .dark .expl-body { background: #1f2937 !important; border-color: #374151 !important; color: #f3f4f6 !important; } """ example_texts = [ [ "Title: Classification Inaccuracy in Edge Case Scenarios\n\n" "Detailed Description: The current machine learning algorithm demonstrates a significant failure to " "accurately categorize data into positive and negative classes when encountering edge cases. This " "suggests a lack of robustness in the decision boundary at the extremes of the feature space.\n" "Environment: Live Production Environment\n" "Step-by-Step Reproduction: Execute the primary classifier against the validated test dataset, " "specifically filtering for known boundary conditions and edge case parameters." ], [ "Title: Regression Suite Coverage Gap for Concurrent Sessions\n\n" "Detailed Description: Analysis of the current regression testing framework reveals a critical omission " "regarding multi-user concurrency. The suite currently validates single-user workflows but fails to " "simulate race conditions or resource locking issues inherent in simultaneous sessions.\n" "Environment: CI/CD Test Automation Pipeline\n" "Step-by-Step Reproduction: Modify existing automation scripts to initialize multiple parallel user " "sessions and monitor for state synchronization errors." ], [ "Title: Systematic Communication Breakdown Between Dev and QA\n\n" "Detailed Description: There is a recurring discrepancy between technical implementation and quality " "assurance validation due to ambiguous feature specifications. This misalignment leads to delayed " "releases and frequent rework of features that do not meet the intended design criteria.\n" "Environment: Inter-departmental Stakeholder Meetings\n" "Step-by-Step Reproduction: Conduct a formal audit of Jira ticket comments, Slack communication logs, " "and internal documentation from the past three sprint cycles to identify specific points of divergence." ], [ "Title: Lack of Fault Isolation in Service-Oriented Architecture\n\n" "Detailed Description: The microservices architecture currently lacks robust circuit-breaking and " "isolation mechanisms. Consequently, a localized failure in a single downstream service propagates " "unhindered, triggering a cascading failure across the entire system ecosystem.\n" "Environment: Distributed Microservices Infrastructure\n" "Step-by-Step Reproduction: Introduce a manual failure or latency injection into a non-critical " "dependency and document the resulting performance degradation and crash reports across the service mesh." ] ] with gr.Blocks(css=css, title="QualityTagger") as interface: gr.Markdown("# QualityTagger") gr.Markdown( "Classifies issue text into quality domains (Security, Usability, Maintainability, " "Reliability, etc.) and explains why." ) with gr.Row(): with gr.Column(scale=1): text_input = gr.Textbox( lines=7, label="Issue Description", placeholder="Enter your issue text here..." ) with gr.Row(): clear_btn = gr.Button("Clear", variant="secondary") submit_btn = gr.Button("Submit", variant="primary") with gr.Column(scale=1): prediction_output = gr.HTML(label="Prediction") # Split explanation into TWO HTML components so Gradio 4.26 updates both reliably explanation_title = gr.HTML(elem_classes="expl-title") explanation_body = gr.HTML( elem_classes="expl-body", value="Explanation will appear here after submission." ) gr.Examples( examples=example_texts, inputs=text_input, outputs=[prediction_output, explanation_title, explanation_body], fn=main_interface, cache_examples=False, label="Examples" ) submit_btn.click( fn=main_interface, inputs=text_input, outputs=[prediction_output, explanation_title, explanation_body] ) clear_btn.click( fn=lambda: ( gr.update(value=""), gr.update(value=""), gr.update(value="Explanation will appear here after submission.") ), inputs=[], outputs=[prediction_output, explanation_title, explanation_body] ) if __name__ == "__main__": interface.launch()