Text Generation
PEFT
English
security
vulnerability-detection
code-repair
zero-day
exploit-scanner
cybersecurity
sft
qlora
Instructions to use jacobmahon/zero-day-exploit-scanner-fixer with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use jacobmahon/zero-day-exploit-scanner-fixer with PEFT:
Task type is invalid.
- Notebooks
- Google Colab
- Kaggle
| """ | |
| Zero-Day Exploit Scanner & Fixer - Inference Script | |
| ==================================================== | |
| Scans code for vulnerabilities and generates fixes. | |
| Usage: | |
| python inference.py --code "int main() { char buf[10]; gets(buf); }" | |
| python inference.py --file vulnerable_code.c | |
| python inference.py --interactive | |
| Requirements: | |
| pip install transformers peft torch bitsandbytes accelerate | |
| """ | |
| import argparse | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig | |
| from peft import PeftModel | |
# Default adapter location: used by load_model() for both the tokenizer and
# the LoRA weights, and as the default of the --model CLI option.
MODEL_ID = "jacobmahon/zero-day-exploit-scanner-fixer"
# Default base checkpoint that load_model() quantizes and wraps with the
# adapter; also the default of the --base-model CLI option.
BASE_MODEL = "Qwen/Qwen2.5-Coder-7B-Instruct"
# System message sent with every scan_code() request. It instructs the model
# to scan, identify, explain and fix, and spells out the section headings the
# answer is expected to use.
SYSTEM_PROMPT = """You are a world-class security expert specializing in zero-day vulnerability detection and remediation. When given code, you will:
1. SCAN: Determine if the code contains a security vulnerability
2. IDENTIFY: If vulnerable, identify the CWE type and CVE ID if known
3. EXPLAIN: Provide a clear explanation of the vulnerability mechanism, attack vector, and potential impact
4. FIX: Provide the corrected code that patches the vulnerability
Always respond in the following structured format:
## SCAN RESULT
[VULNERABLE / SAFE]
## VULNERABILITY DETAILS
- **CWE**: [CWE ID and name]
- **CVE**: [CVE ID if known, otherwise "N/A"]
- **Severity**: [CRITICAL / HIGH / MEDIUM / LOW]
## EXPLANATION
[Detailed explanation of the vulnerability]
## VULNERABLE LINES
[Specific lines or patterns that are vulnerable]
## FIXED CODE
```
[Corrected code]
```
## FIX EXPLANATION
[What was changed and why]"""
def load_model(model_id=MODEL_ID, base_model=BASE_MODEL, device="auto"):
    """Load the 4-bit quantized base model and attach the LoRA adapter.

    Args:
        model_id: Repo ID or path of the LoRA adapter. The tokenizer is
            loaded from this same location.
        base_model: Repo ID or path of the base causal language model.
        device: Value forwarded as ``device_map`` when loading the base model.

    Returns:
        A ``(model, tokenizer)`` tuple; the model has been put in eval mode.
    """
    print(f"Loading base model: {base_model}")

    # 4-bit NF4 weights with double quantization and a bfloat16 compute dtype.
    quantization = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    base = AutoModelForCausalLM.from_pretrained(
        base_model,
        device_map=device,
        torch_dtype=torch.bfloat16,
        quantization_config=quantization,
    )

    print(f"Loading LoRA adapter: {model_id}")
    adapted = PeftModel.from_pretrained(base, model_id)
    adapted.eval()
    return adapted, tokenizer
def _detect_language(code):
    """Guess a language label for *code* from simple substring markers."""
    # Java is tested first: typical Java sources also contain "void " and
    # "import ", which the C and Python checks below would otherwise claim,
    # leaving the Java label practically unreachable.
    if "public class" in code or "System.out" in code:
        return "Java"
    if "#include" in code or "malloc" in code or "void " in code:
        return "C"
    if "def " in code or "import " in code:
        return "Python"
    if "function " in code or "const " in code or "=>" in code:
        return "JavaScript"
    # Nothing matched: fall back to a neutral label.
    return "code"


def scan_code(code: str, model, tokenizer, language: str = "auto", max_new_tokens: int = 2048):
    """Ask the model to analyze *code* for vulnerabilities and propose a fix.

    Args:
        code: Source text to analyze.
        model: Object providing ``device`` and ``generate`` (as returned by
            ``load_model``).
        tokenizer: Tokenizer providing ``apply_chat_template`` and ``decode``.
        language: Language name used in the prompt and as the code-fence tag.
            ``"auto"`` selects a label with a substring heuristic.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        stripped). Sampling is enabled, so repeated calls may differ.
    """
    if language == "auto":
        language = _detect_language(code)

    user_prompt = (
        f"Analyze the following {language} code for security vulnerabilities "
        f"and provide a fix if needed:\n\n```{language.lower()}\n{code}\n```"
    )
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_prompt},
    ]

    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(text, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.3,
            top_p=0.9,
            do_sample=True,
            repetition_penalty=1.1,
        )

    # generate() returns prompt + completion; slice off the prompt tokens so
    # only the model's answer is decoded.
    prompt_length = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return response
def _read_snippet():
    """Read stdin lines until 'END'; return ``(code, keep_going)``."""
    lines = []
    while True:
        try:
            line = input()
        except EOFError:
            # stdin was closed (Ctrl-D or end of piped input): hand back what
            # was entered so far and tell the caller to end the session.
            return "\n".join(lines), False
        marker = line.strip()
        if marker == "END":
            return "\n".join(lines), True
        if marker == "quit":
            # 'quit' abandons any partially entered snippet.
            return "", False
        lines.append(line)


def main():
    """Command-line entry point.

    Modes, in order of precedence: ``--code`` (analyze a string), ``--file``
    (analyze a file's contents), ``--interactive`` (read snippets from stdin,
    each terminated by a line containing ``END``; ``quit`` exits). With none
    of these, a built-in vulnerable C example is scanned as a demo.
    """
    parser = argparse.ArgumentParser(description="Zero-Day Exploit Scanner & Fixer")
    parser.add_argument("--code", type=str, help="Code string to analyze")
    parser.add_argument("--file", type=str, help="Path to file to analyze")
    parser.add_argument("--interactive", action="store_true", help="Interactive mode")
    parser.add_argument("--language", type=str, default="auto", help="Programming language")
    parser.add_argument("--model", type=str, default=MODEL_ID, help="Model ID")
    parser.add_argument("--base-model", type=str, default=BASE_MODEL, help="Base model ID")
    args = parser.parse_args()

    # Read the target file BEFORE loading the model so that a wrong path is
    # reported immediately instead of after the expensive model load.
    # --code takes precedence over --file, so the file is only needed when
    # --code is absent.
    file_code = None
    if args.file and not args.code:
        try:
            # Explicit UTF-8 avoids depending on the platform default
            # encoding; undecodable bytes are replaced rather than aborting.
            with open(args.file, "r", encoding="utf-8", errors="replace") as f:
                file_code = f.read()
        except OSError as exc:
            parser.error(f"cannot read {args.file}: {exc}")

    model, tokenizer = load_model(args.model, args.base_model)

    if args.code:
        result = scan_code(args.code, model, tokenizer, args.language)
        print(result)
    elif args.file:
        print(f"\nScanning: {args.file}")
        print("=" * 60)
        result = scan_code(file_code, model, tokenizer, args.language)
        print(result)
    elif args.interactive:
        print("Zero-Day Exploit Scanner & Fixer")
        print("Enter code to analyze (type 'END' on a new line to submit, 'quit' to exit)")
        print("=" * 60)
        while True:
            print("\nEnter code:")
            code, keep_going = _read_snippet()
            if code.strip():
                print("\nAnalyzing...")
                result = scan_code(code, model, tokenizer, args.language)
                print("\n" + result)
            if not keep_going:
                return
    else:
        # Demo with example vulnerable code
        demo_code = '''
void process_input(char *user_input) {
char buffer[64];
strcpy(buffer, user_input); // No bounds checking
printf("Processed: %s\\n", buffer);
}
int main() {
char input[1024];
gets(input); // Unsafe input
process_input(input);
return 0;
}
'''
        print("Demo: Scanning example vulnerable C code")
        print("=" * 60)
        result = scan_code(demo_code, model, tokenizer, "C")
        print(result)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()