| import os |
| import gradio as gr |
|
|
| from langchain_groq import ChatGroq |
| from langchain_core.prompts import ChatPromptTemplate |
| from langchain_core.output_parsers import StrOutputParser |
| from langchain_core.runnables import RunnablePassthrough |
|
|
| from langchain_community.embeddings import HuggingFaceEmbeddings |
| from langchain_community.document_loaders import PyPDFLoader |
| from langchain_text_splitters.sentence_transformers import SentenceTransformersTokenTextSplitter |
| from langchain_chroma import Chroma |
|
|
|
|
| |
| |
| |
# --- Environment and paths ---------------------------------------------------
# Make sure HF_TOKEN is present in the process environment: a value that is
# already set is left untouched, otherwise it becomes the empty string.
os.environ.setdefault("HF_TOKEN", "")

# The Groq API key is mandatory; abort start-up when it is missing or empty.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    raise ValueError("GROQ_API_KEY not found in environment variables")

# Source PDF to index and the directory used to persist the vector store.
DATASET_PATH, PERSIST_DIR = "dataset.pdf", "pharma_db"

# Create the persistence directory up front (no error if it already exists).
os.makedirs(PERSIST_DIR, exist_ok=True)
|
|
|
|
| |
| |
| |
# Embedding model that backs the vector store.
_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)

# Chroma vector store persisted under PERSIST_DIR and embedding with the
# model configured above.
db = Chroma(
    embedding_function=embeddings,
    persist_directory=PERSIST_DIR,
)
|
|
|
|
| |
| |
| |
# One-time ingestion: index the PDF only when the file exists and the
# collection is still empty, so restarts reuse the persisted vectors instead
# of embedding the document again.
if os.path.exists(DATASET_PATH):
    # Fetch at most one record: that is enough to tell "empty" from
    # "populated" without loading the whole collection into memory.
    if not db.get(limit=1)["ids"]:
        print("Indexing PDF...")

        documents = PyPDFLoader(DATASET_PATH).load()

        # Tokenize with the same model that produces the embeddings so the
        # chunk length is measured against that model's own token window;
        # the splitter's default model is a different one.
        # NOTE(review): this splitter sizes chunks via `tokens_per_chunk`
        # (defaulting to the model's maximum sequence length); `chunk_size`
        # is kept from the original call but appears not to drive the
        # split - confirm.
        splitter = SentenceTransformersTokenTextSplitter(
            model_name=embeddings.model_name,
            chunk_size=500,
            chunk_overlap=50,
        )
        chunks = splitter.split_documents(documents)

        if chunks:
            db.add_documents(chunks)
            print("✅ PDF indexed.")
        else:
            # e.g. an image-only PDF: there is nothing to embed, so avoid
            # handing the vector store an empty batch.
            print("⚠️ PDF contained no extractable text; nothing indexed.")
else:
    print("⚠️ PDF not found in repo.")
| |
|
|
| |
| |
| |
# System instructions for the assistant. `{context}` is the placeholder that
# is filled with the retrieved document text when the chain runs.
_SYSTEM_TEMPLATE = """You are 'Dr MomAI Assistant', a specialized medical AI expert focused on mom and baby.
GUIDELINES:
1. INTERACTIVE GREETINGS: If the user greets you (e.g., "Hi", "Hello", "Who are you?"), respond politely, introduce yourself as Dr Mom AI Assistant, and explain that you are here to help them understand information.
2. CONTEXTUAL ACCURACY: For all medical or factual questions, prioritize the information provided in the 'Context' section below.
3. STRICTNESS: If the question is medical in nature but the answer is NOT found in the context, explicitly state something like this: "I'm sorry, but that specific information is not available in my current medical knowledge."
4. TONE: Maintain a professional, empathetic, and clinical tone. Use bullet points for complex medical explanations to ensure clarity.
Context:
{context}"""

# Two-message chat prompt: the system template followed by the user's question.
_PROMPT_MESSAGES = [
    ("system", _SYSTEM_TEMPLATE),
    ("human", "{question}"),
]
prompt = ChatPromptTemplate.from_messages(_PROMPT_MESSAGES)

# Final chain step: turns the chat model's reply into a plain string.
output_parser = StrOutputParser()
|
|
|
|
def format_docs(docs):
    """Merge the text of the given documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute
            holding text.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line; an empty string when ``docs`` yields nothing.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)
|
|
|
|
| |
| |
| |
# Lazily built RAG pipeline, shared by every call to run_query().
_rag_chain = None


def _get_rag_chain():
    """Return the retrieval-augmented chain, building it on first use.

    The retriever, the Groq chat model and the composed chain do not depend
    on the individual question, so they are created once and then reused
    instead of being rebuilt for every request.
    """
    global _rag_chain
    if _rag_chain is None:
        # Retrieve the 5 most similar chunks for each question.
        retriever = db.as_retriever(search_kwargs={"k": 5})

        llm = ChatGroq(
            model="llama-3.1-8b-instant",
            api_key=GROQ_API_KEY,
            temperature=0,
        )

        # The input question fans out to both prompt variables: it is used
        # to retrieve the context and is passed through unchanged as
        # `question`.
        _rag_chain = (
            {
                "context": retriever | format_docs,
                "question": RunnablePassthrough(),
            }
            | prompt
            | llm
            | output_parser
        )
    return _rag_chain


def run_query(question) -> str:
    """Answer a user question with the retrieval-augmented chain.

    Args:
        question: Text entered by the user. ``None``, non-string values and
            blank/whitespace-only strings are treated as "no question".

    Returns:
        The model's answer as a string, or a short prompt asking the user to
        enter a question when no usable input was given.
    """
    # Guard against a missing value as well as blank text: calling .strip()
    # on None would raise instead of showing the friendly message.
    if not isinstance(question, str) or not question.strip():
        return "Please enter a question."

    return _get_rag_chain().invoke(question)
|
|
|
|
| |
| |
| |
# Widgets for the single-question UI: one input box and one multi-line
# output box.
_question_box = gr.Textbox(label="Question", placeholder="Ask me something...")
_response_box = gr.Textbox(label="Response", lines=10)

# Wire the widgets to run_query().
interface = gr.Interface(
    fn=run_query,
    inputs=_question_box,
    outputs=_response_box,
    title="Your Assistant",
    description="Ask questions",
)

# Start the Gradio app as soon as this module is executed.
interface.launch()