Spaces:

suusuu93
/

chatAI

Build error

App Files Files Community

chatAI / app.py

suusuu93

Update app.py

6924870 verified 4 months ago

raw

history blame contribute delete

3.92 kB

	import os
	import gradio as gr

	from langchain_groq import ChatGroq
	from langchain_core.prompts import ChatPromptTemplate
	from langchain_core.output_parsers import StrOutputParser
	from langchain_core.runnables import RunnablePassthrough

	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_community.document_loaders import PyPDFLoader
	from langchain_text_splitters.sentence_transformers import SentenceTransformersTokenTextSplitter
	from langchain_chroma import Chroma


	# ==============================
	# CONFIG
	# ==============================
	# Guarantee that HF_TOKEN is present in the process environment: a value
	# that is already set is left alone, otherwise it becomes an empty string.
	os.environ.setdefault("HF_TOKEN", "")

	# The Groq key is mandatory; stop at import time when it is missing or empty.
	GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
	if not GROQ_API_KEY:
	    raise ValueError("GROQ_API_KEY not found in environment variables")

	# Source document to index and the directory handed to the vector store.
	DATASET_PATH = "dataset.pdf"
	PERSIST_DIR = "pharma_db"

	# Create the persistence directory up front; an existing one is not an error.
	os.makedirs(PERSIST_DIR, exist_ok=True)


	# ==============================
	# EMBEDDINGS (FASTER MODEL)
	# ==============================
	# Shared embedding function, used both when indexing and when querying.
	embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


	# ==============================
	# VECTOR DB
	# ==============================
	# Vector store backed by PERSIST_DIR; texts are embedded with `embeddings`.
	db = Chroma(embedding_function=embeddings, persist_directory=PERSIST_DIR)


	# ==============================
	# LOAD & INDEX PDF
	# ==============================
	if os.path.exists(DATASET_PATH):

	    # Only index when the store is empty. Requesting a single record is
	    # enough to find out, and avoids reading every stored chunk at startup.
	    if not db.get(limit=1)["ids"]:
	        print("Indexing PDF...")

	        documents = PyPDFLoader(DATASET_PATH).load()

	        # Split with the tokenizer of the model that produces the embeddings.
	        # This splitter sizes chunks by `tokens_per_chunk` (defaulting to the
	        # model's maximum sequence length), not by `chunk_size`, and its
	        # default model differs from the embedder, so the previous settings
	        # could yield chunks longer than the embedder actually encodes.
	        splitter = SentenceTransformersTokenTextSplitter(
	            model_name=embeddings.model_name,
	            chunk_overlap=50,
	        )

	        chunks = splitter.split_documents(documents)

	        if chunks:
	            db.add_documents(chunks)
	            print("✅ PDF indexed.")
	        else:
	            # e.g. a scanned, image-only PDF: there is nothing to store.
	            print("⚠️ No text could be extracted from the PDF; nothing indexed.")

	else:
	    print("⚠️ PDF not found in repo.")


	# ==============================
	# PROMPT
	# ==============================
	# System instructions for the assistant. `{context}` and `{question}` are
	# template slots filled in when the chain in run_query is invoked.
	_SYSTEM_PROMPT = """You are 'Dr MomAI Assistant', a specialized medical AI expert focused on mom and baby.
	GUIDELINES:
	1. INTERACTIVE GREETINGS: If the user greets you (e.g., "Hi", "Hello", "Who are you?"), respond politely, introduce yourself as Dr Mom AI Assistant, and explain that you are here to help them understand information.
	2. CONTEXTUAL ACCURACY: For all medical or factual questions, prioritize the information provided in the 'Context' section below.
	3. STRICTNESS: If the question is medical in nature but the answer is NOT found in the context, explicitly state something like this: "I'm sorry, but that specific information is not available in my current medical knowledge."
	4. TONE: Maintain a professional, empathetic, and clinical tone. Use bullet points for complex medical explanations to ensure clarity.
	Context:
	{context}"""

	# Two-message chat template: the system instructions, then the user's turn.
	prompt = ChatPromptTemplate.from_messages(
	    [
	        ("system", _SYSTEM_PROMPT),
	        ("human", "{question}"),
	    ]
	)

	# Last stage of the chain in run_query, applied to the model's output.
	output_parser = StrOutputParser()


	def format_docs(docs):
	    """Merge documents into a single context string.

	    Reads ``page_content`` from each item of *docs*, in order, and joins
	    the pieces with one blank line between consecutive entries. An empty
	    iterable gives an empty string.
	    """
	    page_texts = [doc.page_content for doc in docs]
	    separator = "\n\n"
	    return separator.join(page_texts)


	# ==============================
	# RAG QUERY
	# ==============================
	# Cached chain; stays None until the first non-blank question arrives.
	_rag_chain = None


	def _get_rag_chain():
	    """Return the shared retrieval chain, assembling it on the first call."""
	    global _rag_chain

	    if _rag_chain is None:
	        # Fetch the five closest chunks for each question.
	        retriever = db.as_retriever(search_kwargs={"k": 5})

	        llm = ChatGroq(
	            model="llama-3.1-8b-instant",
	            api_key=GROQ_API_KEY,
	            temperature=0,
	        )

	        # The incoming question is routed two ways: through the retriever
	        # (then flattened to text) for "context", and unchanged for
	        # "question". Both feed the prompt, the model and the parser.
	        _rag_chain = (
	            {
	                "context": retriever | format_docs,
	                "question": RunnablePassthrough(),
	            }
	            | prompt
	            | llm
	            | output_parser
	        )

	    return _rag_chain


	def run_query(question):
	    """Answer *question* using the indexed document as context.

	    Args:
	        question: Text typed by the user. ``None``, an empty string or
	            whitespace-only text is treated as "no question".

	    Returns:
	        The chain's output for the question, or a short prompt asking the
	        user to enter one when the input is blank.
	    """
	    # Guard against None as well as blank text so .strip() cannot fail.
	    if not question or not question.strip():
	        return "Please enter a question."

	    # The retriever, client and chain do not depend on the question, so
	    # they are built once and reused instead of being recreated per request.
	    return _get_rag_chain().invoke(question)


	# ==============================
	# GRADIO UI
	# ==============================
	# One text box in, one ten-line text box out.
	question_box = gr.Textbox(label="Question", placeholder="Ask me something...")
	response_box = gr.Textbox(label="Response", lines=10)

	# Wire the form to run_query.
	interface = gr.Interface(
	    fn=run_query,
	    inputs=question_box,
	    outputs=response_box,
	    title="Your Assistant",
	    description="Ask questions",
	)

	# Launch the app as soon as this module is executed.
	interface.launch()