
# First, the outline:

## Take the PDFs and process them
## Convert them to text (transcribe)
## Chunk the text
## Embed the chunks and save them
## Load the embeddings
## Make the chat conversational, i.e. keep a memory of the history
## The user asks a question and gets an answer grounded in all the PDFs

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_community.vectorstores import Chroma
# 1. Load each PDF into a list of per-page Documents
document_paths = [
    "cyber.pdf",
    "conduct.pdf",
]
all_docs = []
for file_path in document_paths:
    print(f"Loading {file_path}...")
    loader = PyPDFLoader(file_path)
    all_docs.extend(loader.load())
print(f"Loaded {len(all_docs)} pages in total.")
# 2. Split documents into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
chunks = text_splitter.split_documents(all_docs)
print(f"Split into {len(chunks)} chunks")

# 3. Create embeddings and store them in a vector database
embeddings = OllamaEmbeddings(model="nomic-embed-text")  # swap in your preferred embedding model
vectorstore = Chroma.from_documents(chunks, embeddings)
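
# The outline calls for saving and re-loading the embeddings, but the
# in-memory store above is rebuilt on every run. A minimal sketch of
# persisting the index to disk ("./chroma_db" is an assumed path):
# vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory="./chroma_db")
# ...and of loading it back later without re-embedding:
# vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)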


# 4. Set up the local LLM and the retriever
llm = ChatOllama(
    base_url="http://127.0.0.1:11434",
    model="llama3:8b",  # or e.g. "deepseek-r1:8b"
)
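
# Alternative: the original imports also included ChatGroq; a hosted model can
# be swapped in with no other changes (needs GROQ_API_KEY in the environment;
# the model name below is an assumption):
# from langchain_groq import ChatGroq
# llm = ChatGroq(model="llama-3.1-8b-instant")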
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
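
# Alternative retrieval: maximal marginal relevance (MMR) trades a little
# similarity for diversity across the two PDFs. A sketch on the same
# vectorstore (fetch_k is how many candidates MMR re-ranks, an assumed value):
# retriever = vectorstore.as_retriever(
#     search_type="mmr",
#     search_kwargs={"k": 4, "fetch_k": 20},
# )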

from langchain_core.prompts import ChatPromptTemplate

# 5. Store the conversation history as a simple list of (question, answer) pairs
conversation_history = []

# 6. Create a prompt template that includes the retrieved context and the conversation history
template = """
You are a helpful assistant that answers questions based on the provided PDF documents.
Answer the question based only on the following context from the PDFs:
{context}

Previous conversation history:
{chat_history}

Current question: {question}

Provide a comprehensive and accurate answer based on the information in the PDFs.
"""

prompt = ChatPromptTemplate.from_template(template)


# 7. Helpers to format the retrieved documents and the conversation history
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

def format_chat_history(history):
    if not history:
        return "No previous conversation."
    formatted = ""
    for i, (q, a) in enumerate(history):
        formatted += f"Question {i+1}: {q}\nAnswer {i+1}: {a}\n\n"
    return formatted
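
# Note: the full history is re-sent to the model on every turn, so long
# sessions can eventually overflow the context window. A minimal sketch of
# capping it (MAX_TURNS is an assumed cutoff, not part of the original); it
# could be applied as format_chat_history(trim_history(conversation_history)):
MAX_TURNS = 5

def trim_history(history, max_turns=MAX_TURNS):
    # Keep only the most recent (question, answer) pairs
    return history[-max_turns:]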

# 8. Build the conversational chatbot function
def chat_with_pdfs(query):
    # Retrieve relevant chunks (invoke replaces the deprecated get_relevant_documents)
    docs = retriever.invoke(query)
    context = format_docs(docs)
    
    # Format the conversation history
    chat_history = format_chat_history(conversation_history)
    
    # Generate response
    messages = prompt.format_messages(
        context=context,
        chat_history=chat_history,
        question=query
    )
    response = llm.invoke(messages).content
    
    # Update conversation history
    conversation_history.append((query, response))
    
    return response

# 9. Interactive chat interface
def interactive_chat():
    print("PDF Chatbot initialized. Type 'exit' to quit.")
    while True:
        query = input("\nYour question: ")
        if query.lower() in ["exit", "quit"]:
            break
        response = chat_with_pdfs(query)
        print(f"\nChatbot: {response}")

# Run interactive chat
if __name__ == "__main__":
    interactive_chat()