import os

import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.question_answering import load_qa_chain
from langchain_core.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_experimental.text_splitter import SemanticChunker
from langchain_ollama import OllamaEmbeddings, ChatOllama

load_dotenv()
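# A small Streamlit RAG app: PDFs uploaded in the sidebar are chunked,
# embedded with a local Ollama model into an on-disk FAISS index, and
# questions are answered by a local chat model over the retrieved chunks.
# Run with: streamlit run <this_file>.py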


# Secrets are read from the environment (e.g. a .env file) instead of being
# hardcoded, so API keys never land in version control.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
PIXABAY_API_KEY = os.getenv("PIXABAY_API_KEY")
OLLAMA_MODEL = "nomic-embed-text"


def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page in the uploaded PDFs."""
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() can return None for image-only pages
            text += page.extract_text() or ""
    return text



def get_text_chunks(text):
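    """Split raw text into large overlapping chunks for embedding and retrieval."""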
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
    chunks = text_splitter.split_text(text)
    return chunks

def split_text_with_semantic_chunker(text, embeddings):
    """Split the text into semantic chunks using the given embeddings."""
    text_splitter = SemanticChunker(
        embeddings,
        breakpoint_threshold_type="percentile",  # alternatives: "standard_deviation", "interquartile"
    )
    chunks = text_splitter.split_text(text)
    print("Documents split into semantic chunks.")
    return chunks
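# Example usage (assumes a local Ollama server with the embedding model pulled):
#   embeddings = OllamaEmbeddings(model=OLLAMA_MODEL)
#   chunks = split_text_with_semantic_chunker(raw_text, embeddings)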


def get_vector_store(text_chunks):
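    """Embed the chunks and persist them in a local FAISS index,
    appending to the existing index when one is already on disk."""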
    embeddings = OllamaEmbeddings(model=OLLAMA_MODEL)
    
    # Load existing FAISS index if available
    faiss_index_path = "faiss_index"
    if os.path.exists(faiss_index_path):
        existing_db = FAISS.load_local(faiss_index_path, embeddings, allow_dangerous_deserialization=True)
        existing_db.add_texts(text_chunks)  # Append new texts
        existing_db.save_local(faiss_index_path)
    else:
        vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
        vector_store.save_local(faiss_index_path)


def get_conversational_chain():
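    """Build a "stuff" question-answering chain over a local Ollama chat model."""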

    prompt_template = """
    Answer the question as thoroughly as possible using only the provided context,
    and include every relevant detail. If the answer is not in the provided
    context, just say "answer is not available in the context"; do not make up
    an answer.

    Context:
    {context}

    Question:
    {question}

    Answer:
    """

    model = ChatOllama(
        base_url="http://127.0.0.1:11434",
        model="llama3:8b",  # alternative local model: "deepseek-r1:8b"
    )

    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

    return chain




def user_input(user_question):
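    """Answer a user question from the persisted FAISS index, if one exists."""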
    embeddings = OllamaEmbeddings(model=OLLAMA_MODEL)
    
    # Load FAISS index
    faiss_index_path = "faiss_index"
    if os.path.exists(faiss_index_path):
        new_db = FAISS.load_local(faiss_index_path, embeddings, allow_dangerous_deserialization=True)
        
        # Retrieve the top 10 most similar chunks to give the model ample context
        docs = new_db.similarity_search(user_question, k=10)
    else:
        st.write("No PDF has been uploaded and processed yet!")
        return
    
    # Check if relevant documents were retrieved
    if not docs:
        st.write("No relevant information found in the PDFs.")
        return

    chain = get_conversational_chain()
    
    response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)

    print(response)
    st.write("Reply: ", response["output_text"])



def main():
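    """Streamlit entry point: a question box plus a sidebar for uploading and indexing PDFs."""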
    st.set_page_config("Chat PDF")
    st.header("Chat with PDF using PURU AI💁")

    user_question = st.text_input("Ask a Question from the PDF Files")

    if user_question:
        user_input(user_question)

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
        if st.button("Submit & Process"):
            if not pdf_docs:
                st.warning("Please upload at least one PDF before processing.")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    get_vector_store(text_chunks)
                    st.success("Done")



if __name__ == "__main__":
    main()