from fastapi import FastAPI, Form, HTTPException
from fastapi.responses import JSONResponse
import tempfile
import requests
from generate_storigo_content import (
    load_pdf,
    transcribe,
    load_txt,
    split_text_with_semantic_chunker,
    crawlerrr,
    read_file_url,
    split_text_with_semantic_chunker_for_url,
    clean_using_llm,
    count_total_words,
    save_documents_to_txt,
    create_and_save_embeddings,
    generate_slide_content,
    merge_all_faiss,
    parsing,
    marks_splitter,
    allocate_slides,
    create_embeddings,
    generate_slide_content_alloc1,
)
from generate_storigo_content_from_prompt import generate_slide_content_from_prompt

from langchain_community.embeddings import OllamaEmbeddings
import os
import logging
import traceback

app = FastAPI()

def fetch_document_with_curl(url):
    """Download a document over HTTP (via requests, despite the name) and return its raw bytes."""
    response = requests.get(url, timeout=60)  # timeout guards against hanging downloads
    if response.status_code == 200:
        return response.content
    raise HTTPException(status_code=404, detail=f"Document not found (upstream status {response.status_code})")
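
# Usage sketch (hypothetical URL):
#   pdf_bytes = fetch_document_with_curl("https://example.com/report.pdf")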

def process_and_generate_response(split_documents, client_id, num_slides, num_mcqs, is_image, is_question, question_position, isGPU):
    try:
        # Save embeddings for this client, then merge all FAISS indexes
        create_and_save_embeddings(split_documents, client_id)
        merged_embeddings = merge_all_faiss(client_id)

        # Generate slide content from the merged index
        slide_content = generate_slide_content(merged_embeddings, client_id, num_slides, num_mcqs, is_image, is_question, question_position, isGPU)
        print(f"Slide content generated for {num_slides} slides.")

        # Serialize the Pydantic model (.dict() is the v1 API; use .model_dump() on v2)
        response_data = slide_content.dict()
        return JSONResponse(content=response_data)
    except Exception as e:
        # Log the actual error for debugging
        logging.error(f"Error generating content: {str(e)}")

        # Return a generic error message to the frontend
        return JSONResponse(
            content={"error": "Something went wrong. Please try again later."},
            status_code=500,
        )


@app.post("/generate-storigo-content")
async def generate_storigo_content(
    client_id: str = Form(...),
    num_slides: int = Form(...),
    document_url: str = Form(...),
    is_image: str = Form(...),
    document_type: str = Form(...),
    is_question: str = Form(...),
    num_mcqs: int = Form(...),
    question_position: int = Form(...),
    isGPU: int = Form(...),
) -> JSONResponse:
    try:
        if document_type == "pdf":
            print(f"Fetching document from: {document_url}")
            document_content = fetch_document_with_curl(document_url)
            print(f"Document fetched, size: {len(document_content)} bytes")

            # Save PDF to a temporary file
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
                temp_file.write(document_content)
                temp_file_path = temp_file.name
                print(f"Temporary file saved at: {temp_file_path}")

            try:
                # Parse the PDF into markdown
                parse_data = parsing(temp_file_path)

                # Persist the parsed markdown for inspection. Note: a fixed
                # filename in the working directory is not safe under
                # concurrent requests.
                with open("parse_data.md", "w", encoding="utf-8") as f:
                    f.write(parse_data)

                # Split the markdown on first- and second-level headers
                headers_to_split_on = [
                    ("#", "Header 1"),
                    ("##", "Header 2"),
                ]
                chunks = marks_splitter(headers_to_split_on, parse_data)

                # Allocate slides across the chunks and generate content
                allocation = allocate_slides(chunks, num_slides, min_chars=100)
                embeddings = create_embeddings(chunks, client_id)  # result currently unused here
                slide_content = generate_slide_content_alloc1(chunks, allocation, num_slides, num_mcqs, is_image, is_question, question_position, isGPU)

                # Serialize the Pydantic model (.dict() is the v1 API; use .model_dump() on v2)
                response_data = slide_content.dict()
                return JSONResponse(content=response_data)

            finally:
                os.remove(temp_file_path)  # Clean up the temp file

        elif document_type == "html":
            print(f"Fetching HTML content from: {document_url}")
            if "youtube" in document_url.lower():
                file_path = transcribe(document_url)
                embeddings = OllamaEmbeddings(model='nomic-embed-text')
                text = load_txt(file_path)
                split_documents1 = split_text_with_semantic_chunker(text, embeddings)
                meaningful_content = clean_using_llm(split_documents1)
                split_documents = split_text_with_semantic_chunker_for_url(meaningful_content, embeddings)
            else:
                # Crawl the URL for content
                filename = await crawlerrr(document_url)
                print(f"Crawled content saved to: {filename}")

                # Read the content from the saved file
                raw_content = read_file_url(filename)
                if not raw_content:
                    raise HTTPException(status_code=500, detail="Failed to read content from the URL: RAW CONTENT")

                # Clean and extract meaningful content using LLM
                meaningful_content = clean_using_llm(raw_content)
                print("Extracted meaningful content.")

                # Split the content into semantic chunks
                embeddings = OllamaEmbeddings(model='nomic-embed-text')
                split_documents = split_text_with_semantic_chunker_for_url(meaningful_content, embeddings)

            # Use the common processing function
            return process_and_generate_response(split_documents, client_id, num_slides, num_mcqs, is_image, is_question, question_position, isGPU)

        else:
            raise HTTPException(status_code=400, detail="Unsupported source type. Please use 'pdf' or 'html'.")


    except HTTPException:
        # Re-raise HTTPExceptions (e.g. the 400 raised above) so FastAPI
        # returns them as-is instead of converting them to 500s below
        raise
    except Exception as e:
        # Log the actual error for debugging
        logging.error(f"Error generating content: {str(e)}")

        # Return the error and traceback to the caller for debugging.
        # Only include the trace in development; remove it in production!
        return JSONResponse(
            status_code=500,
            content={
                "error": str(e),
                "trace": traceback.format_exc().splitlines(),
            },
        )
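
# Example request, a sketch assuming the server runs on localhost:8000
# (all field values are hypothetical):
#
#   curl -X POST http://localhost:8000/generate-storigo-content \
#     -F client_id=demo \
#     -F num_slides=5 \
#     -F document_url=https://example.com/report.pdf \
#     -F is_image=1 \
#     -F document_type=pdf \
#     -F is_question=1 \
#     -F num_mcqs=3 \
#     -F question_position=2 \
#     -F isGPU=0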

@app.post("/generate-storigo-content-from-prompt")
async def generate_storigo_content_from_prompt(
    client_id: int = Form(...),
    prompt: str = Form(...),
    num_slides: int = Form(...),
    is_image: int = Form(...),
    is_question: str = Form(...),
    num_mcqs: int = Form(...),
    question_position: int = Form(...),
    isGPU: int = Form(...),
) -> JSONResponse:
    try:
        # Generate slide content from prompt
        storigo_content = generate_slide_content_from_prompt(prompt, num_slides, num_mcqs, is_image == 1, is_question, question_position, isGPU)

        # Convert the Pydantic model to a dictionary
        response_data = storigo_content.dict()

        return JSONResponse(content=response_data)
    except Exception as e:
        # Log the actual error for debugging
        logging.error(f"Error generating content: {str(e)}")
        
        # Return a generic error message to the frontend
        return JSONResponse(
            content={"error": "Something went wrong. Please try again later."},
            status_code=500
        )
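
# Example request, same assumptions as above (hypothetical values):
#
#   curl -X POST http://localhost:8000/generate-storigo-content-from-prompt \
#     -F client_id=1 \
#     -F "prompt=An introduction to machine learning" \
#     -F num_slides=5 \
#     -F is_image=1 \
#     -F is_question=1 \
#     -F num_mcqs=3 \
#     -F question_position=2 \
#     -F isGPU=0


# Local dev entry point, a minimal sketch assuming this module is saved as
# app.py and uvicorn is installed; adjust host/port for your deployment
# (equivalently: `uvicorn app:app --host 0.0.0.0 --port 8000`).
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)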
