o
    oh*T                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZmZmZ d dlmZ d dlm Z mZ d dlm!Z! d dl"Z"d dl#m$Z$ dZ%dZ&dZ'G dd deZ(G dd deZ)G dd deZ*G dd deZ+G dd deZ,G dd deZ-dd  Z.d!d" Z/d#d$ Z0d%d& Z1d7d(d)Z2d*d+ Z3d,e4fd-d.Z5d/d0 Z6d1d2 Z7e8d3krd4Z9d5Z:d6Z;e7e9e:e; dS dS )8    N)OllamaEmbeddings)FAISS)ChatGroq)ChatPromptTemplate)StrOutputParserPydanticOutputParser)OutputFixingParser)RunnablePassthrough)	PdfReader)	BaseModelField)ListOptionalDict)urlparse)Unionr   )RunnableLambda)
ChatOllama8gsk_CEh3itIpUAkEkEKsUDqVWGdyb3FYoTjqmXNTBHOSxJFK3obGTzXZznomic-embed-textz"44622834-f22df6f12cf45558ee180dd8dc                   @   sz   e Zd ZU edZeed< edddZee ed< edddZ	e
e ed	< edd
dZeed< edddZee ed< dS )SlideContentflashtypeNz$An optional subheading for the slidedescription
subheading.z(List of paragraphs for the slide content
paragraphszUA specific and concise suggestion for a relevant visualization or image (max 5 words)visualization_suggestionzURL of the image for the slideimage)__name__
__module____qualname__r   r   str__annotations__r   r   r   r   r   r    r#   r#   U/var/www/eduai.edurigo.com/storigo/production/generate_storigo_content_from_prompt.pyr      s   
 r   c                   @   s>   e Zd ZU edddZeeef ed< edddZ	e
ed< dS )StorigoContent1.7Dictionary of slide contents with slide numbers as keysr   slides/Total token count for all the generated contenttoken_countNr   r   r    r   r'   r   r!   r   r"   r)   intr#   r#   r#   r$   r%   %   s   
 r%   c                   @   s^   e Zd ZU edZeed< edddZeed< edddZe	e ed< edd	dZ
eed
< dS )
MCQContentQuestionr   .zThe multiple-choice questionr   questionzA list of 4 answer optionsoptionsz0The correct answer (e.g., 'a', 'b', 'c', or 'd')correct_answerN)r   r   r    r   r   r!   r"   r.   r/   r   r0   r#   r#   r#   r$   r,   )   s
   
 r,   c                   @   s6   e Zd ZU edddZeeef ed< dZ	e
ed< dS )StorigoContent.r&   r   r'   r   r)   Nr*   r#   r#   r#   r$   r1   /   s   
 r1   c                   @   sF   e Zd ZU edddZeeef ed< edddZ	eee
f ed< dS )StorigoContentMCQ.r&   r   r'   z8Dictionary of MCQs with identifiers like 'mcq_1' as keysmcqsN)r   r   r    r   r'   r   r!   r   r"   r3   r,   r#   r#   r#   r$   r2   5   s   
  r2   c                   @   sF   e Zd ZU edddZeeeee	f f e
d< edddZee
d< dS )StorigoContentMCQMid.zYDictionary of slide contents with slide numbers as keys and MCQs with MCQ numbers as keysr   r'   r(   r)   N)r   r   r    r   r'   r   r!   r   r   r,   r"   r)   r+   r#   r#   r#   r$   r4   ;   s   
 $r4   c              
   C   sl   z t td}t| g|}d| }tj|dd || |W S  ty5 } z	tdt| d }~ww )N)modelzmy_embeddings/T)exist_okzError creating embeddings: )	r   OLLAMA_MODELr   
from_textsosmakedirs
save_local	Exceptionr!   )text	client_id
embeddingsvectors
client_direr#   r#   r$   create_embeddings@   s   


rC   c                    s   t |}|  dd| }td| }tg d  fdd|D }|   fdd|D  }|d tdt| }d|S )N \w+andortheaaninonattoforofwithbyc                       g | ]}| vr|qS r#   r#   .0wordcommon_wordsr#   r$   
<listcomp>X       z)generate_search_query.<locals>.<listcomp>c                    rT   r#   r#   rU   )suggestion_wordsr#   r$   rZ   \   r[      )	extract_context_keywordsjoinrefindalllowersetsplitminlen)r   slide_contentcontext_keywordscombined_querywordsfiltered_wordsprioritized_wordsquery_wordsr#   )rY   r\   r$   generate_search_queryM   s   
rn   c                    s^   | j pd dd| j }td| }tg d  fdd|D }tt|d d S )N rD   rE   rF   c                    rT   r#   r#   rU   rX   r#   r$   rZ   i   r[   z,extract_context_keywords.<locals>.<listcomp>   )r   r_   r   r`   ra   rb   rc   list)rg   r=   rj   keywordsr#   rX   r$   r^   c   s
   r^   c              
   C   s   d}t | dddddd}z/tj||d}|  | }|d	 r2t|d	 d
d dd}|d d W S td|   W d S  tjyX } ztdt|  W Y d }~d S d }~w t	ys } ztdt|  W Y d }~d S d }~ww )Nzhttps://pixabay.com/api/photo
horizontalr]   true	relevance)keyq
image_typeorientationper_page
safesearchorder)paramshitsc                 S   s   | d | d  S )Nlikes	downloadsr#   xr#   r#   r$   <lambda>   s    z%fetch_pixabay_image.<locals>.<lambda>T)rw   reverser   webformatURLzNo image found for query: z#Error fetching image from Pixabay: z)Unexpected error in fetch_pixabay_image: )
PIXABAY_API_KEYrequestsgetraise_for_statusjsonsortedprintRequestExceptionr!   r<   )queryurlr~   responsedatasorted_hitsrB   r#   r#   r$   fetch_pixabay_imagel   s6   
r      c                 C   s   | st d d S t|D ]S}z1t| |}t d|d  d|  t|}|r2t d|  |W   S t d|  td W q ty_ } zt d|d  dt|  W Y d }~qd }~ww t d	| d
 d S )Nz%No visualization suggestion provided.zAttempt r   z to fetch image for query: zValid image found: z!No image URL returned for query: z"Error in get_valid_image (attempt z): zNo valid image found after z	 attempts)r   rangern   r   timesleepr<   r!   )r   rg   max_attemptsattemptr   	image_urlrB   r#   r#   r$   get_valid_image   s&   

(r   c                 C   s   t d| }t|S )NrE   )r`   ra   rf   )r=   tokensr#   r#   r$   count_tokens   s   r   returnc                 C   s   t | dr	| j}nt| }| }d|v sd|v r3|d}|dd }|dkr3|dkr3||| }td	d
|}zt	|}d|v rLt
|d W S W |S    Y |S )NcontentzHere's another attemptzI apologize{}r   r   z	'(\w+)\":z"\1":
properties)hasattrr   r!   stripfindrfindr`   subr   loadsdumps)
ai_messager=   startendparsedr#   r#   r$   quick_json_fix   s&   


r   c           2   
      s  zS|dkrt dtd}ntddd}d}ttd t|}	d	d
 dd
  fdd
d|	B |B  B }
|
| |d}tt	|j
 dd
 d}td t| td |j
 D ]\}}t|jdk rq|jd t|jdk sdqYt|tr||dk}nt|tr| dv }nd}|r|j
 D ]*\}}|jrt|j|}|r||_qtd| d d|_qtd| d d|_qn|j
 D ]}d |_qd}|j
 D ]}|j dd|j d|j }|t|7 }qtd t| td |j
}t| |rLtd  d!}d"}ttd}t|}t|}i }t| }tdt|dD ]z}t||k rg }t|t|d t|D ]$}|| }|| }t |d#d$} t |d%g }!||  d&d|!  q<d'|}"tt!|" td( t|" t"t#}#t$j%||d)}$||B |#B |$B |"|& d*}%td+ |%|d,t|d  < q%d}&| D ]}'|'j' dd|'j( d|'j) }|&t|7 }&qi }(d})|}*|}+|dkr|+dkr|*|+ n|*},t*|D ].\}-}|| |(|< |-d |, dkr	|)|+k r	d,|)d  }.t|. ||. |(|.< |)d7 })qt+|(|d-}/|/W S |D ]	}|| |(|< qt|+D ]})d,|)d  }.|.|v r:||. |(|.< q$td.|.  q$t+|(|d-}/|/W S t||d-}0|0W S  t,yj }1 z	t,d/t|1 d }1~1ww )0Nr   zllama3-70b-8192)
model_namegroq_api_keyzhttp://127.0.0.1:11434z	llama3:8b)base_urlr5   aq  
        Based on the following prompt, generate professional and engaging content for exactly {num_slides} slides for a Storigos presentation.
        Each slide MUST have ALL of the following:
        - A subheading
        - 2-3 paragraphs
        - A specific, concise visualization suggestion (max 5 words)

        For the visualization suggestion:
        - Provide a clear and specific description of an image that would be relevant to the slide content.
        - Keep it very concise, using a maximum of 5 words.
        - Focus on concrete objects, scenes, or concepts that can be easily visualized.
        - Avoid abstract or overly complex ideas.
        - Include the context of the topic.

        Prompt: {prompt}

        Be creative and professional. Include engaging elements like:
        - Thought-provoking questions
        - Relevant statistics or data points
        - Industry insights or trends
        - Practical examples or case studies
        - Calls to action

        {format_instructions}

        Ensure that the output is a valid JSON object with keys "slide_1", "slide_2", etc., each containing the slide content.
        ALL fields ( subheading, paragraphs, visualization_suggestion) MUST be filled for each slide.
        Do not use null values. If you can't think of content for a field, provide a relevant placeholder or general information.
        )pydantic_objectc                 S      | d S )Npromptr#   r   r#   r#   r$   r          z4generate_slide_content_from_prompt.<locals>.<lambda>c                 S   r   )N
num_slidesr#   r   r#   r#   r$   r      r   c                    s      S )N)get_format_instructionsr   parserr#   r$   r      r   )r   r   format_instructions)r   r   c                 S   r   )Nr   r#   )itemr#   r#   r$   r     r   )rw   r   r      z2Additional information and context for this slide.r   )1ru   yes0Fz+Warning: No suitable image found for slide z after multiple attempts.z>https://via.placeholder.com/640x480.png?text=Placeholder+Imagez/Warning: No visualization suggestion for slide .rD   resultSADASDaDASDDSADAzMCQ Starteda  
                Based on the following context from the last two slides, generate one multiple-choice question (MCQ). The question should be relevant to the content and designed to test comprehension.

                **Context**: {context}

                The final output **must be valid JSON only**. Ensure all keys and string values are enclosed in double quotes and do not include any additional strings or explanations:


                Don't give any conversational like text, For example: "Here is the MCQ:". Just return JSON Output.
                Here is the pydantic class to validate the output that you give. Make sure the check always passes.

                class MCQContent(BaseModel):
                type: str = Field("Question")
                question: str = Field(..., description="The multiple-choice question")
                options: List[str] = Field(..., description="A list of 4 answer options")
                correct_answer: str = Field(..., description="The correct answer (e.g., 'a', 'b', 'c', or 'd')")

                CRITICAL: Return ONLY the JSON object with no additional text, explanations, or formatting.

                {format_instructions}

                JSON:u  
Based on the following context from the last two slides, generate one multiple-choice question (MCQ). The question should be relevant to the content and designed to test comprehension.

Context:
{context}

The final output MUST be a **valid JSON object**, and nothing else. Do not include any explanation or conversational text (e.g., "Here is your MCQ" or "Let's try again").

Output format example (use this exact structure):


{{
  "type": "Question",
  "question": "What is the capital of France?",
  "options": ["Berlin", "London", "Paris", "Madrid"],
  "correct_answer": "Paris"
}}


Ensure:
- All strings use double quotes
- No trailing commas
- Exactly 4 options
- Output must be enclosed in <json> ... </json> tags
- Only return the JSON block inside these tags
- Never include any phrases like “I apologize”, “Here is”, or “Let’s retry”. Only return valid JSON.

CRITICAL: Return ONLY the JSON object with no additional text, explanations, or formatting.

r   ro   r   z: 
context_text)llmr   )contextr   zV======================================================================================mcq_)r'   r)   z [Warning] Skipping missing MCQ: z Error generating slide content: )-r   GROQ_API_KEYr   r   r1   r   from_templateinvokedictr   r'   itemsr   rf   r   append
isinstancer+   r!   r   r   r   r   valuesr   r_   r   r,   rq   keysr   re   getattrr   r   r   r   from_llmr   r.   r/   r0   	enumerater4   r<   )2r   r   num_mcqsis_imageis_questionquestion_positionGPUr   slide_content_templateslide_content_promptslide_content_chainr   ordered_slides	slide_keyrg   is_image_boolr   token_count_texttext_content
all_slidesmcq_templatemcq_output_parser_template
mcq_parser
mcq_prompt!mcq_output_parser_template_promptr3   
slide_keysicontext_slidesjrw   slidetitleparasr   
json_fixeroutput_fixing_parser
mcq_resultr)   mcqinterleaved_contentmcq_countertotal_slides
total_mcqsintervalidxmcq_keystorigo_contentstorigo_content_without_mcrB   r#   r   r$   "generate_slide_content_from_prompt   s  


	


 
 


 

r   c           
      C   sf   t   }d}d}d}d}td t| ||||||}t   }	td t| td|	| dd	 d S )
N   Tr   r   StartingzGenerated Slide Content:z
Process took z.2fz seconds)r   r   r   )
r   r   r   
start_timer   r   r   r   rg   end_timer#   r#   r$   main  s   r  __main__zCThe history and impact of artificial intelligence in modern society   F)r   )<r9   r   randomr`   r   r   langchain_community.embeddingsr    langchain_community.vectorstoresr   langchain_groqr   langchain_core.promptsr   langchain_core.output_parsersr   r   langchain.output_parsersr   langchain_core.runnablesr	   PyPDF2r
   pydanticr   r   typingr   r   r   urllib.parser   r   r   shutillangchain_ollamar   r   r7   r   r   r%   r,   r1   r2   r4   rC   rn   r^   r   r   r   r!   r   r   r  r   r   r   r   r#   r#   r#   r$   <module>   s^    
	
  