o
    oh                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ dZd	d
 ZG dd dZG dd dZdd Zdd Zedkrdee  dS dS )    N)tqdm)OllamaEmbeddings)FAISS)ChatGroq)PromptTemplate)
ChatOllama8gsk_Q6G2mqkFL74aSdlDkU3OWGdyb3FYBdtLlFxhe78b1g17n1Ew181wc                 C   s   t dd}d|  d| }tj|r:dd t|D }|r0tj||dd}td	|  |S td
| d d S td|  d| d d S )Nznomic-embed-text)modelzmy_embeddings_video//c                 S   s   g | ]	}| d r|qS ))z.faissz.pkl)endswith).0f r   J/var/www/eduai.edurigo.com/question_generate/testing/generate_mcq_video.py
<listcomp>   s    z'load_all_embeddings.<locals>.<listcomp>T)allow_dangerous_deserializationzEmbeddings loaded from zNo embedding files found in .z"No embeddings found for client_id z and reference_id )r   ospathisdirlistdirr   
load_localprint)	client_idreference_id
embeddingsreference_dirembedding_filesvectorsr   r   r   load_all_embeddings   s   
r   c                   @   s<   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd ZdS )QuestionHistoryc                 C   s"   d| d| d| _ |  | _d S )Nzquestion_history_video/r
   z.json)	file_pathload	questions)selfr   r   r   r   r   __init__    s   zQuestionHistory.__init__c                 C   s   t j| jrHz*t| jd}|  }W d    n1 sw   Y  |r.tt	|W S t W S  tj
yG   td| j d t  Y S w td t S )NrzWarning: Invalid JSON in z!. Starting with an empty history.z6No history file found. Starting with an empty history.)r   r   existsr!   openreadstripsetjsonloadsJSONDecodeErrorr   )r$   r   contentr   r   r   r"   $   s   
zQuestionHistory.loadc                 C   s\   t jt j| jdd t| jd}tt| j	| W d    d S 1 s'w   Y  d S )NT)exist_okw)
r   makedirsr   dirnamer!   r(   r,   dumplistr#   )r$   r   r   r   r   save1   s   "zQuestionHistory.savec                 C   s   | j | d S N)r#   addr$   questionr   r   r   r8   6   s   zQuestionHistory.addc                 C   s
   || j v S r7   )r#   r9   r   r   r   __contains__9      
zQuestionHistory.__contains__c                 C   s
   t | jS r7   )lenr#   )r$   r   r   r   __len__<   r<   zQuestionHistory.__len__N)	__name__
__module____qualname__r%   r"   r6   r8   r;   r>   r   r   r   r   r       s    r    c                   @   s   e Zd Zdd Zdd ZdS )RateLimiterc                 C   s   || _ || _g | _d S r7   )max_requestsperiodrequests)r$   rC   rD   r   r   r   r%   @   s   
zRateLimiter.__init__c                    sr   t     fddjD _tjjkr/j jd   }|dkr/t|I d H  jt    d S )Nc                    s   g | ]} | j k r|qS r   )rD   )r   reqnowr$   r   r   r   G   s    z$RateLimiter.wait.<locals>.<listcomp>r   )timerE   r=   rC   rD   asynciosleepappend)r$   
sleep_timer   rG   r   waitE   s   zRateLimiter.waitN)r?   r@   rA   r%   rN   r   r   r   r   rB   ?   s    rB   c                    sZ  t | |}|std g ddfS t| ||jddid}|dkr)tdtd ntdd	d
 tddgddt	dtj
g }d}d}tddd fdd}	t|dd}
t d }d}||I d H }dd |D }t||k rt |k r|t| }t|d}t|}zH|	|||I d H }|D ]5}t||k r|d vr|| |d  |d7 }|t|d  tdd |d   D  7 }q|
d W n ty } ztd!|  W Y d }~qrd }~ww t||k rt |k s~t||k rtd"t| d#| d$   |t|fW  d    S 1 s&w   Y  d S )%Nz=No embeddings found for the given client ID and reference ID.r   k
   )search_kwargszllama3-70b-8192)
model_namegroq_api_keyzhttp://127.0.0.1:11434z	llama3:8b)base_urlr	   contextnum_questionsa:  
        {context}
        Generate {num_questions} multiple-choice questions based on the information provided above. Each question should have one correct option and three incorrect options. Use the following format:
        Q:
        A)
        B)
        C)
        D)
        Correct:
        Ensure questions are concise and directly related to the content. Try to cover different aspects of the provided information. Do not include phrases like "in the document", "according to the text", or any other references to the source material in the questions.
        )input_variablestemplatezFQ: (.+?)\nA\) (.+?)\nB\) (.+?)\nC\) (.+?)\nD\) (.+?)\nCorrect: ([A-D])      <   )rC   rD   c              
      s     I d H  j| |d} |gI d H }|j}g }|D ]G}| \}}	}
}}}| }tjdd|tj	d }tjdd|tj	d }|vri|
|t| ||	 |
 | | d|d q"|S )N)rU   rV   ze\b(in|from|according to|mentioned in|stated in|as per) (the|this)? ?(document|text|passage|content)\b )flagszQ^(The|This) (document|text|passage|content) (states|mentions|says|indicates) that)ABCD)numberr:   optionscorrect_answer)rN   formatainvoker/   finditergroupsr*   resub
IGNORECASErL   r=   )rU   start_number
batch_sizepromptresponseresponse_strr#   matchr:   option_aoption_boption_coption_dcorrect_letterllm	mcq_regexmcq_templatequestion_historyrate_limiterr   r   process_contextt   s0   
z+generate_mcq_video.<locals>.process_contextzGenerating MCQs)totaldesci,  z6Provide a comprehensive overview of the entire contentc                 S   s   g | ]}|j qS r   )page_content)r   resultr   r   r   r      s    z&generate_mcq_video.<locals>.<listcomp>   r:   c                 s   s    | ]	}t | V  qd S r7   )r=   split)r   optionr   r   r   	<genexpr>   s    z%generate_mcq_video.<locals>.<genexpr>rc   zError processing context: zWarning: Only generated z unique questions out of z requested.)r   r   r    as_retrieverr   GROQ_API_KEYr   r   ri   compileDOTALLrB   r   rI   rf   r=   minrandomchoicerL   r8   r   sumvaluesupdate	Exceptionr6   )r   rV   r   GPUr   	retrievermcqsquestion_numbertotal_token_countr}   pbartimeoutqueryresultscontextsremaining_questionsrm   rU   new_mcqsmcqer   rw   r   generate_mcq_videoN   sj   





.&r   c            	         s   d} d}d}t | ||I d H \}}}|std d S td|  td|  |D ].}td|d  d|d	   |d
  D ]\}}t| d|  qAtd|d  d q+d S )Nr   r   z'No multiple-choice questions generated.zTotal questions in history: zTotal token count: z	Question rb   z: r:   rc   z) zCorrect Answer: rd   
)r   r   items)	r   rV   r   r   history_counttoken_countr   r   textr   r   r   main   s    r   __main__)r   r   r,   rJ   ri   rI   r   langchain_community.embeddingsr    langchain_community.vectorstoresr   langchain_groqr   langchain.promptsr   langchain_ollamar   r   r   r    rB   r   r   r?   runr   r   r   r   <module>   s*     c