o
    Jph                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlZd dlZd dlZd dlmZ d dlZdZd	d
 ZG dd dZG dd dZdd Zdd Zedkrdee  dS dS )    N)OllamaEmbeddings)FAISS)ChatGroq)PromptTemplate)
ChatOllama)tqdm8gsk_CEh3itIpUAkEkEKsUDqVWGdyb3FYoTjqmXNTBHOSxJFK3obGTzXZc                 C   s   t dd}d|  d| d}tj|r;dd t|D }|r1tj||dd	}td
|  |S td| d d S td|  d| d d S )Nznomic-embed-text)modelzmy_embeddings//z/merged_faissc                 S   s   g | ]	}| d r|qS ))z.faissz.pkl)endswith).0f r   G/var/www/eduai.edurigo.com/question_generate/production/generate_mcq.py
<listcomp>   s    z'load_all_embeddings.<locals>.<listcomp>T)allow_dangerous_deserializationzEmbeddings loaded from zNo embedding files found in .z"No embeddings found for client_id z and reference_id )r   ospathisdirlistdirr   
load_localprint)	client_idreference_id
embeddingsreference_dirembedding_filesvectorsr   r   r   load_all_embeddings   s   
r   c                   @   s<   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd ZdS )QuestionHistoryc                 C   s"   d| d| d| _ |  | _d S )Nzquestion_history/r
   z.json)	file_pathload	questions)selfr   r   r   r   r   __init__!   s   zQuestionHistory.__init__c                 C   s   t j| jrJz,t| jd}|  }|rtt	|nt W  d    W S 1 s,w   Y  W d S  tj
yI   td| j d t  Y S w td t S )NrzWarning: Invalid JSON in z!. Starting with an empty history.z6No history file found. Starting with an empty history.)r   r   existsr!   openreadstripsetjsonloadsJSONDecodeErrorr   )r$   r   contentr   r   r   r"   %   s   (
zQuestionHistory.loadc                 C   s\   t jt j| jdd t| jd}tt| j	| W d    d S 1 s'w   Y  d S )NT)exist_okw)
r   makedirsr   dirnamer!   r(   r,   dumplistr#   )r$   r   r   r   r   save2   s   "zQuestionHistory.savec                 C   s   | j | d S N)r#   addr$   questionr   r   r   r8   7   s   zQuestionHistory.addc                 C   s
   || j v S r7   )r#   r9   r   r   r   __contains__:      
zQuestionHistory.__contains__c                 C   s
   t | jS r7   )lenr#   )r$   r   r   r   __len__=   r<   zQuestionHistory.__len__N)	__name__
__module____qualname__r%   r"   r6   r8   r;   r>   r   r   r   r   r        s    r    c                   @   s   e Zd Zdd Zdd ZdS )RateLimiterc                 C   s   || _ || _g | _d S r7   )max_requestsperiodrequests)r$   rC   rD   r   r   r   r%   A   s   
zRateLimiter.__init__c                    sr   t     fddjD _tjjkr/j jd   }|dkr/t|I d H  jt    d S )Nc                    s   g | ]} | j k r|qS r   )rD   )r   reqnowr$   r   r   r   H   s    z$RateLimiter.wait.<locals>.<listcomp>r   )timerE   r=   rC   rD   asynciosleepappend)r$   
sleep_timer   rG   r   waitF   s   zRateLimiter.waitN)r?   r@   rA   r%   rN   r   r   r   r   rB   @   s    rB   c                    s>  t | |}|std g dfS t| || }|dkr$tdtd ntddd tdd	gd
dt	dtj
g }d}tddd fdd}t|dd}	t d }
t||k rt |
k r|t| }t|d}dtdd }||I d H }|D ]`}t||kr nW|j}z1||||I d H }|D ]#}t||k r|d vr|| |d  |d7 }|	d qW n ty } ztd|  W Y d }~qd }~ww t||kr nqt||k rt |
k sft||k rtdt| d| d W d    n	1 sw   Y    |tfS )Nz=No embeddings found for the given client ID and reference ID.r   zllama3-70b-8192)
model_namegroq_api_keyzhttp://127.0.0.1:11434z	llama3:8b)base_urlr	   contextnum_questionsa7  
        {context}

        Generate {num_questions} multiple-choice questions based on the provided context. Each question should include one correct answer and three plausible but incorrect options. Follow the format below:
        Q: <question>
        A) <option_a>
        B) <option_b>
        C) <option_c>
        D) <option_d>
        Correct: <correct_letter>

        Guidelines:

    Ensure questions are clear, relevant, and focused on key details from the context.
    Phrase questions concisely to avoid ambiguity.
    Avoid questions like "in this document" or "in this code" or "  in given document" unless accompanied by specific, relevant information.
    Make incorrect options (distractors) credible and similar in structure or content to the correct answer, encouraging thoughtful selection.

        )input_variablestemplatezFQ: (.+?)\nA\) (.+?)\nB\) (.+?)\nC\) (.+?)\nD\) (.+?)\nCorrect: ([A-D])      <   )rC   rD   c              
      s     I d H  j| |d} |gI d H }|j}g }|D ]/}| \}}	}
}}}| }|vrQ||t| ||	 |
 | | d|d q"|S )N)rR   rS   )ABCD)numberr:   optionscorrect_answer)	rN   formatainvoker/   finditergroupsr*   rL   r=   )rR   start_number
batch_sizepromptresponseresponse_strr#   matchr:   option_aoption_boption_coption_dcorrect_letterllm	mcq_regexmcq_templatequestion_historyrate_limiterr   r   process_context~   s,   
z%generate_mcq.<locals>.process_contextzGenerating MCQs)totaldesci,     zRandom query for retrieval i  r:   zError processing context: zWarning: Only generated z unique questions out of z requested.)r   r   r    as_retrieverr   GROQ_API_KEYr   r   recompileDOTALLrB   r   rI   r=   minrandomrandintra   page_contentrL   r8   update	Exceptionr6   )r   rS   r   GPUr   	retrievermcqsquestion_numberru   pbartimeoutremaining_questionsre   queryresultsresultrR   new_mcqsmcqer   ro   r   generate_mcqO   st   




r   c            	         s   d} d}d}d}t | |||I d H \}}|std d S td|  |D ].}td|d  d	|d
   |d  D ]\}}t| d|  q<td|d  d q&d S )Nrich      rV   z'No multiple-choice questions generated.zTotal questions in history: z	Question r]   z: r:   r^   z) zCorrect Answer: r_   
)r   r   items)	r   rS   r   r   r   history_countr   optiontextr   r   r   main   s    r   __main__)r   r   langchain_community.embeddingsr    langchain_community.vectorstoresr   langchain_groqr   langchain.promptsr   langchain_ollamar   r{   r,   rJ   r   rI   rz   r   r    rB   r   r   r?   runr   r   r   r   <module>   s*     h