o
    m-f>                     @   s   d dl mZmZmZmZmZmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZ dd Zdd Zdd Zdd	 Zd
d Zdd Zdd ZdS )    )summary_modelsummary_tokenizerquestion_modelquestion_tokenizersentence_transformer_models2vnormalized_levenshteindevicepostprocesstextnltk	stopwordsstringpkeKeywordProcessorcosine_similarityset_seedrandomnpc              
      s2   zt | dW S  ty } ztd|d }~ww )N	Sense2VeczError generating questions)generate_question	Exception
ValueError)contexte r   J/var/www/chatrigo.edurigo1.com/get_recommendations/testing/main_dynamic.pygenerate_question_async   s   
r   c           
         s   t j }|j| dd ddh}ttj}|g d7 }|td7 }|j	|d |j
dd	d
d |jdd}dd |D }t }|D ]}|| qB||  fdd|D }	|	d d S )Nen)inputlanguagePROPNNOUN)z-lrb-z-rrb-z-lcb-z-rcb-z-lsb-z-rsb-english)posg?g      ?average)alpha	thresholdmethod   nc                 S   s   g | ]}|d  qS r   r   .0valr   r   r   
<listcomp>       z get_keywords.<locals>.<listcomp>c                    s   g | ]}| v r|qS r   r   )r-   keywordkeywords_foundr   r   r/      s       )r   unsupervisedMultipartiteRankload_documentlistr   punctuationr   wordscandidate_selectioncandidate_weighting
get_n_bestr   add_keywordextract_keywords)
originaltextsummarytext	extractorr#   stoplist
keyphraseskeywordskeyword_processorr1   important_keywordsr   r2   r   get_keywords   s    


rH   c              	      sz   d | |} j|dddddt}|d |d }}|j||dd	d
ddd} fdd|D }	|	d dd }
|
S )Nzcontext: {} answer: {}i  FTpt)
max_lengthpad_to_max_length
truncationreturn_tensors	input_idsattention_mask         H   )rN   rO   early_stopping	num_beamsnum_return_sequencesno_repeat_ngram_sizerJ   c                    s   g | ]	} j |d dqS )T)skip_special_tokens)decode)r-   ids	tokenizerr   r   r/   "   s    z get_question.<locals>.<listcomp>r   z	question: )formatencode_plustor	   generatereplacestrip)r   answermodelr\   textencodingrN   rO   outsdecquestionr   r[   r   get_question   s   rk   c           
         s   g }z|j | g dd |j |d} fdd|D }W n   g }Y d}| g}| }|D ]}	t|d  |	 |k rL|	|vrL|	|vrL||	 q/|dd  S )	N)r!   PERSONPRODUCTLOCORGEVENTNORPzWORK OF ARTFACGPENUMFACILITY)sensesr)   c                    sN   g | ]#}|d   dd   dd kr|d   dd  dd  qS )r   |rQ   _ )splitrb   titlerc   r,   senser   r   r/   +   s   N z'sense2vec_get_words.<locals>.<listcomp>g333333?r   rQ   )get_best_sensemost_similarrz   r   
similaritylowerappend)
wordr   topnrj   outputr   r&   final	checklistxr   r|   r   sense2vec_get_words&   s   ,
r   c                    s   t || }t |}t|g  fddttD }t|d D ]8}||d d f }	tj|| d d  f dd}
||	 d| |
dd  }|t| } | || q"fdd D S )Nc                    s   g | ]
}| d  kr|qS r+   r   )r-   i)keywords_idxr   r   r/   :   s    zmmr.<locals>.<listcomp>rQ   )axisc                    s   g | ]} | qS r   r   )r-   idx)r:   r   r   r/   B   r0   )	r   r   argmaxrangelenmaxreshaper   remove)doc_embeddingword_embeddingsr:   top_nlambda_paramword_doc_similarityword_similaritycandidates_idxrx   candidate_similaritiestarget_similaritiesmmrmmr_idxr   )r   r:   r   r   6   s   

r   c                 C   s   t | |||}t|dkr|S |  g}|| |d |   }||g}	||}
tt|d}t|	|
|||}|  g}|D ]}| |  krS||  qB|dd  }|S )Nr   ry   rP   rQ   )	r   r   
capitalizeextendencodeminr   r   r   )r   origsentencesense2vecmodelsentencemodelr   	lambdavaldistractorsdistractors_newembedding_sentencekeyword_embeddingdistractor_embeddingsmax_keywordsfiltered_keywordsr   wrdr   r   r   get_distractorsD   s"   



r   c           
      C   s   t | ttt}t| |}g }|D ]/}t||tt}t||t	t
dd}|d d }| g| }	t|	 ||| |	d q|S )N(   g?   )rj   correct_answeroptions)
summarizerr   r   r	   rH   rk   r   r   r   r   r   r   r   shuffler   )
r   radiobuttonsummary_textrE   r   rd   rj   r   selected_distractorsr   r   r   r   r   V   s   

r   N)main_staticr   r   r   r   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   rH   rk   r   r   r   r   r   r   r   r   <module>   s   P	