o
    oh-                  
   @   s|  d dl mZ d dlmZ d dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlZd dlmZ d dlm	Z	 d dlmZ d dlmZ d dlZd dlZd dlZd dl Z e Z!d dl"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z( e!)ddd Z*	 G dd deZ+G dd deZ,dd Z-e!j.de+de,ide,iddedededfde/de/de/fd d!Z0	 d dl1Z1e!.d"ededfde/de/fd#d$Z2	 d d%l3m3Z3 G d&d' d'eZ4e!.d(ededede fde5d)e5de5d*e5fd+d,Z6	 	 d d-l7m8Z8 G d.d/ d/eZ9G d0d deZ,e!j.d1e9de,ide,iddededfde/d2e/fd3d4Z:	 d d5l;m<Z<m=Z=m>Z>m?Z?m@Z@ G d6d7 d7eZAG d8d9 d9eZBd:d; ZC	 	 e!j.d<eAdeBideBiddedededfde/de/de/fd=d!Z0	 d d>lDmDZD G d?d' d'eZ4e!.d@ededede fde5d)e5de5d*e5fdAd,Z6	 dS )B    )HTTPException)NamedTemporaryFileN)List)FastAPI
UploadFileFileFormBody)JSONResponse)Document)RecursiveCharacterTextSplitter)	BaseModel)OllamaEmbeddings)AudioSegment)r   )BytesIO)load_pdf split_text_with_semantic_chunkercount_tokens_in_documentssave_documents_to_txtcreate_and_save_embeddingsmerge_all_faissz)/helloEdurigoAiChatAndGenQuestionsTestingc                   C   s   ddiS )Nmessagez!Hello from edurigo_ai testing env r   r   r   R/var/www/eduai.edurigo.com/question_generate/testing/question_generate_fast_api.pyhello   s   r   c                   @      e Zd ZU eed< eed< dS )TrainResponser   token_countN__name__
__module____qualname__str__annotations__intr   r   r   r   r   %      
 r   c                   @      e Zd ZU eed< dS ErrorResponseerrorNr   r    r!   r"   r#   r   r   r   r   r(   )      
 r(   c              
   C   N   zt jddd| gddd}|jW S  t jy& } z	tdd| dd }~ww 	Ncurlz-kz-sT)capture_outputcheck  zError fetching document: status_codedetail
subprocessrunstdoutCalledProcessErrorr   urlresulter   r   r   fetch_document_with_curl-   s   r>   z/train_with_document_edurigo_aimodel)r1     )response_model	responses.	client_iddocument_urlreference_idc              
   C   sl  t j|d  }|dvrtdddzz~t|}tjd|d}|| |j	}t
d|  W d    n1 s;w   Y  t|}t
t| d	 td
d}t||}	t
d t
dt|	 d d|  d| }
t|	|
 t|	| | t| |}t|}d|dW W t j|rt | S S  ty } z	tdd| dd }~ww t j|rt | w w )N   )z.pdf.txtz.docxz.docz.pptxz.pptr1   zRUnsupported file type. Only .pdf, .txt, .docx, .doc, .pptx and .ppt are supported.r2   F)deletesuffixzTemporary file saved at: z  pages loaded from the document.znomic-embed-text)r?   z-Text successfully split into semantic chunks.zDocuments after splitting: z chunks created.ztemp/_z7Training and embedding creation completed successfully.r   r   r@   Processing failed: )ospathsplitextlowerr   r>   tempfiler   writenameprintr   lenr   r   r   r   r   r   existsremove	Exception)rC   rD   rE   file_extensiondocument_content	temp_filetemp_file_pathdocs
embeddingssplit_documents
output_dirmerged_embeddingsr   r=   r   r   r   process_document5   s@   




rb   z/untrain_document_edurigo_aic              
      s   z.d| }t j|| }t j|r*t j|r t | nt| ddiW S tddd t	yC } ztdt
|dd }~ww )Nzmy_embeddings/r   zDocument untrained successfullyi  zDocument not foundr2   r@   )rM   rN   joinrV   isfilerW   shutilrmtreer   rX   r"   )rE   rC   
local_path	file_pathr=   r   r   r   delete_documenth   s   


ri   generate_mcqc                   @   .   e Zd ZU eed< eed< eed< eed< dS GenerateMCQRequestrC   num_questionsrE   isGPUNr   r    r!   r$   r#   r   r   r   r   rn   }   
   
 rn   z/generate_mcqro   rp   c                    s$   t | |||I d H \}}||dS )N)history_countmcqsrj   )rC   ro   rE   rp   rt   rs   r   r   r   generate_mcq_endpoint   s   
ru   )answer_questionc                   @   s2   e Zd ZU eed< eed< eed< ee ed< dS )ChatResponseanswerprocessing_timetotal_tokenssource_documentsN)r   r    r!   r"   r#   floatr$   r   r   r   r   r   rw      s
   
 rw   c                   @   r&   r'   r*   r   r   r   r   r(      r+   z/chat_with_edurigo_aiquestionc              
      s   zt t| |\}}}}t||||dW S  ty) } ztdt|dd }~w ty? } ztddt| dd }~ww )N)rx   ry   rz   r{   r1   r2   r@   zAn error occurred: )rv   r$   rw   FileNotFoundErrorr   r"   rX   )rC   r}   rx   source_reference_idsrz   ry   r=   r   r   r   chat_with_document   s    r   )convert_video_to_audioaudio_to_textload_and_split_document1save_faiss_indexcount_tokens_videoc                   @   r   )TrainResponse1r   r   Nr   r   r   r   r   r      r%   r   c                   @   r&   )ErrorResponse1r)   Nr*   r   r   r   r   r      r+   r   c              
   C   r,   r-   r5   r:   r   r   r   fetch_document_with_curl1   s   r   z/train_with_video_edurigo_aic              
      s  t j|d  }|dvrtdddd| }zzd| d}| d	}d
|  d| }t|}t|d}	|	| W d    n1 sHw   Y  d}
|
dkrYtdddt|| t	||}t
|}|sotdddt|}d
|  d| }t j|dd t|| d|dW W t j|rt | t j|rt | S S  ty } ztddt| dd }~ww t j|rt | t j|rt | w w )NrF   )z.mp4r1   z+Unsupported file type. Only .mp4 supported.r2   temp_documentz
mp3-files/z.mp3rG   zmy_embeddings_video//wb      zVideo file too large.zNo valid documents to process.T)exist_okSuccessrK   r@   rL   )rM   rN   rO   rP   r   r   openrR   r   r   r   r   makedirsr   rV   rW   rX   r"   )rC   rD   rE   rY   r\   mp3_output_pathoutput_filefaiss_save_pathrZ   r[   video_durationtextfinal_documentsr   folder_pathr=   r   r   r   rb      sP   






generate_mcq_videoc                   @   rl   rm   rq   r   r   r   r   rn     rr   z/generate_mcq_videoc                    s(   t | |||I d H \}}}|||dS )N)rs   rt   r   r   )rC   ro   rE   rp   rt   rs   r   r   r   r   ru     s   )Ehttp.clientr   rQ   r   	tracebacktypingr   fastapir   r   r   r   r	   fastapi.responsesr
   langchain.docstore.documentr   langchain.text_splitterr   pydanticr   langchain_community.embeddingsr   uvicornpydubr   fastapi.datastructuresior   r6   rM   asynciorequestsapptrainingr   r   r   r   r   r   getr   r   r(   r>   postr"   rb   re   ri   rk   rn   r$   ru   chattingrv   rw   r   training_videor   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s|     
,-"4",34