o
    Jph$                     @   s  d dl mZ d dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlZd dlmZ d dlmZ d dlmZ d dlmZmZmZmZ d d	lmZ d d
lmZ d dlZeddZdZG dd deZdd Z	 dd Z dd Z!	 	 dd Z"dd Z#dd Z$dd Z%dd Z&e'd kre&  dS dS )!    )GroqN)VideoFileClip)AudioSegment)TokenTextSplitterDocument)RecursiveCharacterTextSplitter)PyPDFLoader
TextLoaderDocx2txtLoaderUnstructuredPowerPointLoader)FAISS)OllamaEmbeddings8gsk_CEh3itIpUAkEkEKsUDqVWGdyb3FYoTjqmXNTBHOSxJFK3obGTzXZ)api_keyzwhisper-large-v3c                   @   s   e Zd ZdS )VideoTooLargeExceptionN)__name__
__module____qualname__ r   r   I/var/www/eduai.edurigo.com/question_generate/production/training_video.pyr      s    r   c                 C   sN   t | }|jd }t| tj| dd}|j|dd td|  d|  d S )N<   mp4)formatmp3z
Converted z to )r   durationprintr   	from_fileexport)
video_pathmp3_output_pathclipvideo_durationaudior   r   r   convert_video_to_audio   s   
r$   c                 C   s   t j|r)td|  t|ddd}| }W d    n1 s#w   Y  nHtd|   t| d}tjjj	| | ft
d}W d    n1 sMw   Y  |j}t|ddd}|| W d    n1 slw   Y  d	|d
}|S )NzReading translation from rutf-8encodingzGenerating translation for rb)filemodelw
z. )ospathexistsr   openreadclientr#   translationscreater+   textwritejoinsplit)filepathoutput_fileftranslation_textr*   translationformatted_textr   r   r   audio_to_textQ   s&   

r@   c                 C   s@   | j jjdd| ddd|dgdd}t|jd jj d S )	NsystemzjUse this transcript or transcripts to answer any user questions, citing specific quotes:

                z
                )rolecontentuserzllama3-8b-8192)messagesr+   r   )chatcompletionsr5   r   choicesmessagerC   )r3   
transcriptuser_questionchat_completionr   r   r   transcript_chat_completionk   s   rM   c              
      s   z2t  ddd}| }W d    n1 sw   Y  tddd}||} fdd|D }|W S  tyG } z	td	d
| dd }~ww )Nr%   r&   r'   i     )
chunk_sizechunk_overlapc                    s   g | ]
}t |d  idqS )source)page_contentmetadatar   ).0chunk	file_pathr   r   
<listcomp>   s    z,load_and_split_document1.<locals>.<listcomp>i  zError processing document: )status_codedetail)r1   r2   r   
split_text	Exception)rW   r*   r6   text_splitterchunks	documentser   rV   r   load_and_split_document1   s   

ra   c                 C   s2   t dd}t| |}|| td|  d S )Nznomic-embed-text)r+   zFAISS index saved to )r   r   from_documents
save_localr   )r_   faiss_save_pathembedding_function	docsearchr   r   r   save_faiss_index   s   

rg   c                 C      t dd | D S )Nc                 s       | ]
}t |j V  qd S NlenrR   r9   rT   docr   r   r   	<genexpr>       z%count_tokens_video.<locals>.<genexpr>sumr_   r   r   r   count_tokens_video      rt   c                 C   rh   )Nc                 s   ri   rj   rk   rm   r   r   r   ro      rp   zcount_tokens.<locals>.<genexpr>rq   rs   r   r   r   count_tokens   ru   rv   c            
      C   s   d} d}d}d}d}d| d| }t   }t| | t|| t|}	 t|| t   }t|}	td|| d	d
 td|	  d S )NzGaur_Gopal.mp4zmp3-files/Gaur_Gopal.mp3zGaur_Gopal.txtB   
Gaur_Gopalzmy_embeddings_video//zTraining process took z.2fz secondsz(Total tokens processed during training: )timer$   r@   ra   rg   rv   r   )
r   r    r;   	client_idreference_idfolder_path
start_timer_   end_timetoken_countr   r   r   main   s    


r   __main__)(groqr   r.   pandaspdnumpynpmoviepyr   pydubr   ffmpeglangchain.text_splitterr   langchain.docstore.documentr   r   $langchain_community.document_loadersr	   r
   r   r    langchain_community.vectorstoresr   langchain_community.embeddingsr   rz   r3   r+   r\   r   r$   r@   rM   ra   rg   rt   rv   r   r   r   r   r   r   <module>   s>    
	/%(
