o
    3-i                     @   s6   d dl Z d dlZd dlZd dlmZ G dd dZdS )    N)Listc                	   @   s   e Zd ZddedefddZdded	ed
eeef fddZded
efddZded
e	fddZ
ddee d	ed
eee ef fddZdS )
Translatorhttp://localhost:11434
gemma3:12b
ollama_urlmodelc                 C   s   || _ || _| d| _d S )Nz/api/generate)r   r   api_endpoint)selfr   r    r
   >/var/www/eduai.edurigo.com/lang_trans_text/hindi_translator.py__init__   s   zTranslator.__init__Hinditexttarget_languagereturnc           	      C   s   z_|  s|dddfW S d| d| }| j|ddddg d	d
d}tj| j|ddidd}|jdkrX| }|dd  }| |}|dd|ddd}||fW S |dddfW S  t	yo   |dddf Y S w )Nr   )prompt_eval_count
eval_countzTranslate this text to zj. Provide only the translated text, no explanations or additional content. Keep all HTML tags unchanged:

Fg?g?i  )	IMPORTANTzInstructions:zText to translate:)temperaturetop_p
max_tokensstop)r   promptstreamoptionszContent-Typezapplication/json<   )jsonheaderstimeout   response r   r   )
stripr   requestspostr   status_coder   get_clean_response	Exception)	r	   r   r   r   payloadr    resulttranslated_texttoken_usager
   r
   r   translate_text   sB   




zTranslator.translate_textc                 C   sb   g d}|D ]}t j|d|t jt jB d}qt dd|}t dd|}t dd|}| }|S )	zBClean up LLM response to remove instruction bleeding and artifacts)
z#IMPORTANT INSTRUCTIONS?:.*?(?=\n|$)zInstructions?:.*?(?=\n|$)zText to translate:.*?(?=\n|$)zHindi translation:.*?(?=\n|$)z![A-Za-z]+ translation:.*?(?=\n|$)z- Keep all HTML.*?(?=\n|$)z- Only translate.*?(?=\n|$)z- Preserve.*?(?=\n|$)z- If there are.*?(?=\n|$)z<[^>]*?\s+[^>]*?>r!   )flagsz<\s+([^>]+)\s*>z<\1>z<([^>]+)\s+>z\n\s*\n\s*\nz

)resub
IGNORECASE	MULTILINEr"   )r	   r   cleanup_patternspatternr
   r
   r   r'   =   s   zTranslator._clean_responsec                 C   s   |sdS t |d S )zs
        Estimate token count for given text
        Basic estimation: ~4 characters per token for English
        r      )len)r	   r   r
   r
   r   count_tokensZ   s   zTranslator.count_tokens	sentencesc           
      C   sh   g }d}d}|D ]}|  ||\}}|| ||dd7 }||dd7 }q|||| d}	||	fS )Nr   r   r   )input_tokensoutput_tokenstotal_tokens)r-   appendr&   )
r	   r8   r   translated_sentencestotal_prompt_tokenstotal_completion_tokenssentence
translatedr,   token_summaryr
   r
   r   translate_arrayc   s   
zTranslator.translate_arrayN)r   r   )r   )__name__
__module____qualname__strr   tupledictr-   r'   intr7   r   rC   r
   r
   r
   r   r      s     1,	r   )r/   r#   r   typingr   r   r
   r
   r
   r   <module>   s
    