o
    @)i                     @   s6   d dl Z d dlZd dlZd dlmZ G dd dZdS )    N)Listc                	   @   s   e Zd ZddedefddZdded	ed
eeef fddZded
efddZded
e	fddZ
ddee d	ed
eee ef fddZdS )
Translatorhttp://localhost:11434
gemma3:12b
ollama_urlmodelc                 C   s   || _ || _| d| _d S )Nz/api/generate)r   r   api_endpoint)selfr   r    r
   T/var/www/eduai.edurigo.com/language_translate_question_text/production/translator.py__init__   s   zTranslator.__init__Hinditexttarget_languagereturnc           	      C   s   z_|  s|dddfW S d| d| }| j|ddddg d	d
d}tj| j|ddidd}|jdkrX| }|dd  }| |}|dd|ddd}||fW S |dddfW S  t	yo   |dddf Y S w )Nr   )prompt_eval_count
eval_countz Translate the following text to z]. Provide ONLY the translated text, without any explanations, prefixes, or additional text:

Fg?g?i  )	IMPORTANTzInstructions:zText to translate:zHere'sExplanationz	Original:z**)temperaturetop_p
max_tokensstop)r   promptstreamoptionszContent-Typezapplication/json<   )jsonheaderstimeout   response r   r   )
stripr   requestspostr   status_coder   get_clean_response	Exception)	r	   r   r   r   payloadr!   resulttranslated_texttoken_usager
   r
   r   translate_text   sB   




zTranslator.translate_textc                 C   sv   g d}|D ]}t j|d|t jt jB t jB d}qt dd|}t dd|}t dd|}t d	d|}| }|S )
zBClean up LLM response to remove instruction bleeding and artifacts)z#IMPORTANT INSTRUCTIONS?:.*?(?=\n|$)zInstructions?:.*?(?=\n|$)zText to translate:.*?(?=\n|$)zHindi translation:.*?(?=\n|$)z![A-Za-z]+ translation:.*?(?=\n|$)zHere's the.*?(?=\n|$)z keeping the HTML tags.*?(?=\n|$)z\*\*.*?\*\*zExplanation:.*?(?=\n|$)z\* .*?(?=\n|$)z- Keep all HTML.*?(?=\n|$)z- Only translate.*?(?=\n|$)z- Preserve.*?(?=\n|$)z- If there are.*?(?=\n|$)z<[^>]*?\s+[^>]*?>zOriginal:.*?(?=\n|$)z'So, if the original text was.*?(?=\n|$)z+The translated version would be:.*?(?=\n|$)r"   )flagsz<\s+([^>]+)\s*>z<\1>z<([^>]+)\s+>z\n\s*\n\s*\n+
z\n\s+)resub
IGNORECASE	MULTILINEDOTALLr#   )r	   r   cleanup_patternspatternr
   r
   r   r(   =   s   "zTranslator._clean_responsec                 C   s   |sdS t |d S )zs
        Estimate token count for given text
        Basic estimation: ~4 characters per token for English
        r      )len)r	   r   r
   r
   r   count_tokensc   s   zTranslator.count_tokens	sentencesc           
      C   sh   g }d}d}|D ]}|  ||\}}|| ||dd7 }||dd7 }q|||| d}	||	fS )Nr   r   r   )input_tokensoutput_tokenstotal_tokens)r.   appendr'   )
r	   r;   r   translated_sentencestotal_prompt_tokenstotal_completion_tokenssentence
translatedr-   token_summaryr
   r
   r   translate_arrayl   s   
zTranslator.translate_arrayN)r   r   )r   )__name__
__module____qualname__strr   tupledictr.   r(   intr:   r   rF   r
   r
   r
   r   r      s     1&,	r   )r1   r$   r   typingr   r   r
   r
   r
   r   <module>   s
    