import click
from tqdm import tqdm

from nltk import word_tokenize
from nltk.util import parallelize_preprocess

CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])


@click.group(context_settings=CONTEXT_SETTINGS)
@click.version_option()
def cli():
    pass


@cli.command("tokenize")
@click.option(
    "--language",
    "-l",
    default="en",
    help="The language for the Punkt sentence tokenization.",
)
@click.option(
    "--preserve-line",
    default=True,
    is_flag=True,
    help="An option to preserve the line and not sentence tokenize it.",
)
@click.option("--processes", "-j", default=1, help="No. of processes.")
@click.option("--encoding", "-e", default="utf8", help="Specify encoding of file.")
@click.option(
    "--delimiter", "-d", default=" ", help="Specify delimiter to join the tokens."
)
def tokenize_file(language, preserve_line, processes, encoding, delimiter):
    """This command tokenizes a text stream using nltk.word_tokenize."""
    # Note: `language` and `preserve_line` are accepted as options but are
    # not forwarded to word_tokenize() in this body.
    with click.get_text_stream("stdin", encoding=encoding) as fin:
        with click.get_text_stream("stdout", encoding=encoding) as fout:
            # With a single process, joblib parallelization would only add
            # overhead, so tokenize the stream line by line directly.
            if processes == 1:
                for line in tqdm(fin.readlines()):
                    print(delimiter.join(word_tokenize(line)), end="\n", file=fout)
            # Otherwise, fan the lines out across worker processes.
            else:
                for outline in parallelize_preprocess(
                    word_tokenize, fin.readlines(), processes, progress_bar=True
                ):
                    print(delimiter.join(outline), end="\n", file=fout)
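
# ---------------------------------------------------------------------------
# Editorial usage sketch (not part of the original module): it exercises the
# same per-line word_tokenize pipeline that the single-process branch of the
# `tokenize` command runs, using an in-memory list instead of stdin. It
# assumes the Punkt tokenizer models are available, e.g. via
# nltk.download("punkt"); the sample sentences are illustrative only. With
# --processes > 1, the command instead routes lines through the joblib-backed
# nltk.util.parallelize_preprocess helper.
if __name__ == "__main__":
    sample_lines = ["Good muffins cost $3.88.", "Thanks."]
    for sample_line in sample_lines:
        # Mirrors the command body's:
        #   print(delimiter.join(word_tokenize(line)), end="\n", file=fout)
        print(" ".join(word_tokenize(sample_line)))
    # Expected output (Treebank-style tokenization):
    #   Good muffins cost $ 3.88 .
    #   Thanks .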