o
    agx                     @   s   d dl Z d dlZ	 edZi ddddddddd	d
ddddddddddddddddddddddd d!d!d"dd#d$d%d&d'd(d)	Ze d*e jZe d+e jZd0d.d/Z	dS )1    N($  aftncnaftnsepaftnsepc
annotation	atnauthoratndateatnicnatnid	atnparentatnrefatntimeatrfend	atrfstartauthor
backgroundbkmkend	bkmkstartblipuidbuptimcategorycolorschememappingcolortblcommentcompanycreatim	datafield	datastoredefchpdefpapdodoccommdocvar
dptxbxtextebcendebcstartfactoidnamefaltfchars	ffdeftext
ffentrymcr	ffexitmcrffformat
ffhelptextfflffname
ffstattextfilefiletblfldinstfldtypefnamefontembfontfilefonttblfooterfooterffooterlfooterrfootnote	formfieldftncnftnsepftnsepcg	generatorgridtblheaderheaderfheaderlheaderrhlhlfr	hlinkbasehllochlsrchsvhtmltaginfokeycodekeywordslatentstyleslcharslevelnumbers	leveltextlfolevellinkvallist	listlevellistnamelistoverridelistoverridetablelistpictureliststylename	listtablelisttextlsdlockedexceptmaccmaccPr	mailmergemalnmalnScrmanagermargPrmbarmbarPrmbaseJcmbegChr
mborderBoxmborderBoxPrmboxmboxPrmchrmcountmctrlPrmdmdegmdegHidemdenmdiffmdPrmemendChrmeqArrmeqArrPrmfmfNamemfPrmfuncmfuncPr	mgroupChrmgroupChrPrmgrowmhideBot	mhideLeft
mhideRightmhideTopmhtmltagmlimmlimlocmlimlow	mlimlowPrmlimupp	mlimuppPrmmmmaddfieldnamemmath	mmathPictmmathPrmmaxdistmmcmmcJcmmconnectstrmmconnectstrdatammcPrmmcsmmdatasourcemmheadersourcemmmailsubjectmmodsommodsofiltermmodsofldmpdatammodsomappedname
mmodsonamemmodsorecipdata
mmodsosort	mmodsosrcmmodsotable	mmodsoudlmmodsoudldatammodsouniquetagmmPrmmquerymmrmnarymnaryPrmnoBreakmnummobjDistmoMath
moMathParamoMathParaPrmopEmumphantmphantPrmplcHidemposmrmradmradPrmrPrmsepChrmshowmshpmsPremsPrePrmsSubmsSubPrmsSubSup
msSubSupPrmsSupmsSupPrmstrikeBLTRmstrikeHmstrikeTLBRmstrikeVmsubmsubHidemsupmsupHidemtranspmtypemvertJcmvfmfmvfmlmvtofmvtolmzeroAsc	mzeroDescmzeroWidnesttablepropsnextfilenonesttablesobjaliasobjclassobjdataobjectobjnameobjsectobjtime	oldcprops	oldpprops	oldsprops	oldtpropsoleclsidoperatorpanosepasswordpasswordhashpgppgptblpicproppictpnpnseclvlpntextpntxtapntxtbprintimprivatepropnameprotend	protstartprotusertblpxeresultrevtblrevtimrsidtblrxeshpshpgrpshpinstshppictshprsltshptxtsnsp	staticval
stylesheetsubjectsvsvbtctemplate	themedatatitletxeudupr	userpropswgrffmtfilterwindowcaptionwritereservationwritereservhashxexformxmlattrnamexmlattrvaluexmlclosexmlnamexmlnstblxmlopenpar
sectz

pagelinetab	emdashu   —endashu   –emspaceu    enspaceu    qmspaceu    bulletu   •lquoteu   ‘rquoteu   ’	ldblquoteu   “	rdblquoteu   ”rowcell|    {}\   ­u   ‑)	nestcell~r'  r;  r<  r=  r>  -_zO\\([a-z]{1,32})(-?\d{1,10})?[ ]?|\\'([0-9a-f]{2})|\\([^a-z])|([{}])|[\r\n]+|(.)zP(\{\\field\{\s*\\\*\\fldinst\{.*HYPERLINK\s(\".*\")\}{2}\s*\{.*?\s+(.*?)\}{2,3})cp1252strictc              	   C   s  t td| } g }d}d}d}d}d}t| D ]}	|	 \}
}}}}}|r6|s6|t|j||d7 }d}|rXd}|dkrF|	||f q|d	krW|rS|
 \}}qd}d
}q|rpd}|tv ri|sh|t| 7 }q|dkrod
}q|
rd}|
tv r{d
}n|
dkrd| }zt| W n ty   d}Y nw |rq|
tv r|t|
 7 }q|
dkrt|}q|
dkr|du r|}qt|}|dk r|d7 }|t|7 }|}q|r|dkr|d8 }q|st|d}|s|}q||7 }q|r|dkr|d8 }q|s||7 }q|S )aW   Converts the rtf text to plain text.

    Parameters
    ----------
    text : str
        The rtf text
    encoding : str
        Input encoding which is ignored if the rtf file contains an explicit codepage directive, 
        as it is typically the case. Defaults to `cp1252` encoding as it the most commonly used.
    errors : str
        How to handle encoding errors. Default is "strict", which throws an error. Another
        option is "ignore" which, as the name says, ignores encoding errors.

    Returns
    -------
    str
        the converted rtf text as a python unicode string
    z\1(\2)F   r   N )encodingerrorsr<  r=  T*ansicpgcputf8ucui      )resub
HYPERLINKSPATTERNfinditergroupsbytesfromhexdecodeappendpopspecialcharsdestinationscodecslookupLookupErrorintchr)textrH  rI  stack	ignorableucskipcurskiphexesoutmatchwordarg_hexcharbracetcharc rr  k/var/www/eduai.edurigo.com/doc_train/edurigo_ai/Puru/venv/lib/python3.10/site-packages/striprtf/striprtf.pyrtf_to_textb   s   




rt  )rD  rE  )
rQ  r^  	frozensetr]  r\  compile
IGNORECASErT  rS  rt  rr  rr  rr  rs  <module>   st    .	
