o
    Fg1                  	   @   sx   d dl mZ d dlZd dlmZ d dlmZmZm	Z	 ddede
fd	d
Zdd Zddeee  dede
de	fddZdS )    )ListN)sort_blocks)PageBboxTables{Gz?d   pageimg_sizec              	   C   sH  g }| d D ]}|d D ]}|d D ]}}t dt|d D ]q}|d |d  }	|d | }
| d dkrG||
d d	 |	d d
  |d	   q| d dkra||
d d |	d d  |d   q| d dkr{||	d d	 |
d d
  |d	   q||	d d |
d d  |d   qqqqt||krt|d}|S |}|S )Nblockslinesspans   charsrotationZ   bboxr              P   )rangelenappendnp
percentile)r	   r
   default_thresh	min_charsspace_distsblocklinespanichar1char2cell_gap_thresh r'   h/var/www/eduai.edurigo.com/doc_train/edurigo_ai/Puru/venv/lib/python3.10/site-packages/pdftext/tables.pyget_dynamic_gap_thresh   s*   ((((r)   c              	      s@  d fdd	}|dkr/t || d |d ddd|| d |d	 d|| d |d dd
dgS |dkrVt || d |d ddd|| d |d d|| d |d dd
dgS |dkr}t || d |d ddd|| d	 |d d|| d |d dd
dgS t || d |d ddd|| d |d d|| d |d dd
dgS )Nr   Tc                    s,   |rt ndd }|| |  |  | k S )Nc                 S   s   | S )Nr'   )xr'   r'   r(   <lambda>   s    z7is_same_span.<locals>.normalized_diff.<locals>.<lambda>)abs)ab	dimensionmultuse_absfuncr
   space_threshr'   r(   normalized_diff   s   z%is_same_span.<locals>.normalized_diffr   r   F)r1   r      )r0   r   r   r   )r   T)all)r   curr_boxr
   r4   r   r5   r'   r3   r(   is_same_span   s0   r9   皙?tablesreturnc                 C   s  t dd | D sJ dt|dksJ dg }t|t|||d}| D ]}t|d}g }|d }	|d	 D ]}
|
d
 D ]}t|d d||}|||k rQq<d }d }|d D ]i}|d D ]b}t|d d||j}d}|ryt|||||	}|d u r|d }|}q_|r||d 7 }t	|d |d t	|d |d t|d |d t|d |d g}q_|
 r|||d |d }|}q_qY|d ur|
 r|||d q<q6|D ]*}|d d |d  |d d |d  |d d |d  |d d |d  g|d< qt|}|| q%|S )Nc                 s   s    | ]	}t |d kV  qdS )   N)r   ).0tabler'   r'   r(   	<genexpr>>   s    z"table_cell_text.<locals>.<genexpr>zJTables must be a list of 4 ints representing the bounding box of the tabler   zQimg_size must be a list of 2 ints representing the image dimensions width, height)r   )r   r   r   r   r   r   r   Fcharr   r   r   )textr   )r7   r   maxr)   r   rescaleintersection_pctr   r9   minstripr   r   )r;   r	   r
   table_threshr4   table_textsr?   
table_poly
table_textr   r    r!   	line_bbox	curr_spanr8   r"   rA   r   	same_spanitemr'   r'   r(   table_cell_text;   s^   
  
rP   )r   r   )r:   r   )typingr   numpyr   pdftext.postprocessingr   pdftext.schemar   r   r   listr)   r9   intrP   r'   r'   r'   r(   <module>   s    (