o
    Ui3                     @   sf  d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
mZ ddlmZmZmZmZ ddlmZ ddlmZmZ ee jZed Zejdd	 ed
 Zg dZh dZdZej ej!ddd e"dZ#G dd dZ$e$ Z%G dd dZ&de'de'dee' fddZ(dd Z)e*dkre+d e(dd Z,e+d!e,re-e,nd  dS dS )"a  
================================================================================
 STORIGO IMAGE GENERATOR v2.0 (Professional Rewrite)
================================================================================
 - Strict reliance on 'visualization_suggestion' (Usage of synonyms removed)
 - Global Deduplication (Persists across runs)
 - Multi-strategy Bing Scraping (Robust & Fast)
 - High Quality Filtering
================================================================================
    N)Path)OptionalListDictSet)BeautifulSoup)
quote_plusurlparsegenerated_images_for_storigoT)exist_okzimage_history_v2.json)zoMozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36zuMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36zPMozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0>   iconlogo123rfalamystocksketchvectorcartoonclipartdrawingshutter	watermark
dreamstimedepositphotos   z%%(asctime)s - [STORIGO] - %(message)sz%H:%M:%S)levelformatdatefmt
StorigoImgc                   @   sl   e Zd ZdZdd Zdd Zdd Zded	efd
dZddeded	e	fddZ
dedefddZdd ZdS )HistoryManagerzAManages persistent history of used image URLs and content hashes.c                 C   s   t  | _t  | _|   d S N)set	used_urlsused_hashes_loadself r'   H/var/www/eduai.edurigo.com/storigo/production/storigo_image_generator.py__init__@   s   zHistoryManager.__init__c              
   C   s   t  rczCtt ddd}t|}t|dg | _t|dg | _W d    n1 s.w   Y  t	
dt| j dt| j d W d S  tyb } zt	d	|  W Y d }~d S d }~ww d S )
Nrutf-8encodingurlshasheszLoaded History: z URLs, z HasheszFailed to load history: )HISTORY_FILEexistsopenjsonloadr!   getr"   r#   loggerinfolen	Exceptionerror)r&   fdataer'   r'   r(   r$   E   s   
*zHistoryManager._loadc              
   C   s   z3t | jt | jt| jd}ttddd}tj||dd W d    W d S 1 s,w   Y  W d S  tyN } zt	
d|  W Y d }~d S d }~ww )N)r.   r/   countwr+   r,      )indentzFailed to save history: )listr"   r#   r8   r2   r0   r3   dumpr9   r6   r:   )r&   r<   r;   r=   r'   r'   r(   saveP   s   &zHistoryManager.saveurlreturnc                 C   sV   z#|   }|dddd}|dd}|dd }|dW S    | Y S )z5Robust normalization to catch variations of same URL.zhttps:// zhttp://zwww.?r   /)lowerstripreplacesplitrstrip)r&   rE   r'   r'   r(   	normalize\   s   zHistoryManager.normalizerG   content_hashc                 C   s<   |  |}|| jv rdS || jv rdS |r|| jv rdS dS )NTF)rO   r"   r#   )r&   rE   rP   
norm_inputr'   r'   r(   is_usedl   s   


zHistoryManager.is_usedc                 C   s:   | j | | | j | |r| j| |   d S r    )r"   addrO   r#   rD   )r&   rE   rP   r'   r'   r(   	mark_used   s
   zHistoryManager.mark_usedc                 C   s,   | j   | j  t rt  d S d S r    )r"   clearr#   r0   r1   unlinkr%   r'   r'   r(   rU      s
   

zHistoryManager.clearN)rG   )__name__
__module____qualname____doc__r)   r$   rD   strrO   boolrR   rT   rU   r'   r'   r'   r(   r   =   s    r   c                   @   sh   e Zd ZdZdd Zdd Zddeded	ee	 fd
dZ
ded	ee	 fddZded	ee fddZdS )BingImageFinderz(Robust Bing Image Search implementation.c                 C   s&   t  | _| jjddddd d S )NzJtext/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8zen-US,en;q=0.5z
keep-alive1)AcceptzAccept-Language
ConnectionzUpgrade-Insecure-Requests)requestsSessionsessionheadersupdater%   r'   r'   r(   r)      s   
zBingImageFinder.__init__c                 C   s   dt tiS )Nz
User-Agent)randomchoiceUSER_AGENTSr%   r'   r'   r(   _get_random_header   s   
z"BingImageFinder._get_random_headerr   queryattemptrF   c              
   C   s   |  }d}dt| dd|d   }|d7 }|| }z)| jj|   | jj|dd}|jd	kr?t	d
|j  g W S | 
|jW S  tya } ztd|  g W  Y d}~S d}~ww )z!Performs a search on Bing Images.z"https://www.bing.com/images/searchz?q=z&first=      z&qft=+filterui:photo-photo
   timeout   zBing search failed: zSearch error: N)rK   r   rc   rd   re   ri   r5   status_coder6   warning_parse_htmltextr9   r:   )r&   rj   rk   base_urlparamsfull_urlresponser=   r'   r'   r(   search   s"   
zBingImageFinder.searchhtmlc           
      C   s   t |d}g }|jdddD ];}z4t|dd}|d}|d}|d	d
}|rC||||t|ddt|ddd W q   Y q|sbtd|}	|	D ]}||	dd
d qT|S )z)Extracts image candidates from Bing HTML.zhtml.parseraiusc)class_mz{}murlturltrG   mwr   mh)rE   thumbtitlewidthheightzmurl&quot;:&quot;(.*?)&quot;rl   )rE   r   )
r   find_allr3   loadsr5   appendintrefinditergroup)
r&   r{   soup
candidatesr|   r   r   r   r   matchesr'   r'   r(   rt      s0   


zBingImageFinder._parse_htmlrE   c                 C   s   z9| j j|   | j j|dd}|jdkr7|j}t|td kr7|	ds4|	ds4d|dd	 v r7|W S W dS    Y dS )
z&Downloads image bytes with validation.   ro   rq   i   s      PNG   WEBPN   )
rc   rd   re   ri   r5   rr   contentr8   MIN_FILE_SIZE_KB
startswith)r&   rE   respr   r'   r'   r(   download_image   s   
$zBingImageFinder.download_imageN)r   )rW   rX   rY   rZ   r)   ri   r[   r   r   r   rz   rt   r   bytesr   r'   r'   r'   r(   r]      s    	#!r]   	slide_keyvisualization_promptrF   c              
      s  t   }t }|dddd}t| dk r|d7 }td|  d| d d}d}td	D ]}|j||d
}td|  d| dt| d |j	dd dd |D ]g}	|	
dd|	
dd  trlqVt fddtD stfddtD rqVtd|  ddd  d |}
|
rt|
 }t|rtd|  d qV|
}}t|  nqV|r nq1|rd}|drd}n
d|dd v rd }t|d!}t   | }td|  d"|d#d$ d%| d&| S td|  d' dS )(a  
    Public API used by other files.

    Args:
        slide_key: Unique identifier for the slide/request.
        visualization_prompt: The prompt describing the image.

    Returns:
        Base64 Data URI string (data:image/xyz;base64,...) or None.
    "rG   '   z professional photo[z] Searching for: 'Nr@   )rk   z] Page z: Found z candidatesc                 S   s   |  dd|  dd S )Nr   r   r   )r5   )xr'   r'   r(   <lambda>(  s    z'fetch_image_for_slide.<locals>.<lambda>T)keyreverserE   r   c                 3   s    | ]}| v V  qd S r    r'   .0bad)r   r'   r(   	<genexpr>3  s    z(fetch_image_for_slide.<locals>.<genexpr>c                 3   s    | ]	}|   v V  qd S r    )rJ   r   )rE   r'   r(   r   3  s    z
] Trying: <   z...z$] Duplicate content found, skipping.z
image/jpegr   z	image/pngr   r   z
image/webpr+   u   ] ✅ Image found in z.2fszdata:z;base64,u%   ] ❌ No image found after searching.)timer]   rL   r8   rM   r6   r7   rangerz   sortr5   rJ   HISTORYrR   anyEXCLUDED_TERMSr   hashlibmd5	hexdigestrT   r   base64	b64encodedecoder:   )r   r   
start_timefinderrj   found_image_databest_candidate_urlpager   cand	img_bytesimg_hash	mime_typeb64_strdurationr'   )r   rE   r(   fetch_image_for_slide  sX   "
, 
r   c                   C   s   t   dS )zClears the history file.N)r   rU   r'   r'   r'   r(   clear_used_imagesY  s   r   __main__z%Testing Storigo Image Generator v2...test_1z3professional modern office workspace with computerszResult length: ).rZ   osr3   r   rf   ra   r   loggingr   r   pathlibr   typingr   r   r   r   bs4r   urllib.parser   r	   __file__resolveparentBASE_DIRSTORAGE_DIRmkdirr0   rh   r   r   basicConfigINFO	getLoggerr6   r   r   r]   r[   r   r   rW   printresr8   r'   r'   r'   r(   <module>   sJ    
WhW
