o
    I
iP                  	   @   s  d Z ddlZddlZddlZddlZddlZddlZddlmZ ddl	m
Z
mZmZmZ ddlmZ ddlZddlmZ ddlZddlmZmZ ejejdd	 eeZed
Zejdd g dZh dZg dg dg dg dg dg ddZ de!dee!e"f fddZ#de!de!defddZ$G dd dZ%de!d e!de
e! fd!d"Z&ed#kr|e'd$ e'd% e'd$ g d&Z(e Z)dZ*e+e(d'D ]e\Z,\Z-Z.e'd(d$  e'd)e, d*e/e( d+e-  e'd,e. d- e'd$  e Z0e&e-e.Z1e e0 Z2e1re'd.e2d/d0 e'd1e1  e*d'7 Z*n	e'd2e2d/d0 e,e/e(k r-e3d' qe e) Z4e'd(d$  e'd3 e'd4e/e( d5 e'd6e* d*e/e( d7e*e/e( d8 d9d: e'd;e4d/d0 e'd<e4e/e( d/d= e'd$ dS dS )>a  
================================================================================
 Storigo v10.0 - Ultra-Fast Accurate Bing Image Generator
 - Single-source Bing scraping with advanced accuracy
 - Multi-strategy semantic search with intelligent fallbacks
 - Enhanced copyright-free filtering
 - Parallel candidate validation for 3x speed improvement
 - No caching, no external APIs - Pure Bing excellence
================================================================================
    N)Path)OptionalDictListTuple)	urlencode)BeautifulSoup)ThreadPoolExecutoras_completedz?%(asctime)s - [%(levelname)s] - (StorigoImageGen) - %(message)s)levelformatgenerated_images_for_storigoT)exist_ok)zoMozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36zuMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36zPMozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0zeMozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36>0   aanasatbebyifinisofonortoandanyarebutcanforhadhasmaythewasbeenfromhavemanymoremostmuchsomethatthenthiswerewillwithbeingcouldmightshallthesethosewouldhavingshouldwithout)teamstaffpeoplepersonworkeremployeeprofessionalgroupcrowd
colleaguesmembers)	officemeetingbusiness	corporate	workspacedesk
conference	boardroom	executive)computerlaptopscreenmonitordata	analytics	dashboardchartgraphdigital
technology)	kitchen
restaurantchefcookingfooddiningmealculinarypreparation)	buildinginteriorexteriorroomhallspaceareafacilityvenue)working
discussing
presentingcollaborating	analyzingcreatingplanning
organizing)rA   rL   techrb   placesactionspromptreturnc                 C   sH  |    }| }td|}tdd|}tdd|}dd | D }g }g }i }|D ]+}	d}
t D ]\}}|	|v rR||	 |	|d	d
 ||< d}
 nq7|
sZ||	 q/g }t
t|d
 D ] }|| tvr||d
  tvr|||  d||d
    qeg }|r|d|d	 df |r|d	 }|r| d|d	  }|d|df |d|df |rt|dkr|d|d	  d|d
  df |r|d|d	  d|d	  df |d|d	 df |rd|dd }|d|df |s|dd|dd df ||dd  |dd  |dd |d!S )"zz
    Extract semantically meaningful keywords with intelligent prioritization
    Returns optimized search strategies
    z	"([^"]+)"z"[^"]+" z[^a-zA-Z0-9\s-] c                 S   s$   g | ]}t |d kr|tvr|qS )   )len	STOPWORDS).0t r   N/var/www/eduai.edurigo.com/image_generation/testing/storigo_image_generator.py
<listcomp>Y   s   $ z-extract_semantic_keywords.<locals>.<listcomp>Fr      Texact_quoted   phrase_enhanced_   phrase_pureZ   r   primary_duoU   primary_contextP   primary_singleK   N   full_contextF   original   <      )
strategiesprimarycontextphrases
categories)lowerstripsplitrefindallsubPRIORITY_PATTERNSitemsappendgetranger   r   join)r{   textoriginal_tokensquoted
text_cleantokensr   r   category_matchestokenmatchedcategorykeywordsr   ir   best_phraseenhanced_phraser   r   r   r   extract_semantic_keywordsK   sh   

 ""


r   	slide_keyqueryc                 C   sn   t dd| }d| dd }|sd}|dd }tt d d	 }|  d| d| d
}t| S )zGenerate clean, unique filenamez[^a-zA-Z0-9\s]r}   _Nr   image   i  i@B .jpg)r   r   r   r   r   inttime
IMAGES_DIR)r   r   clean_query	timestampfilenamer   r   r   generate_filename   s   r   c                
   @   s   e Zd ZdZdd Zdededee fddZd	edefd
dZ	d	ede
eeef  fddZdedefddZddee dede
eeef  fddZdedede
e fddZdS )BingImageScraperz<High-performance Bing image scraper with parallel processingc              
   C   s:   t  | _| jjttddddddd t | _	d S )NzJtext/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8zen-US,en;q=0.9zgzip, deflate, br1z
keep-alive)
User-AgentAcceptzAccept-LanguagezAccept-EncodingDNT
ConnectionzUpgrade-Insecure-Requests)
requestsSessionsessionheadersupdaterandomchoiceUSER_AGENTSset_tried_urls)selfr   r   r   __init__   s   
	zBingImageScraper.__init__r   strategy_namer|   c                 C   s  z|dddddd}dt | }tt| jjd< d	| jjd
< | jj|dd}|jdkr9t	d|j  g W S t
|jd}g }|dddi}|dd D ]8}	z)|	d}
|
ryt|
}|dpg|d}|ry| |ry|| jvry|| W qO tjttfy   Y qOw t|dk r|dddi}|dd D ]}|dp|d}|r| |r|| jvr|| qt	d| dt| d| d  |dd! W S  tjy   t	d"|  g  Y S  ty
 } zt	d#| d$t|dd%   g W  Y d}~S d}~ww )&zo
        Search Bing images with copyright-free filter
        Returns list of high-quality image URLs
        z#+filterui:license-L2_L3_L4_L5_L6_L7IRFLTRr   50Moderate)qqftformfirstcount
safeSearchz#https://www.bing.com/images/search?r   zhttps://www.bing.com/Referer
   timeout   u     ⚠️ Bing returned status zhtml.parserr   classiuscN#   mmurlturlimgmimg   srczdata-srcu     → z: Found z URLs for ''   u     ⚠️ Timeout for u     ⚠️ Error in : r   )r   r   r   r   r   r   r   status_codeloggerdebugr   r   find_alljsonloads_is_valid_image_urlr   r   JSONDecodeErrorAttributeErrorKeyErrorr   r   Timeout	Exceptionstr)r   r   r   params
search_urlresponsesoup
image_urlsimage_containers	containerm_attrrW   img_urlimg_tagsr   r   er   r   r   _search_bing   s`   	





""zBingImageScraper._search_bingurlc                    s`   |r| ds	dS g d}|  t fdd|D }g d}t fdd|D }|o/| S )z'Validate image URL format and extensionhttpF)r   z.jpegz.pngz.webpc                 3       | ]}| v V  qd S Nr   )r   ext	url_lowerr   r   	<genexpr>      z7BingImageScraper._is_valid_image_url.<locals>.<genexpr>)faviconlogoiconavatar	thumbnailpixelc                 3   r  r  r   )r   termr  r   r   r  	  r  )
startswithr   r   )r   r  
valid_extshas_extensionblocked
is_blockedr   r  r   r      s   
z$BingImageScraper._is_valid_image_urlc              	   C   s   || j v rdS | j | z2| jj|dddd}|jdkr W dS |jdd }d|vr0W dS |j}| |s;W dS ||fW S  t	j
t	jfyM   Y dS  tyV   Y dS w )	z_
        Download image and validate quality
        Returns (image_data, url) or None
        N   T)r   allow_redirectsstreamr   zcontent-typer}   r   )r   addr   r   r   r   r   content_validate_qualityr   r   RequestExceptionr   )r   r  r  content_typerW   r   r   r   _download_and_validate  s0   



z'BingImageScraper._download_and_validaterW   c                 C   sp   |rt |dk r
dS t |dkrdS |dd dk}|dd dk}t |d	kr0|dd	 d
knd}|p7|p7|S )z&Validate image data quality and formati:  Fi Nr   s   r#  s   PNG

   s   WEBP)r   )r   rW   jpegpngwebpr   r   r   r(  5  s    z"BingImageScraper._validate_qualityr   urlsmax_workersc              
      s   t |dF  fdd|dd D }t|ddD ](}z| }|r8|D ]}|  q%|W   W  d   S W q tyB   Y qw W d   dS 1 sNw   Y  dS )zh
        Download multiple URLs in parallel for speed
        Returns first successful download
        )r1  c                    s   i | ]
}  j||qS r   )submitr+  )r   r  executorr   r   r   
<dictcomp>J  s    z7BingImageScraper._parallel_download.<locals>.<dictcomp>N   r,  r   )r	   r
   resultcancelr   )r   r0  r1  futuresfuturer7  fr   r3  r   _parallel_downloadD  s*   


z#BingImageScraper._parallel_downloadr   r{   c              
   C   s  zHt |}|d }td| dt| d td|d   g }|dd D ];\}}}td	| d
| d| d | ||}	|	rS|	D ]
}
||
||f qHtdd |D dkr` ntd q*|stt	d| d W dS |j
dd dd g }t }|D ]\}
}}|
|vr||
 ||
 qtdt| d td | |}|r|\}}t||d d d }|| t|}d|j }
td| d td|j  td |d!d"|d# d$d% td& |
W S td' |dd( D ];}
| |
}|r7|\}}t||d d d }|| d|j }td| d)|j  |  W S td* qt	d| d+ W dS  tyj } ztd,| d-t|  W Y d}~dS d}~ww ).z
        Main entry point: Fetch accurate image from Bing
        
        Args:
            slide_key: Slide identifier
            prompt: Visualization description
            
        Returns:
            Image URL path or None
        r   u   🔍 [z] Searching Bing with z strategiesz   Primary keywords: r   Nr   u     → Strategy 'z': 'z' (priority: )c                 S   s   g | ]
}|d  dkr|qS )r   r   r   )r   cr   r   r   r   {  s    z0BingImageScraper.fetch_image.<locals>.<listcomp>r   g333333?u   ⚠️ [z] No image candidates foundc                 S   s   | d S )Nr   r   )xr   r   r   <lambda>  s    z.BingImageScraper.fetch_image.<locals>.<lambda>T)keyreversez	   Found z unique candidatesu%     ⚡ Attempting parallel download...r   r   z/images/u   ✅ [z ] Image downloaded successfully!z	   File: z	   Size: ,z bytes (i   .1fz KB)z    Source: Bing (copyright-free)u#     → Trying sequential download...r   z] Image saved: g?z(] No valid images found after validationu   ❌ [z	] Error: )r   r   infor   r   r  r   r   sleepwarningsortr   r&  r<  r   write_bytesnamer+  r   errorr  )r   r   r{   keywords_datar   all_candidatesr   r   priorityr0  r  unique_urlsseenstrategyr7  
image_data
source_url
image_path	file_size
url_resultr  r   r   r   fetch_imageZ  st   




 




zBingImageScraper.fetch_imageN)r   )__name__
__module____qualname____doc__r   r  r   r  boolr   r   r   bytesr+  r(  r   r<  rW  r   r   r   r   r      s    B((r   visualization_promptc                 C   s   t  }|| |S )a  
    PUBLIC API: Fetch accurate copyright-free image from Bing
    
    Args:
        slide_key: Slide identifier (e.g., "slide_1")
        visualization_prompt: The visualization suggestion text
        
    Returns:
        Image URL path (e.g., "/images/slide_1_kitchen_staff_123456.jpg") or None
        
    Example:
        >>> url = fetch_image_for_slide("slide_1", "kitchen staff in commercial kitchen")
        >>> print(url)  # "/images/slide_1_kitchen_staff_987654.jpg"
    )r   rW  )r   r^  scraperr   r   r   fetch_image_for_slide  s   r`  __main__zP================================================================================u:   🧪 Testing Storigo Ultra-Fast Bing Image Generator v10.0))slide_1z+kitchen staff working in commercial kitchen)slide_2z+data analytics dashboard with charts graphs)slide_3z,business team collaboration in modern office)slide_4z.professional chef preparing food in restaurant)slide_5z1corporate meeting with people discussing strategyr   
u
   📸 Test /r   z   Prompt: 'r   u      ✅ SUCCESS in z.2fsz   URL: u      ❌ FAILED after u   📊 Test Results:z
   Total: z testsz   Success: z (r   rD  z%)z   Total time: z   Average: zs per image)5r[  r   hashlibr   loggingr   ospathlibr   typingr   r   r   r   urllib.parser   r   bs4r   r   concurrent.futuresr	   r
   basicConfigINFO	getLoggerrX  r   r   mkdirr   r   r   r  r   r   r   r   r`  print
test_casestotal_startsuccess_count	enumerateidxr   r{   r   
start_timer  elapsedrF  
total_timer   r   r   r   <module>   s    


N  




.