o
    :iL                     @   s   d Z ddlZddlZddlZddlZddlZddlZddlZddl	m
Z
mZmZmZmZ ddlmZmZ ddlmZmZ ddlmZ ddlmZ G dd	 d	Ze ZdS )
z
High-Performance Professional Image Scraper v2.
Optimized for SPEED, QUALITY, and RELEVANCE.
Fetches single-person, front-facing professional portraits with AI validation.
    N)OptionalDictAnyListTuple)ImageImageEnhance)ThreadPoolExecutoras_completed)GroqService)OllamaServicec                   @   s0  e Zd ZdZdZdZdZg dZdddd	d
Zdd Z	de
eef fddZde
eef defddZdedee fddZdedee fddZdedeeef fddZdedeeef fddZdededefdd Zd*d"ededee fd#d$Zdedee fd%d&Zde
eef dee fd'd(Zd)S )+ImageScraperServicezx
    Enhanced image scraper with AI validation and face detection.
    >2x faster with 3x better quality filtering.
    i  i8     )zoMozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36zuMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36zeMozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36zPhttps://images.unsplash.com/photo-1560250097-0b93528c311a?w=1920&h=1080&fit=cropzShttps://images.unsplash.com/photo-1573496359142-b8d87734a5a2?w=1920&h=1080&fit=cropzShttps://images.unsplash.com/photo-1519085360753-af0119f7cbe7?w=1920&h=1080&fit=cropzShttps://images.unsplash.com/photo-1507003211169-0a1dd7228f2d?w=1920&h=1080&fit=crop)manager	executive
consultantprofessionalc                 C   s   t  | _| jjddi zt | _t | _d| _	W n   d | _d | _d| _	Y zdd l
}|| _
d| _W d S    d | _
d| _Y d S )N
Connectionz
keep-aliveTFr   )requestsSessionsessionheadersupdater   groq_servicer   ollama_servicehas_aicv2has_cv2)selfr    r   M/var/www/eduai.edurigo.com/roleplay/testing/services/image_scraper_service.py__init__-   s"   

zImageScraperService.__init__returnc                 C   s   t | jdddddS )NzJtext/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8zen-US,en;q=0.9https://duckduckgo.com/1)z
User-AgentAcceptzAccept-LanguageRefererDNT)randomchoiceUSER_AGENTS)r   r   r   r    _get_headersD   s   
z ImageScraperService._get_headersroleplay_datac                 C   s   | dd}| dd}t|dk r$dt|   kr#dkr$| S  | jr]| jr]z.d| d| d	}| jjd
|dgdd}|rWt| dk rW| ddddW S W n   Y | dd }d| S )zH
        Extracts job title with AI fallback for complex roles.
        zLearner role category         zXExtract a 2-4 word professional job title from this description:
                
Role: z
Category: z{

Return ONLY the job title, nothing else. Examples: "Marketing Manager", "Software Engineer", "Sales Director"

Job Title:user)rolecontent   )
max_tokens2   "'N    )	getlensplitstripr   r   _call_groq_apireplacejoin)r   r,   r3   catpromptrespwordsr   r   r    _get_job_title_fastM   s.   &	
z'ImageScraperService._get_job_title_fast	job_titlec                 C   sd   d| dd| dd| dd| dg}|d  d|d  d	|d
  d|d  dg}|S )zU
        Builds ULTRA-STRICT queries for professional studio headshots only.
        r8   z0" professional headshot studio portrait businessz0" corporate headshot linkedin professional photoz/" business portrait studio background executivez1" professional portrait corporate office headshotr   z site:unsplash.com   z site:pexels.comr0   z! "studio lighting" "professional"r:   z! "business attire" "front facing"r   )r   rH   strict_termsqueriesr   r   r    build_fast_queriesq   s   



	z&ImageScraperService.build_fast_queriesqueryc                 C   s  t dD ]}ze| jjdd|i|  dd}td|j}|s%td W qdd	||	d
ddddd}| jjd||  dd}|
 }|dg }g }|dd D ]}	|	dd}
d|
 vrf|
rf||
 qQ|W   S  ty } z|dkr|td W Y d}~qd}~ww g S )z@
        Fast DuckDuckGo image search with retry logic.
        r0   r#   qr1   )paramsr   timeoutzvqd="?([^"&]+)"?      ?zus-enjsonrI   z,,,layout:tall,size:Large,,r$   images)lorN   vqdfpiaxiazhttps://duckduckgo.com/i.js   resultsNr5   imager-   thumbr   )ranger   r<   r+   researchtexttimesleepgrouprR   lowerappend	Exception)r   rM   attemptr	vqd_matchrO   datar\   urlsresimg_urler   r   r    _scrape_ddg_fast   sV   



z$ImageScraperService._scrape_ddg_fastimgc                 C   s&  |j \}}|dk s|dk rdS || }|dk s|dkrdS zo|d}t|}| }|dk s4|dkr7W d	S | }|d
k rBW dS t|}	|	 }
|
dk rRW dS |	dt|d ddddf }|jdd}|d dkrx|d dk rxW dS |d dkr|d dk rW dS W dS W dS    Y dS )zc
        STRICT professional image quality validation.
        Returns (is_valid, reason).
        i   )F	too_small333333?g?)Fnot_portrait_ratioL<      )Fpoor_studio_lighting#   )Flow_contrast_unprofessionalr/   )Fflat_unprofessionalNg?)r   rI   )axisr0      r      )Foutdoor_sky_backgroundrI   x   )Foutdoor_nature_background)Tpass)sizeconvertnparraymeanstdint)r   rr   whratio	grayscalearrmean_brightnessstd_devrgb_arrcolor_variancetop_section	avg_colorr   r   r    _assess_image_quality   s@   



"z)ImageScraperService._assess_image_qualityc              
   C   sj  | j sdS z| jt|| jj}| j|| jj}| j| jjj	d }|j
|dddd}t|dkr8W dS t|d	krAW d
S |d \}}}}	|j\}
}||	d  }||d kr]W dS ||d  }t||
d  |
 }|dkrtW dS ||	 }|
| }|| }|dk rW dS |dkrW dS d|  krdksW dS  W dS W dS  ty } zdW  Y d}~S d}~ww )zz
        STRICT face detection - must be frontal, centered, and professional.
        Returns (has_face, reason).
        )Tno_cv2z#haarcascade_frontalface_default.xml?r[   )r   r   )scaleFactorminNeighborsminSizer   )Fno_face_detectedrI   )Fmultiple_peopler0   rt   )Fface_not_centered_verticallyg      ?)Fface_not_centered_horizontallygQ?)Fface_too_small_not_headshotg?)Fextreme_closeup_unprofessionalg333333?rQ   )Fimproper_headshot_framing)Tprofessional_headshot_detectedTdetection_errorN)Tr   )r   r   cvtColorr   r   COLOR_RGB2BGRCOLOR_BGR2GRAYCascadeClassifierrl   haarcascadesdetectMultiScaler=   r   absrh   )r   rr   cv_imggrayface_cascadefacesxyr   r   img_wimg_hface_center_yface_center_xhorizontal_deviation	face_areaimg_area
face_ratiorp   r   r   r    _detect_frontal_face   sV   
z(ImageScraperService._detect_frontal_face
base64_imgc                 C   sX   | j r| jsdS zd| d}t| jdr#| j||}d| v W S W dS    Y dS )z
        Uses AI vision model to validate image relevance (optional).
        Only used if available and as final validation.
        Ta7  Analyze this image strictly. Answer ONLY 'YES' or 'NO'.

Requirements (ALL must be met):
- Exactly ONE person visible
- Person facing camera directly (not side profile)
- Professional/business attire or neutral clothing
- Studio, office, or plain background (not outdoor/casual)
- Appears relevant to job role: z

Answer (YES or NO):analyze_imageYES)r   r   hasattrr   upper)r   r   rH   rD   responser   r   r    _validate_with_ai9  s   z%ImageScraperService._validate_with_air-   urlc              
      s  zg d}t  fdd|D sW dS | jj |  dd}|jdks*t|jdk r-W dS tt	
|jd	}| |\}}|sEW dS | |\}}	|sQW dS |j\}
}d
}|
| |k rtt|
| }||krj|}|dd|
|f}nt|| }|
| d }||d|| |f}|| j| jftj}t|d}t|d}t|d}t	
 }|j|dddd t|  }d| }|W S  ty } zW Y d}~dS d}~ww )z
        Downloads, validates, and processes a single image URL.
        Returns Base64 string if all validations pass.
        )z.jpgz.jpegz.pngzimages.unsplashzimages.pexelszpixabay.comc                 3   s    | ]	}|   v V  qd S N)rf   ).0extr   r   r    	<genexpr>_  s    z=ImageScraperService._fetch_and_process_url.<locals>.<genexpr>Nr   )r   rP      i N  RGBgqq?r   r0   gffffff?r   g{Gz?JPEG\   T)formatqualityoptimizezdata:image/jpeg;base64,) anyr   r<   r+   status_coder=   r4   r   openioBytesIOr   r   r   r   r   cropresizeTARGET_WIDTHTARGET_HEIGHTLANCZOSr   	SharpnessenhanceContrast
Brightnesssavebase64	b64encodegetvaluedecoderh   )r   r   rH   valid_extensionsrj   rr   quality_passquality_reason	face_passface_reasonr   r   target_rationew_hnew_wleftbufb64base64_datarp   r   r   r    _fetch_and_process_urlW  sJ   

z*ImageScraperService._fetch_and_process_urlc                    s   |   t fdddD r| jd}n*t fdddD r(| jd}nt fddd	D r:| jd
}n| jd}z| ||W S    Y dS )zI
        Returns a curated fallback image based on job category.
        c                 3       | ]}| v V  qd S r   r   r   term	job_lowerr   r    r         z:ImageScraperService._get_fallback_image.<locals>.<genexpr>)r   directorleadheadr   c                 3   r   r   r   r   r   r   r    r     r   )r   ceo	presidentvpr   c                 3   r   r   r   r   r   r   r    r     r   )r   advisoranalystr   r   N)rf   r   FALLBACK_IMAGESr<   r   )r   rH   fallback_urlr   r   r    _get_fallback_image  s   z'ImageScraperService._get_fallback_imagec                    s  t   }td |td d }tdt| d g }tdd"  fdd	|D }t|D ]}| }|| q:W d
   n1 sPw   Y  t	t
|}tdt| dt   | dd |sytd S tdt|}	t|	d>  fdd|d
d D }
t|
D ]"}| }|rt   | }td|dd |  W  d
   S qW d
   n1 sw   Y  td }|rtdt   | dd |S td d
S )zZ
        Main entry point: fetches best professional image for roleplay scenario.
        z'INFO: Starting Enhanced Image Search...zINFO: Target Role: 'r9   zINFO: Built z search queriesr[   )max_workersc                    s   i | ]
}  j||qS r   )submitrq   )r   rN   )executorr   r   r    
<dictcomp>      z:ImageScraperService.get_roleplay_image.<locals>.<dictcomp>NzINFO: Found z unique candidates in z.2fsz)WARN: No candidates found, using fallback   c                    s   g | ]
}  j|qS r   )r  r   )r   r   r  rH   r   r   r    
<listcomp>  r  z:ImageScraperService.get_roleplay_image.<locals>.<listcomp>   z%SUCCESS: High-quality image found in z5WARN: No valid images found in search, using fallbackz"SUCCESS: Fallback image loaded in zERROR: All methods failed)rc   printrG   rL   r=   r	   r
   resultextendlistdictfromkeysr   min)r   r,   
start_timerK   all_candidate_urlsfuture_to_queryfuturerm   candidate_urlsmax_concurrentfuturesr  elapsedfallbackr   r  r    get_roleplay_image  sV   

$


z&ImageScraperService.get_roleplay_imageN)r-   )__name__
__module____qualname____doc__r   r   TIMEOUTr*   r   r!   r   strr+   r   rG   r   rL   rq   r   r   boolr   r   r   r   r   r   r  r   r   r   r    r      s,    	$:6BE"r   )r  r`   r   r   r(   rc   r   numpyr   typingr   r   r   r   r   PILr   r   concurrent.futuresr	   r
   services.groq_servicer   services.ollama_servicer   r   image_scraper_servicer   r   r   r    <module>   s$      
_