"""
Ultra-Fast Professional Image Scraper (Bing Direct)
Optimized for speed and reliability - directly scrapes Bing Images
"""

import re
import base64
import requests
import random
import time
import io
import json
from typing import Optional, Dict, Any, List
from PIL import Image, ImageEnhance, ImageFilter
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import quote_plus

class ImageScraperService:
    """
    Direct Bing Image Scraper.
    Fastest way to get relevant, real-world professional images.

    Pipeline:
      1. Build a strict "single professional person" query from a roleplay name.
      2. Scrape Bing Images (with quality filters) for candidate photo URLs.
      3. Download and validate the top candidates in parallel; first success wins.
      4. Return the winner as a 1920x1080 base64-encoded JPEG data URI.
    """

    TARGET_WIDTH = 1920   # output width in pixels
    TARGET_HEIGHT = 1080  # output height in pixels (16:9)
    TIMEOUT = 4  # Aggressive per-image download timeout (seconds) for speed
    MAX_CANDIDATES = 8  # Number of candidate URLs downloaded/checked in parallel

    # Rotation of user agents to avoid blocking
    USER_AGENTS = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0"
    ]

    def __init__(self) -> None:
        # One shared session with a pooled adapter so the parallel downloads
        # reuse connections instead of handshaking per request.
        self.session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(
            pool_connections=20,
            pool_maxsize=20,
            max_retries=1
        )
        self.session.mount('http://', adapter)
        self.session.mount('https://', adapter)

    def _get_headers(self) -> Dict[str, str]:
        """Return browser-like request headers with a randomly rotated User-Agent."""
        return {
            "User-Agent": random.choice(self.USER_AGENTS),
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
        }

    def _get_search_query(self, roleplay_name: str) -> str:
        """
        Constructs a strict professional query:
        - Specific role
        - 'Single person' emphasis
        - 'Corporate/Studio' setting
        - Exclusion of groups/cartoons

        Falls back to a generic corporate-portrait query when the name is
        empty/None.
        """
        if not roleplay_name:
            return "professional corporate business portrait single person"

        clean = roleplay_name.strip()
        # Remove meta prefixes (each is stripped at most once, in this order)
        for prefix in ['Roleplay:', 'Scenario:', 'Sr.', 'Jr.']:
            if clean.startswith(prefix):
                clean = clean[len(prefix):].strip()

        # "roleplay_name" + semantic boosters
        # Exclude: group, team, meeting, drawing, illustration, 3d, collage
        query = (
            f"{clean} professional corporate headshot single person "
            "business portrait real 8k "
            "-group -team -meeting -collage -drawing -illustration -cartoon -3d -vector"
        )
        return query

    def _scrape_bing_urls(self, query: str) -> List[str]:
        """
        Scrapes Bing Images with strict filters for Professional Quality.

        Returns up to 10 de-duplicated full-resolution image URLs, or an
        empty list on any network/HTTP failure.
        """
        try:
            # Bing Advanced Filters:
            # aspect-wide   : 16:9 preference
            # imagesize-large : High Resolution
            # face-portrait : "Head and shoulders" (better than just 'face')
            # photo-photo   : Real photography only (no AI/Art)
            filters = "+filterui:aspect-wide+filterui:imagesize-large+filterui:face-portrait+filterui:photo-photo"

            # Form=IRFLTR ensures filters are applied stricter
            url = f"https://www.bing.com/images/search?q={quote_plus(query)}&qft={filters}&form=IRFLTR&first=1"

            response = self.session.get(
                url,
                headers=self._get_headers(),
                timeout=5
            )

            if response.status_code != 200:
                print(f"Bing status: {response.status_code}")
                return []

            # Extract murl (the original full-size image URL) from the
            # HTML-escaped JSON embedded in the results page.
            matches = re.findall(r'murl&quot;:&quot;(https?://[^&]+)&quot;', response.text)

            urls = []
            seen = set()
            for m in matches:
                # Extra filetype safety
                if m not in seen and not any(m.endswith(ext) for ext in ['.svg', '.gif', '.ico']):
                    urls.append(m)
                    seen.add(m)

            return urls[:10]  # Focus on top 10 most relevant

        # FIX: the original `try` had no except clause at all (SyntaxError).
        # Mirror the method's existing failure style: report and return [].
        except Exception as e:
            print(f"Bing scrape failed: {e}")
            return []

    def _download_and_process(self, url: str) -> Optional[str]:
        """
        Downloads a candidate URL and processes it into a 16:9 1080p base64 JPEG.

        Returns a "data:image/jpeg;base64,..." URI on success, or None when
        the download fails, the image is too small, or decoding errors out.
        """
        try:
            # Fast download
            r = self.session.get(url, headers=self._get_headers(), timeout=self.TIMEOUT)

            if r.status_code != 200:
                return None

            if len(r.content) < 5000: # Ignore tiny thumbnails
                return None

            img = Image.open(io.BytesIO(r.content))

            # JPEG output requires RGB (drops alpha / palette modes).
            if img.mode != 'RGB':
                img = img.convert('RGB')

            w, h = img.size
            if w < 500 or h < 300: # Too small
                return None

            # Crop/Resize to 1920x1080
            # 1. Determine crop to 16:9 (skip when already within 5% of target)
            target_ratio = 16 / 9
            current_ratio = w / h

            if abs(current_ratio - target_ratio) > 0.05:
                if current_ratio > target_ratio:
                    # Too wide, crop width (centered)
                    new_w = int(h * target_ratio)
                    left = (w - new_w) // 2
                    img = img.crop((left, 0, left + new_w, h))
                else:
                    # Too tall, crop height (focus on top for portraits)
                    new_h = int(w / target_ratio)
                    # For portraits, faces are usually at the top, so crop significantly more from bottom
                    top = int((h - new_h) * 0.2) # 20% from top
                    img = img.crop((0, top, w, top + new_h))

            # 2. Resize to 1920x1080
            if img.size != (self.TARGET_WIDTH, self.TARGET_HEIGHT):
                img = img.resize((self.TARGET_WIDTH, self.TARGET_HEIGHT), Image.Resampling.LANCZOS)

            # 3. Enhance
            # Slight sharpen to verify quality
            img = img.filter(ImageFilter.UnsharpMask(radius=1.1, percent=100, threshold=3))

            buf = io.BytesIO()
            img.save(buf, format='JPEG', quality=90, optimize=True)
            return f"data:image/jpeg;base64,{base64.b64encode(buf.getvalue()).decode()}"

        except Exception:
            # Silently fail on individual image errors to keep trying others
            return None

    def get_roleplay_image(self, roleplay_data: Dict[str, Any]) -> Optional[str]:
        """
        Main entry point. Scrapes Bing for 'roleplay_name'.

        roleplay_data: dict expected to carry a 'roleplay_name' key
        (missing/empty falls back to a generic query).
        Returns a base64 JPEG data URI, or None if no candidate succeeded.
        """
        start_time = time.time()
        print("\n" + "="*60)
        print("⚡ BING IMAGE SCRAPER (DIRECT)")
        print("="*60)

        roleplay_name = roleplay_data.get('roleplay_name', '')
        query = self._get_search_query(roleplay_name)
        print(f"🔍 Searching: '{query}'")

        # 1. Scrape URLs
        urls = self._scrape_bing_urls(query)
        print(f"found {len(urls)} candidates")

        if not urls:
            print("❌ No images found on Bing.")
            return None

        # 2. Parallel Download & Process (Race to first success)
        # We try the top candidates. First valid one wins.
        print(f"⚡ Processing top {self.MAX_CANDIDATES} candidates in parallel...")

        result = None

        # FIX: the original used self.MAX_WORKERS, which is never defined
        # anywhere on this class (AttributeError). One worker per candidate
        # matches the intent of racing all candidates at once.
        with ThreadPoolExecutor(max_workers=self.MAX_CANDIDATES) as executor:
            # Submit tasks
            future_to_url = {
                executor.submit(self._download_and_process, url): url
                for url in urls[:self.MAX_CANDIDATES]
            }

            for future in as_completed(future_to_url):
                try:
                    res = future.result()
                    if res:
                        result = res
                        # Cancel others (cancel_futures drops not-yet-started tasks)
                        executor.shutdown(wait=False, cancel_futures=True)
                        break
                except Exception:
                    pass

        elapsed = time.time() - start_time

        if result:
            print(f"✅ SUCCESS in {elapsed:.2f}s")
            print("="*60 + "\n")
            return result
        else:
            print(f"❌ ALL candidates failed processing in {elapsed:.2f}s")
            print("="*60 + "\n")
            return None

# Module-level singleton: importers share this one instance (and its pooled
# HTTP session) instead of constructing their own ImageScraperService.
image_scraper_service = ImageScraperService()
