import re
import requests
from urllib.parse import urlparse, parse_qs
from dataclasses import dataclass
from typing import Optional, Dict, Any, Tuple
import yt_dlp
from datetime import timedelta

@dataclass
class VideoMetadata:
    """Metadata for one YouTube video plus the outcome of the input checks.

    Instances are built by ``YouTubeInputProcessor.extract_metadata``. When
    extraction fails, the descriptive fields hold placeholder values
    ("Unknown", empty strings, zeros), ``is_accessible`` is False and
    ``error_message`` explains why.
    """
    # Video ID the metadata was requested for.
    video_id: str
    # Video title; a placeholder is used when it is not available.
    title: str
    # Video length in seconds (0 when extraction failed).
    duration_seconds: int
    # Duration rendered as "MM:SS", or "HH:MM:SS" once it reaches an hour.
    duration_formatted: str
    # Video description, cut to 500 characters followed by "..." when longer.
    description: str
    # Thumbnail URL taken from the extractor's 'thumbnail' field ('' if absent).
    thumbnail_url: str
    # Uploader name ("Unknown" if absent).
    uploader: str
    # Upload date string exactly as reported by the extractor
    # (presumably YYYYMMDD -- confirm against yt-dlp's info dict docs).
    upload_date: str
    # Number of views reported by the extractor.
    view_count: int
    # False when metadata extraction raised (private, removed, restricted, ...).
    is_accessible: bool
    # True when duration_seconds does not exceed the processor's configured limit.
    is_within_length_limit: bool
    # Human-readable reason for a failed check; None when every check passed.
    error_message: Optional[str] = None

class YouTubeInputProcessor:
    """
    Handles the input processing stage for YouTube to slides conversion.

    The stage validates a user-supplied URL (or bare video ID), fetches the
    video's metadata through yt-dlp without downloading any media, and checks
    the video against a maximum duration before later stages run.
    """

    # Patterns tried in order by extract_video_id(). Each captures exactly 11
    # ID characters; the negative lookahead rejects longer tokens instead of
    # silently truncating them to their first 11 characters.
    _VIDEO_ID_PATTERNS = (
        # watch URLs, with `v` anywhere in the query string
        re.compile(r'youtube\.com/watch\?(?:[^#\s]*&)?v=([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])'),
        # path-style URLs: embeds, legacy /v/, Shorts and live pages
        re.compile(r'youtube(?:-nocookie)?\.com/(?:embed|v|shorts|live)/([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])'),
        # short links
        re.compile(r'youtu\.be/([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])'),
        # direct video ID
        re.compile(r'^([a-zA-Z0-9_-]{11})$'),
    )

    # Descriptions longer than this are cut and suffixed with '...'.
    _DESCRIPTION_LIMIT = 500

    def __init__(self, max_duration_minutes: int = 20):
        """
        Create a processor.

        Args:
            max_duration_minutes: Longest video, in minutes, that passes the
                duration check.
        """
        self.max_duration_seconds = max_duration_minutes * 60
        self.max_duration_minutes = max_duration_minutes

        # Configure yt-dlp options for metadata extraction only
        self.ydl_opts = {
            'quiet': True,
            'no_warnings': True,
            'extract_flat': False,
            'skip_download': True,  # Only extract metadata, don't download
            'writeinfojson': False,
            'writesubtitles': False,
        }

    def extract_video_id(self, url: str) -> Optional[str]:
        """
        Extract the YouTube video ID from various URL formats.

        Supported inputs: ``watch?v=`` URLs (``v`` may appear anywhere in the
        query string), ``/embed/``, ``/v/``, ``/shorts/`` and ``/live/``
        URLs, ``youtu.be`` short links, and a bare 11-character video ID.

        Args:
            url: URL or video ID supplied by the user.

        Returns:
            The 11-character video ID, or None when nothing matches
            (including non-string input).
        """
        if not isinstance(url, str):
            return None

        candidate = url.strip()
        for pattern in self._VIDEO_ID_PATTERNS:
            match = pattern.search(candidate)
            if match:
                return match.group(1)

        return None

    def validate_url(self, url: str) -> Tuple[bool, str, Optional[str]]:
        """
        Validate YouTube URL and extract video ID.

        Args:
            url: URL or bare video ID supplied by the user.

        Returns:
            (is_valid, message, video_id); video_id is None when invalid.
        """
        if not url or not isinstance(url, str):
            return False, "Please provide a valid URL", None

        video_id = self.extract_video_id(url.strip())
        if not video_id:
            return False, "Invalid YouTube URL format. Please provide a valid YouTube video URL.", None

        # A successful extraction already implies a recognised YouTube URL
        # shape or a bare ID, so no further host check is needed here.
        return True, "Valid YouTube URL", video_id

    def format_duration(self, seconds: int) -> str:
        """
        Convert seconds to human-readable format.

        Args:
            seconds: Duration in seconds. None and negative values are
                treated as 0; fractional values are truncated.

        Returns:
            "MM:SS" below one hour, otherwise "HH:MM:SS". Hours are not
            wrapped at 24, so very long videos keep their full hour count.
        """
        total_seconds = max(0, int(seconds or 0))
        hours, remainder = divmod(total_seconds, 3600)
        minutes, secs = divmod(remainder, 60)

        if hours > 0:
            return f"{hours:02d}:{minutes:02d}:{secs:02d}"
        return f"{minutes:02d}:{secs:02d}"

    def _inaccessible_metadata(self, video_id: str, error_msg: str) -> "VideoMetadata":
        """Build the placeholder metadata returned when a video cannot be read."""
        return VideoMetadata(
            video_id=video_id,
            title="Unknown",
            duration_seconds=0,
            duration_formatted="00:00",
            description="",
            thumbnail_url="",
            uploader="Unknown",
            upload_date="",
            view_count=0,
            is_accessible=False,
            is_within_length_limit=False,
            error_message=error_msg
        )

    def extract_metadata(self, video_id: str) -> "VideoMetadata":
        """
        Extract comprehensive metadata from YouTube video.

        Never raises: any failure is reported through a VideoMetadata with
        ``is_accessible=False`` and a user-readable ``error_message``.

        Args:
            video_id: YouTube video ID.

        Returns:
            VideoMetadata describing the video and its length-limit status.
        """
        url = f"https://www.youtube.com/watch?v={video_id}"

        try:
            with yt_dlp.YoutubeDL(self.ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)

            if not info:
                return self._inaccessible_metadata(
                    video_id, "Unable to access video: no metadata returned"
                )

            # Fields may be present but None (e.g. no duration reported), so
            # `or <default>` is used rather than dict.get's default, which
            # only covers missing keys.
            duration_seconds = int(info.get('duration') or 0)
            duration_formatted = self.format_duration(duration_seconds)

            # Check if video is within length limit
            is_within_limit = duration_seconds <= self.max_duration_seconds

            description = info.get('description') or ''
            if len(description) > self._DESCRIPTION_LIMIT:
                description = description[:self._DESCRIPTION_LIMIT] + '...'

            error_message = None
            if not is_within_limit:
                error_message = f"Video duration ({duration_formatted}) exceeds maximum limit of {self.max_duration_minutes} minutes"

            return VideoMetadata(
                video_id=video_id,
                title=info.get('title') or 'Unknown Title',
                duration_seconds=duration_seconds,
                duration_formatted=duration_formatted,
                description=description,
                thumbnail_url=info.get('thumbnail') or '',
                uploader=info.get('uploader') or 'Unknown',
                upload_date=info.get('upload_date') or '',
                view_count=int(info.get('view_count') or 0),
                is_accessible=True,
                is_within_length_limit=is_within_limit,
                error_message=error_message
            )

        except Exception as e:
            # Deliberately broad: this is the boundary where extractor
            # failures are turned into a result object for the caller.
            error_msg = str(e)

            # Handle common error cases
            if 'Private video' in error_msg:
                error_msg = "This video is private and cannot be accessed"
            elif 'Video unavailable' in error_msg:
                error_msg = "This video is not available or has been removed"
            elif 'restricted' in error_msg.lower():
                error_msg = "This video has geographic or age restrictions"
            else:
                error_msg = f"Unable to access video: {error_msg}"

            return self._inaccessible_metadata(video_id, error_msg)

    def process_input(self, url: str) -> Dict[str, Any]:
        """
        Main processing function that handles the complete input processing stage.

        Args:
            url: URL or bare video ID supplied by the user.

        Returns:
            A dict with keys 'success', 'stage', 'metadata', 'next_action',
            'user_message' and 'technical_details'. 'metadata' stays None
            when URL validation fails.
        """
        result = {
            'success': False,
            'stage': 'input_processing',
            'metadata': None,
            'next_action': None,
            'user_message': '',
            'technical_details': {}
        }

        # Step 1: Validate URL
        is_valid, validation_message, video_id = self.validate_url(url)

        if not is_valid:
            result['user_message'] = validation_message
            result['technical_details']['validation_error'] = validation_message
            return result

        # Step 2: Extract metadata
        print(f"Processing video ID: {video_id}")
        metadata = self.extract_metadata(video_id)
        result['metadata'] = metadata

        # Step 3: Check accessibility
        if not metadata.is_accessible:
            result['user_message'] = f"❌ Unable to process video: {metadata.error_message}"
            result['technical_details']['access_error'] = metadata.error_message
            return result

        # Step 4: Check duration limits
        if not metadata.is_within_length_limit:
            result['user_message'] = f"⚠️ Video duration ({metadata.duration_formatted}) exceeds the {self.max_duration_minutes}-minute limit. Please provide a shorter video or we can implement segmentation in the future."
            result['technical_details']['duration_error'] = metadata.error_message
            return result

        # Step 5: Success - ready for next stage
        result['success'] = True
        result['next_action'] = 'content_extraction'
        result['user_message'] = f"✅ Video processed successfully!\n\n📹 **{metadata.title}**\n⏱️ Duration: {metadata.duration_formatted}\n👤 Channel: {metadata.uploader}\n👀 Views: {metadata.view_count:,}\n\n🎯 Ready to proceed with content extraction and slide generation."

        result['technical_details'] = {
            'video_id': video_id,
            'duration_check': 'passed',
            'accessibility_check': 'passed',
            # Rough estimation: one slide per minute, clamped to 3..15
            'estimated_slides': max(3, min(15, metadata.duration_seconds // 60)),
        }

        return result

    def get_processing_summary(self, result: Dict[str, Any]) -> str:
        """
        Generate a user-friendly summary of the processing result.

        Args:
            result: Dict returned by process_input().

        Returns:
            A multi-line Markdown-flavoured summary: video details on
            success, otherwise the failure message plus suggestions.
        """
        if result['success']:
            metadata = result['metadata']
            estimated_slides = result['technical_details'].get('estimated_slides', 'Unknown')

            return f"""
🎬 **VIDEO ANALYSIS COMPLETE**

📊 **Processing Summary:**
• Video Title: {metadata.title}
• Duration: {metadata.duration_formatted}
• Channel: {metadata.uploader}
• Estimated Slides: ~{estimated_slides}

✅ **Status:** Ready for content extraction
🎯 **Next Step:** Extract audio and generate transcript

The video meets all requirements and is ready for slide generation!
            """
        else:
            return f"""
❌ **PROCESSING FAILED**

{result['user_message']}

💡 **Suggestions:**
• Ensure the YouTube URL is correct and public
• Try a different video under {self.max_duration_minutes} minutes
• Check if the video has any restrictions
            """

# Example usage and testing
def main():
    """Example usage of the YouTube Input Processor.

    Runs a handful of sample inputs through ``process_input`` and prints the
    summary for each. The valid samples trigger a real metadata lookup, so
    network access is required for them to succeed.
    """

    processor = YouTubeInputProcessor(max_duration_minutes=20)

    # Test URLs (replace with actual URLs for testing)
    test_urls = [
        "https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # Rick Roll (short video)
        "https://youtu.be/dQw4w9WgXcQ",  # Short format
        # Invalid URL test. The previous sample "invalid_url" is exactly 11
        # characters from the video-ID alphabet, so it was accepted as a bare
        # video ID and never exercised the validation-failure path.
        "https://example.com/not-a-video",
        "dQw4w9WgXcQ",  # Direct video ID
    ]

    separator = '=' * 50

    for url in test_urls:
        print(f"\n{separator}")
        print(f"Testing URL: {url}")
        print(separator)

        result = processor.process_input(url)
        print(processor.get_processing_summary(result))

        if result['success']:
            print("\n🔧 Technical Details:")
            for key, value in result['technical_details'].items():
                print(f"   {key}: {value}")

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()