import glob
import html
import json
import os
import re
import subprocess
import tempfile
from dataclasses import dataclass, asdict
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any, Tuple

import yt_dlp


@dataclass
class ExtractedContent:
    """Result record for one transcript extraction attempt.

    Instances are written to disk with ``asdict`` and rebuilt with
    ``ExtractedContent(**data)``, so the field names double as the keys of the
    saved JSON files. Failed attempts are represented too (``success=False``
    with ``error_message`` set).
    """
    # YouTube video ID (the ``v=`` value of the watch URL).
    video_id: str
    # Transcript text; empty string when extraction failed.
    transcript: str
    # Video duration as reported by yt-dlp (shown to users with an "s" suffix).
    duration: float
    # Transcript language; the extractor in this file always sets "en".
    language: str
    # Number of whitespace-separated words in ``transcript``.
    word_count: int
    # True when a non-empty transcript was obtained.
    success: bool
    # Video title from yt-dlp, or "Unknown" when it could not be fetched.
    video_title: str = ""
    # How the transcript was obtained: "captions", "audio_transcription" or "failed".
    extraction_method: str = ""
    # ``datetime.now().isoformat()`` at the time the record was created.
    extraction_timestamp: str = ""
    # Human-readable failure reason; None on success.
    error_message: Optional[str] = None


class WorkingYouTubeExtractor:
    """Extract YouTube transcripts and cache them as JSON files.

    Extraction first tries the video's English captions (via yt-dlp); if none
    are available it downloads the audio track and transcribes it with the
    ``whisper`` command-line tool. Every attempt, successful or not, is saved
    to ``<save_dir>/<video_id>_transcript.json`` so later calls can reuse it.
    """

    # Any inline WebVTT tag: <c>, </c>, <c.colorE5E5E5>, <v Name>, <00:00:01.500>, ...
    _VTT_TAG_RE = re.compile(r"<[^>]*>")
    # Files that can match "<video_id>.*" in the temp dir but are never the audio track.
    _NON_AUDIO_SUFFIXES = ('.vtt', '.txt', '.part', '.ytdl')

    def __init__(self, save_dir: str = "transcripts"):
        """Create the extractor and make sure the cache directory exists.

        Args:
            save_dir: Directory for saved transcript JSON files. Missing parent
                directories are created as well.
        """
        self.temp_dir = tempfile.gettempdir()
        self.save_dir = Path(save_dir)
        # parents=True so nested paths such as "data/cache/transcripts" work.
        self.save_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _remove_quietly(path) -> None:
        """Delete *path*, ignoring filesystem errors (cleanup is best effort)."""
        try:
            os.remove(path)
        except OSError:
            # A leftover temp file must never turn a finished extraction into a failure.
            pass

    def get_save_filename(self, video_id: str) -> Path:
        """Return the path of the JSON cache file for *video_id*."""
        return self.save_dir / f"{video_id}_transcript.json"

    def save_transcript(self, content: "ExtractedContent") -> bool:
        """Write *content* to its JSON cache file.

        The data is written to a sibling ``.tmp`` file and then moved into
        place with ``os.replace``, so an interrupted write cannot leave a
        truncated cache file behind.

        Returns:
            True if the file was written, False otherwise (the error is printed).
        """
        save_file = self.get_save_filename(content.video_id)
        tmp_file = save_file.with_name(save_file.name + ".tmp")

        try:
            with open(tmp_file, 'w', encoding='utf-8') as f:
                json.dump(asdict(content), f, indent=2, ensure_ascii=False)
            os.replace(tmp_file, save_file)
        except (OSError, TypeError, ValueError) as e:
            print(f"❌ Failed to save transcript: {e}")
            self._remove_quietly(tmp_file)
            return False

        print(f"✅ Transcript saved to: {save_file}")
        return True

    def load_transcript(self, video_id: str) -> Optional["ExtractedContent"]:
        """Load the cached record for *video_id*.

        Returns:
            The stored ``ExtractedContent``, or None when no cache file exists
            or it cannot be read/decoded (the error is printed).
        """
        save_file = self.get_save_filename(video_id)

        try:
            if not save_file.exists():
                return None

            with open(save_file, 'r', encoding='utf-8') as f:
                content_dict = json.load(f)

            # TypeError covers non-mapping JSON and unknown/missing field names.
            content = ExtractedContent(**content_dict)
        except (OSError, ValueError, TypeError) as e:
            print(f"❌ Failed to load transcript: {e}")
            return None

        print(f"✅ Loaded saved transcript from: {save_file}")
        return content

    def extract_captions(self, video_id: str, url: str) -> Tuple[bool, str]:
        """Try to fetch the video's English captions with yt-dlp.

        Returns:
            ``(True, transcript)`` when a non-empty transcript was parsed from a
            downloaded ``.vtt`` file, otherwise ``(False, "")``.
        """
        print("🔍 Checking for captions...")

        ydl_opts = {
            'writesubtitles': True,
            'writeautomaticsub': True,
            'subtitleslangs': ['en'],
            'subtitlesformat': 'vtt',
            'skip_download': True,
            'outtmpl': os.path.join(self.temp_dir, video_id),
            'quiet': True,
        }
        # Escape so glob metacharacters in the temp path or ID are taken literally.
        pattern = os.path.join(glob.escape(self.temp_dir), f'{glob.escape(video_id)}*.vtt')

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])

            # Sorted for a deterministic choice; use the first file that yields text.
            for caption_file in sorted(glob.glob(pattern)):
                transcript = self.parse_captions(caption_file)
                if transcript:
                    print("✅ Found captions!")
                    return True, transcript

            return False, ""

        except Exception as e:
            # Boundary with yt-dlp: any failure here just means "no captions".
            print(f"Caption extraction failed: {e}")
            return False, ""

        finally:
            # Remove caption files even when the download or parsing failed.
            for leftover in glob.glob(pattern):
                self._remove_quietly(leftover)

    @staticmethod
    def _starts_vtt_metadata_block(line: str) -> bool:
        """Return True if *line* opens a WEBVTT header, NOTE, STYLE or REGION block."""
        if line.startswith('WEBVTT'):
            return True
        return line.split(None, 1)[0] in ('NOTE', 'STYLE', 'REGION')

    def parse_captions(self, file_path: str) -> str:
        """Convert a WebVTT caption file into plain transcript text.

        Dropped: the ``WEBVTT`` header block (including ``Kind:``/``Language:``
        lines), ``NOTE``/``STYLE``/``REGION`` blocks, cue identifiers, timing
        lines and inline tags such as ``<c>`` or ``<00:00:01.500>``. HTML
        entities are decoded. Consecutive identical lines are collapsed because
        auto-generated captions repeat the previous line in every rolling cue.

        Returns:
            The caption text joined with single spaces, or "" if the file
            cannot be read.
        """
        try:
            # utf-8-sig transparently drops a leading BOM if present.
            with open(file_path, 'r', encoding='utf-8-sig') as f:
                lines = [raw.strip() for raw in f.read().split('\n')]
        except (OSError, ValueError) as e:
            print(f"Caption parsing failed: {e}")
            return ""

        transcript_lines = []
        skipping_block = False   # inside a header/NOTE/STYLE/REGION block
        at_block_start = True    # previous line was blank (or start of file)

        for index, line in enumerate(lines):
            if not line:
                skipping_block = False
                at_block_start = True
                continue

            starts_block = at_block_start
            at_block_start = False

            if skipping_block:
                continue
            if starts_block and self._starts_vtt_metadata_block(line):
                skipping_block = True
                continue
            if '-->' in line:
                continue

            # A line directly above a timing line is a cue identifier, not text.
            # Checking the neighbour (instead of isdigit) keeps numeric captions.
            next_line = lines[index + 1] if index + 1 < len(lines) else ""
            if starts_block and '-->' in next_line:
                continue

            text = html.unescape(self._VTT_TAG_RE.sub('', line))
            text = ' '.join(text.split())
            if text and (not transcript_lines or transcript_lines[-1] != text):
                transcript_lines.append(text)

        return ' '.join(transcript_lines)

    def extract_audio(self, video_id: str, url: str) -> Tuple[bool, str]:
        """Download the best available audio track with yt-dlp.

        Returns:
            ``(True, path)`` of the downloaded file in the temp directory, or
            ``(False, "")`` on failure. The caller owns (and should delete)
            the returned file.
        """
        print("🎵 Downloading audio...")

        ydl_opts = {
            'format': 'bestaudio',
            'outtmpl': os.path.join(self.temp_dir, f'{video_id}.%(ext)s'),
            'quiet': True,
        }
        pattern = os.path.join(glob.escape(self.temp_dir), f'{glob.escape(video_id)}.*')

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])

            # The pattern also matches stale captions, Whisper output and partial
            # downloads for the same ID; skip those and any empty files.
            audio_files = [
                path for path in sorted(glob.glob(pattern))
                if not path.lower().endswith(self._NON_AUDIO_SUFFIXES)
                and os.path.getsize(path) > 0
            ]
        except Exception as e:
            # Boundary with yt-dlp: report and signal failure to the caller.
            print(f"Audio download failed: {e}")
            return False, ""

        if not audio_files:
            return False, ""

        print(f"✅ Audio downloaded: {os.path.basename(audio_files[0])}")
        return True, audio_files[0]

    def transcribe_audio(self, audio_file: str, model: str = "tiny", timeout: int = 300) -> str:
        """Transcribe *audio_file* with the ``whisper`` command-line tool.

        Args:
            audio_file: Path of the audio file to transcribe.
            model: Whisper model name passed to ``--model``.
            timeout: Maximum run time of the whisper process, in seconds.

        Returns:
            The transcript text, or "" on any failure (the reason is printed).
        """
        print("🎙️ Transcribing audio...")

        cmd = [
            'whisper', audio_file,
            '--model', model,
            '--output_format', 'txt',
            '--output_dir', self.temp_dir
        ]

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
        except FileNotFoundError:
            print("Transcription failed: 'whisper' command not found on PATH")
            return ""
        except subprocess.TimeoutExpired:
            print(f"Transcription failed: whisper timed out after {timeout}s")
            return ""
        except OSError as e:
            print(f"Transcription failed: {e}")
            return ""

        if result.returncode != 0:
            detail = (result.stderr or "").strip()[:500]
            print(f"Transcription failed (exit code {result.returncode}): {detail}")
            return ""

        # Whisper names its output after the input file's base name.
        base_name = os.path.splitext(os.path.basename(audio_file))[0]
        txt_file = os.path.join(self.temp_dir, f'{base_name}.txt')

        try:
            with open(txt_file, 'r', encoding='utf-8') as f:
                transcript = f.read().strip()
        except (OSError, ValueError) as e:
            print(f"Transcription failed: could not read whisper output: {e}")
            return ""

        self._remove_quietly(txt_file)
        return transcript

    def _record_failure(self, video_id: str, duration: float, title: str,
                        method: str, message: str) -> "ExtractedContent":
        """Build a failed result, save it to the cache and return it."""
        content = ExtractedContent(
            video_id=video_id,
            transcript="",
            duration=duration,
            language="en",
            word_count=0,
            success=False,
            video_title=title,
            extraction_method=method,
            extraction_timestamp=datetime.now().isoformat(),
            error_message=message
        )
        self.save_transcript(content)  # Save even failed attempts
        return content

    def extract_content(self, video_id: str, force_refresh: bool = False) -> "ExtractedContent":
        """Return the transcript for *video_id*, using the cache when possible.

        Args:
            video_id: YouTube video ID.
            force_refresh: When True, ignore any cached record and extract again.

        Returns:
            An ``ExtractedContent``; check ``success`` and ``error_message``.
            This method does not raise for extraction failures. The result is
            always saved, failed attempts included.
        """
        # A cached record is reused only if it was a successful extraction.
        if not force_refresh:
            existing_content = self.load_transcript(video_id)
            if existing_content and existing_content.success:
                print(f"📁 Using saved transcript for {video_id}")
                return existing_content

        print(f"🔄 Extracting fresh content for {video_id}")
        url = f"https://www.youtube.com/watch?v={video_id}"

        try:
            print(f"📹 Getting info for {video_id}...")

            with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
                info = ydl.extract_info(url, download=False)

            # "or" also covers keys that are present but null (e.g. live streams).
            duration = info.get('duration') or 0
            title = info.get('title') or 'Unknown'

            print(f"Video: {title}")

            # Captions are cheap; only fall back to audio transcription without them.
            success, transcript = self.extract_captions(video_id, url)
            extraction_method = "captions"

            if not success:
                success, audio_file = self.extract_audio(video_id, url)
                if not success:
                    return self._record_failure(
                        video_id, duration, title, "failed", "Audio extraction failed")

                extraction_method = "audio_transcription"
                try:
                    transcript = self.transcribe_audio(audio_file)
                finally:
                    self._remove_quietly(audio_file)

                if not transcript:
                    return self._record_failure(
                        video_id, duration, title, extraction_method, "Transcription failed")

            content = ExtractedContent(
                video_id=video_id,
                transcript=transcript,
                duration=duration,
                language="en",
                word_count=len(transcript.split()),
                success=True,
                video_title=title,
                extraction_method=extraction_method,
                extraction_timestamp=datetime.now().isoformat()
            )
            self.save_transcript(content)
            return content

        except Exception as e:
            # Top-level boundary: yt-dlp raises many error types; report them
            # through the result object instead of propagating.
            return self._record_failure(video_id, 0, "Unknown", "failed", str(e))

    def list_saved_transcripts(self) -> list[Dict[str, Any]]:
        """Summarize every saved transcript file, newest first.

        Returns:
            One dict per readable ``*_transcript.json`` file with the keys
            ``video_id``, ``title``, ``word_count``, ``duration``, ``success``,
            ``extraction_method``, ``saved_at`` and ``file_path``. Unreadable
            files are reported and skipped.
        """
        transcripts = []

        for file_path in self.save_dir.glob("*_transcript.json"):
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except (OSError, ValueError) as e:
                print(f"Error reading {file_path}: {e}")
                continue

            if not isinstance(data, dict):
                print(f"Error reading {file_path}: expected a JSON object")
                continue

            transcripts.append({
                'video_id': data.get('video_id', 'unknown'),
                'title': data.get('video_title', 'Unknown'),
                'word_count': data.get('word_count', 0),
                'duration': data.get('duration', 0),
                'success': data.get('success', False),
                'extraction_method': data.get('extraction_method', 'unknown'),
                'saved_at': data.get('extraction_timestamp', 'unknown'),
                'file_path': str(file_path)
            })

        # Compare as strings so a null/non-string timestamp cannot break the sort.
        return sorted(transcripts, key=lambda item: str(item['saved_at'] or ''), reverse=True)

    def delete_transcript(self, video_id: str) -> bool:
        """Delete the cache file for *video_id*.

        Returns:
            True if a file was deleted, False if none existed or deletion failed.
        """
        save_file = self.get_save_filename(video_id)
        try:
            save_file.unlink()
        except FileNotFoundError:
            print(f"❌ No transcript found for {video_id}")
            return False
        except OSError as e:
            print(f"❌ Failed to delete transcript: {e}")
            return False

        print(f"✅ Deleted transcript for {video_id}")
        return True


# Enhanced integration function
def extract_youtube_content_with_save(input_result: Dict[str, Any], save_dir: str = "transcripts", force_refresh: bool = False) -> Dict[str, Any]:
    """Run transcript extraction for the video described by *input_result*.

    Args:
        input_result: Output of the previous pipeline stage. Must have a truthy
            ``'success'`` entry and a ``'metadata'`` entry that exposes the video
            ID either as a ``video_id`` attribute or as a ``'video_id'`` key.
        save_dir: Directory in which transcripts are cached.
        force_refresh: When True, ignore any cached transcript.

    Returns:
        On success: ``{'success': True, 'content', 'user_message', 'save_location'}``.
        On failure: ``{'success': False, 'error', 'user_message'}``.
    """
    if not input_result.get('success', False):
        return {
            'success': False,
            'error': 'Input processing failed',
            'user_message': 'Cannot proceed without valid input'
        }

    metadata = input_result.get('metadata')
    if not metadata:
        return {
            'success': False,
            'error': 'No metadata available',
            'user_message': 'Missing video metadata'
        }

    # Accept both attribute-style metadata objects and plain dicts.
    if isinstance(metadata, dict):
        video_id = metadata.get('video_id')
    else:
        video_id = getattr(metadata, 'video_id', None)
    if not video_id:
        return {
            'success': False,
            'error': 'No video ID available',
            'user_message': 'Missing video ID in metadata'
        }

    extractor = WorkingYouTubeExtractor(save_dir=save_dir)
    started_at = datetime.now()
    content = extractor.extract_content(video_id, force_refresh=force_refresh)

    if not content.success:
        return {
            'success': False,
            'error': content.error_message,
            'user_message': f"Extraction failed: {content.error_message}"
        }

    # The cache file always exists after extract_content() returns (fresh results
    # are saved too), so its existence says nothing about where the result came
    # from. A record stamped before this call started was loaded from the cache.
    try:
        from_cache = datetime.fromisoformat(content.extraction_timestamp) < started_at
    except (TypeError, ValueError):
        # Fresh results always carry a valid timestamp; anything else is an old record.
        from_cache = True
    from_cache = from_cache and not force_refresh

    load_method = "📁 Loaded from cache" if from_cache else "🔄 Freshly extracted"
    save_location = extractor.get_save_filename(content.video_id)

    user_message = f"""✅ Content extraction successful! {load_method}

📹 Video: {content.video_title}
⏱️ Duration: {content.duration}s
📝 Word Count: {content.word_count:,}
🔧 Method: {content.extraction_method}
💾 Saved to: {save_location}

📊 Preview: {content.transcript[:200]}...

Ready for next stage!"""

    return {
        'success': True,
        'content': content,
        'user_message': user_message,
        'save_location': str(save_location)
    }


def test_extractor_with_save():
    """Manual smoke test for the extract / cache / list round trip.

    Extracts one hard-coded video, repeats the call (expected to be served
    from the saved transcript) and prints up to three saved entries. Needs
    network access; results are only printed, nothing is asserted.
    """
    print("Testing YouTube Content Extractor with Save")
    print("=" * 50)

    extractor = WorkingYouTubeExtractor(save_dir="transcripts")
    video_id = "HsyYnY674sE"  # Leadership video

    print("🔄 First extraction (will save):")
    first = extractor.extract_content(video_id)

    # Bail out early: nothing else is worth showing if the first run failed.
    if not first.success:
        print("❌ FAILED!")
        print(f"Error: {first.error_message}")
        return

    print("✅ SUCCESS!")
    print(f"Duration: {first.duration}s")
    print(f"Words: {first.word_count}")
    print(f"Method: {first.extraction_method}")
    print(f"Preview: {first.transcript[:100]}...")

    print("\n📁 Second extraction (should load from cache):")
    second = extractor.extract_content(video_id)
    if second.success:
        print("✅ SUCCESS! (Loaded from cache)")

    print("\n📋 Saved transcripts:")
    newest_three = extractor.list_saved_transcripts()[:3]
    for entry in newest_three:
        print(f"   • {entry['video_id']}: {entry['title']} ({entry['word_count']} words)")


# Run the manual smoke test when this file is executed as a script
# (importing the module does not trigger it).
if __name__ == "__main__":
    test_extractor_with_save()