"""
File-based caching system for course recommendations
"""
import json
import os
import hashlib
import time
from datetime import datetime, timedelta
from typing import Dict, Optional, List
import logging

logger = logging.getLogger(__name__)

class FileCourseCache:
    """File-based cache for course recommendations.

    Every entry is stored as one JSON file named ``courses_<md5>.json`` inside
    ``cache_dir``. An entry's age is derived from the file's modification
    time; entries older than ``cache_duration`` are treated as expired.
    """

    # Naming scheme shared by every method that creates or scans cache files.
    _FILE_PREFIX = 'courses_'
    _FILE_SUFFIX = '.json'

    def __init__(self, cache_dir: str = "course_cache", cache_duration_hours: int = 168):  # 7 days default
        """
        Initialize the file-based cache system

        Args:
            cache_dir: Directory to store cache files
            cache_duration_hours: How long to keep cache (default 7 days)
        """
        self.cache_dir = cache_dir
        self.cache_duration = timedelta(hours=cache_duration_hours)

        # Create cache directory if it doesn't exist. exist_ok=True keeps this
        # safe when another process creates the directory between the check
        # and the makedirs call.
        if not os.path.exists(self.cache_dir):
            os.makedirs(self.cache_dir, exist_ok=True)
            logger.info("Created cache directory: %s", self.cache_dir)

    def _generate_cache_key(self, domain: str, field_prompt: str, course_type: str = "online") -> str:
        """Generate cache key (MD5 hex digest) from domain, field prompt and course type.

        The three parts are joined with ``_`` before hashing, so different
        argument combinations can map to the same key (``("a_b", "c")`` vs
        ``("a", "b_c")``). The derivation is kept as-is so existing cache files
        remain addressable; ``get_cached_courses`` guards against such
        collisions by checking the metadata stored inside the file.
        """
        cache_data = f"{domain}_{field_prompt}_{course_type}"
        return hashlib.md5(cache_data.encode()).hexdigest()

    def _get_cache_filename(self, cache_key: str) -> str:
        """Get full path for the cache file belonging to ``cache_key``."""
        return os.path.join(
            self.cache_dir, f"{self._FILE_PREFIX}{cache_key}{self._FILE_SUFFIX}"
        )

    def _list_cache_files(self) -> List[str]:
        """Return the names (not paths) of all cache files in the cache directory."""
        return [
            name for name in os.listdir(self.cache_dir)
            if name.startswith(self._FILE_PREFIX) and name.endswith(self._FILE_SUFFIX)
        ]

    def _is_expired(self, path: str) -> bool:
        """Return True if the file at ``path`` is older than the cache duration.

        Ages are compared in epoch seconds rather than naive local datetimes so
        the result is not skewed by daylight-saving changes.

        Raises:
            OSError: If the file's modification time cannot be read
                (``FileNotFoundError`` when the file does not exist).
        """
        age_seconds = time.time() - os.path.getmtime(path)
        return age_seconds > self.cache_duration.total_seconds()

    def get_cached_courses(self, domain: str, field_prompt: str, course_type: str = "online") -> Optional[List[Dict]]:
        """
        Get cached course recommendations

        Args:
            domain: The domain (e.g., "Technology")
            field_prompt: The field/skill prompt
            course_type: Type of courses ("online" or "offline")

        Returns:
            Cached course data or None if not found/expired, if the cache file
            is malformed, or if it was written for a different request
        """
        try:
            cache_key = self._generate_cache_key(domain, field_prompt, course_type)
            cache_file = self._get_cache_filename(cache_key)

            # EAFP: a missing file is an ordinary cache miss, not an error.
            try:
                expired = self._is_expired(cache_file)
            except FileNotFoundError:
                return None

            if expired:
                # Cache expired, remove file (tolerate it being removed already)
                try:
                    os.remove(cache_file)
                except FileNotFoundError:
                    pass
                logger.info("Cache expired and removed: %s", cache_file)
                return None

            # Read cache file
            with open(cache_file, 'r', encoding='utf-8') as f:
                cache_data = json.load(f)

            if not isinstance(cache_data, dict):
                logger.warning("Ignoring malformed cache file: %s", cache_file)
                return None

            # Guard against key collisions: only serve the entry if the stored
            # request metadata (when present) matches what was asked for.
            requested = {
                'domain': domain,
                'field_prompt': field_prompt,
                'course_type': course_type,
            }
            for field, expected in requested.items():
                if field in cache_data and cache_data[field] != expected:
                    logger.info(
                        "Cache entry %s belongs to a different request; treating as miss",
                        cache_file,
                    )
                    return None

            logger.info("Cache hit for %s - %s (%s)", domain, field_prompt, course_type)
            return cache_data.get('courses', [])

        except Exception as e:
            logger.error("Error reading cache: %s", e)
            return None

    def cache_courses(self, domain: str, field_prompt: str, courses: List[Dict], course_type: str = "online") -> bool:
        """
        Cache course recommendations to file

        The data is written to a temporary file in the cache directory and
        then moved into place with ``os.replace``, so a failed or interrupted
        write never leaves a truncated cache file behind and never destroys a
        previously cached entry.

        Args:
            domain: The domain (e.g., "Technology")
            field_prompt: The field/skill prompt
            courses: List of course recommendations
            course_type: Type of courses ("online" or "offline")

        Returns:
            True if caching successful, False otherwise
        """
        tmp_path = None
        try:
            cache_key = self._generate_cache_key(domain, field_prompt, course_type)
            cache_file = self._get_cache_filename(cache_key)

            cache_data = {
                'domain': domain,
                'field_prompt': field_prompt,
                'course_type': course_type,
                'cached_at': datetime.now().isoformat(),
                'courses': courses
            }

            # The ".tmp" suffix keeps the temporary file out of the cache
            # listing; the random part avoids clashes between concurrent writers.
            tmp_path = f"{cache_file}.{os.urandom(8).hex()}.tmp"
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(cache_data, f, indent=2, ensure_ascii=False)

            os.replace(tmp_path, cache_file)
            tmp_path = None  # Moved into place; nothing left to clean up.

            logger.info(
                "Cached %d courses for %s - %s (%s)",
                len(courses), domain, field_prompt, course_type,
            )
            return True

        except Exception as e:
            logger.error("Error caching courses: %s", e)
            return False

        finally:
            # Remove the temporary file if the write did not complete.
            if tmp_path is not None:
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass

    def clear_cache(self) -> bool:
        """Clear all cache files

        Returns:
            True if every cache file was removed (or was already gone),
            False if listing or removing failed
        """
        try:
            for filename in self._list_cache_files():
                try:
                    os.remove(os.path.join(self.cache_dir, filename))
                except FileNotFoundError:
                    # Already removed by someone else: the goal is met.
                    continue

            logger.info("Cache cleared successfully")
            return True

        except Exception as e:
            logger.error("Error clearing cache: %s", e)
            return False

    def get_cache_stats(self) -> Dict:
        """Get cache statistics

        Returns:
            Dict with the keys 'total_cache_files', 'valid_caches',
            'expired_caches', 'total_cached_courses' and
            'cache_duration_hours', or ``{'error': <message>}`` if the cache
            directory could not be inspected. Files that disappear while the
            scan is running are skipped and not counted.
        """
        try:
            total_files = 0
            valid_caches = 0
            expired_caches = 0
            total_courses = 0

            for cache_file in self._list_cache_files():
                full_path = os.path.join(self.cache_dir, cache_file)
                try:
                    expired = self._is_expired(full_path)
                except FileNotFoundError:
                    # Removed between listing and inspection.
                    continue

                total_files += 1
                if expired:
                    expired_caches += 1
                    continue

                valid_caches += 1
                # Count courses in this cache. Best effort: an unreadable or
                # malformed file still counts as a cache file but contributes
                # no courses.
                try:
                    with open(full_path, 'r', encoding='utf-8') as f:
                        cache_data = json.load(f)
                    total_courses += len(cache_data.get('courses', []))
                except (OSError, ValueError, AttributeError, TypeError) as e:
                    logger.debug("Could not count courses in %s: %s", full_path, e)

            return {
                'total_cache_files': total_files,
                'valid_caches': valid_caches,
                'expired_caches': expired_caches,
                'total_cached_courses': total_courses,
                'cache_duration_hours': self.cache_duration.total_seconds() / 3600
            }

        except Exception as e:
            logger.error("Error getting cache stats: %s", e)
            return {'error': str(e)}

    def cleanup_expired_cache(self) -> int:
        """Remove expired cache files

        Failures are handled per file, so one problematic file does not stop
        the sweep and the returned count reflects what was actually removed.

        Returns:
            Number of cache files removed (0 if the directory could not be listed)
        """
        try:
            cache_files = self._list_cache_files()
        except Exception as e:
            logger.error("Error cleaning up cache: %s", e)
            return 0

        removed_count = 0
        for cache_file in cache_files:
            full_path = os.path.join(self.cache_dir, cache_file)
            try:
                if self._is_expired(full_path):
                    os.remove(full_path)
                    removed_count += 1
            except FileNotFoundError:
                # Removed concurrently; nothing left to do for this file.
                continue
            except OSError as e:
                logger.error("Error cleaning up cache: %s", e)

        if removed_count > 0:
            logger.info("Cleaned up %d expired cache files", removed_count)

        return removed_count

# Global cache instance, created with the default settings.
# NOTE: this runs FileCourseCache.__init__ when the module is imported, which
# creates the default "course_cache" directory (a relative path, so it is
# resolved against the current working directory) if it does not exist yet.
course_cache = FileCourseCache()