import json
import logging
from typing import Dict, List, Optional, Set
from dataclasses import dataclass
from difflib import SequenceMatcher
import re
from langchain_ollama import ChatOllama
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage

# Configure logging: request INFO-level output via the root logger as soon as
# this module is imported, then create the module-level logger used by the
# engine and the demo below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

@dataclass
class Skill:
    """A single skill candidate handled by the skills identification engine."""
    # Identifier taken from the input data; LLM-generated skills receive a
    # temporary id of 1000 + their line index in the LLM reply.
    skill_id: int
    # Display name; also the text used for relevance scoring and de-duplication.
    skill_name: str
    # Proficiency level. Copied from department data, None for organization
    # skills, and the recommended level for LLM-generated skills.
    skill_level: Optional[str] = None
    # Usage count from department data; feeds a small relevance bonus.
    # The engine sets 0 for organization and LLM-generated skills.
    skill_count: Optional[int] = None
    # Origin of the skill. The engine assigns "department", "organization" or
    # "external"; "unknown" is only the default for skills built elsewhere.
    source: str = "unknown"

@dataclass
class SkillsRequest:
    """Input bundle for one skills identification run."""
    # Free-text goal or challenge; drives level detection and relevance scoring.
    user_prompt: str
    # Department lookup result. The engine reads the 'matched' and
    # 'inferred_department' keys via .get(), so both are optional.
    department_info: Dict
    # Head count of the department; department skills are only used when the
    # department matched and this is at least 3.
    employee_count: int
    # Department skills with metadata: dicts read for 'skill_id', 'skill_name',
    # 'skill_level' and 'skill_count'.
    filtered_skills: List[Dict]
    # Organization-wide skills: dicts read for 'skill_id' and 'skill_name' only.
    organization_skills: List[Dict]

@dataclass
class SkillsResponse:
    """Result of one skills identification run."""
    # Selected skills in ranked order, capped at the engine's target count.
    skills: List[Skill]
    # Number of entries in `skills`.
    total_skills: int
    # Count of selected skills per source ("department", "organization",
    # "external"); an empty dict when identification failed.
    skills_by_source: Dict[str, int]
    # "Beginner", "Intermediate" or "Advanced"; "Intermediate" is the fallback.
    recommended_level: str

class SkillsIdentificationEngine:
    """Identify and rank skills for a user goal using existing skill data and an LLM.

    Candidate skills are taken from the request's department or organization
    list, scored against the user prompt, de-duplicated, and topped up with
    LLM-generated ("external") skills until ``target_skills_count`` is reached.
    """

    # Levels in ascending order; used to validate LLM output and to score levels.
    _LEVELS = ("Beginner", "Intermediate", "Advanced")
    # Level used whenever the LLM reply is unusable or a step fails.
    _DEFAULT_LEVEL = "Intermediate"
    # Minimum combined score a candidate needs to survive relevance filtering.
    _RELEVANCE_THRESHOLD = 0.3
    # Normalized names more similar than this are treated as duplicates.
    _DUPLICATE_SIMILARITY = 0.8
    # Leading list decoration in LLM replies: "-", "*", "•", "1. " or "2) ".
    _LIST_MARKER_RE = re.compile(r'^\s*(?:[-*•]+\s*|\d+[.)]\s+)')
    # Punctuation dropped during name normalization. Unicode letters/digits are
    # kept, and so are "+" and "#" so that "C", "C++" and "C#" stay distinct.
    _NAME_NOISE_RE = re.compile(r'[^\w\s+#]')
    # Word tokens used for overlap scoring (punctuation does not glue to words).
    _WORD_RE = re.compile(r'[\w+#]+')

    def __init__(self, model_name: str = "gemma3:12b", target_skills_count: int = 15):
        """Create the engine.

        Args:
            model_name: Ollama model name handed to ``ChatOllama``.
            target_skills_count: Maximum number of skills in a response.

        Raises:
            Exception: Whatever ``ChatOllama`` raises on construction; the
                error is logged and re-raised unchanged.
        """
        self.target_skills_count = target_skills_count
        try:
            self.llm = ChatOllama(model=model_name)
        except Exception as e:
            logger.error("Failed to initialize LLM: %s", e)
            raise
        logger.info("Skills engine initialized with model: %s", model_name)

    def identify_skills(self, request: SkillsRequest) -> SkillsResponse:
        """
        Main method to identify and prioritize skills

        Args:
            request: SkillsRequest containing all necessary data

        Returns:
            SkillsResponse with identified skills. This method is best-effort:
            any unexpected error is logged and an empty response with level
            "Intermediate" is returned instead of raising.
        """
        try:
            department_info = request.department_info

            # Step 1: Department skills are only trusted for a matched
            # department with at least three employees.
            use_department_skills = (
                department_info.get('matched', False) and
                request.employee_count >= 3
            )
            logger.info("Using department skills: %s", use_department_skills)

            # Step 2: Determine recommended skill level based on prompt
            recommended_level = self._determine_skill_level(request.user_prompt)

            # Steps 3-5: Collect candidates, keep the relevant ones (ranked),
            # and drop near-duplicate names.
            available_skills = self._get_available_skills(request, use_department_skills)
            relevant_skills = self._filter_by_relevance(
                available_skills, request.user_prompt, recommended_level
            )
            unique_skills = self._remove_duplicates(relevant_skills)

            # Step 6: Fill gaps with external skills if needed
            missing = self.target_skills_count - len(unique_skills)
            if missing > 0:
                # A matched department usually carries its name in
                # 'department_name' while 'inferred_department' is empty;
                # fall back so the LLM still sees the department.
                department = (
                    department_info.get('inferred_department')
                    or department_info.get('department_name')
                    or ''
                )
                unique_skills.extend(self._generate_external_skills(
                    request.user_prompt,
                    department,
                    recommended_level,
                    missing,
                    [skill.skill_name for skill in unique_skills],  # Exclude existing
                ))

            # Step 7: Final selection (list is already in ranked order)
            final_skills = unique_skills[:self.target_skills_count]

            # Step 8: Calculate statistics
            response = SkillsResponse(
                skills=final_skills,
                total_skills=len(final_skills),
                skills_by_source=self._calculate_source_statistics(final_skills),
                recommended_level=recommended_level
            )

            logger.info(
                "Skills identification completed: %d skills identified",
                len(final_skills),
            )
            return response

        except Exception as e:
            # Deliberate best-effort boundary; keep the traceback in the log.
            logger.exception("Error in skills identification: %s", e)
            return SkillsResponse(
                skills=[],
                total_skills=0,
                skills_by_source={},
                recommended_level=self._DEFAULT_LEVEL
            )

    def _determine_skill_level(self, user_prompt: str) -> str:
        """Ask the LLM which skill level fits the user's goal.

        Args:
            user_prompt: The user's goal or challenge.

        Returns:
            "Beginner", "Intermediate" or "Advanced". The first of these (in
            that order) found anywhere in the LLM reply wins; "Intermediate"
            is returned when none is found or the LLM call fails.
        """
        prompt_template = PromptTemplate(
            input_variables=["user_prompt"],
            template="""
            Analyze the following user prompt and determine what skill level would be most appropriate for their learning/improvement needs.
            
            User Goal/Challenge: {user_prompt}
            
            Consider these indicators:
            - Beginner: Words like "learn", "basic", "introduction", "start", "new to"
            - Intermediate: Words like "improve", "enhance", "develop", "better", "advance"
            - Advanced: Words like "master", "expert", "lead", "advanced", "complex", "strategic"
            
            Instructions:
            1. Analyze the user's language and intent
            2. Identify their current implied skill level
            3. Recommend the appropriate target level
            4. Return ONLY one word: "Beginner", "Intermediate", or "Advanced"
            
            Recommended Level:
            """
        )

        try:
            formatted_prompt = prompt_template.format(user_prompt=user_prompt)
            response = self.llm.invoke([HumanMessage(content=formatted_prompt)])

            # The model may wrap the answer in extra words, so search for it.
            reply = response.content.strip().lower()
            for level in self._LEVELS:
                if level.lower() in reply:
                    return level

            # Default to Intermediate if unclear
            return self._DEFAULT_LEVEL

        except Exception as e:
            logger.error("Error determining skill level: %s", e)
            return self._DEFAULT_LEVEL

    def _get_available_skills(self, request: SkillsRequest, use_department_skills: bool) -> List[Skill]:
        """Build the candidate list from department or organization data.

        Args:
            request: Request holding ``filtered_skills`` (department) and
                ``organization_skills``.
            use_department_skills: Choose the department list when truthy,
                otherwise the organization list.

        Returns:
            Skills tagged with source "department" or "organization".
            Organization skills carry no level and a count of 0. Entries
            without a non-empty string ``skill_name`` are skipped (and logged)
            because they cannot be scored or compared.
        """
        if use_department_skills:
            raw_skills, source = request.filtered_skills, "department"
        else:
            raw_skills, source = request.organization_skills, "organization"
        from_department = source == "department"

        skills = []
        for skill_data in raw_skills or []:
            name = skill_data.get('skill_name')
            if not isinstance(name, str) or not name.strip():
                logger.warning(
                    "Skipping %s skill without a usable name: %r", source, skill_data
                )
                continue
            skills.append(Skill(
                skill_id=skill_data.get('skill_id'),
                skill_name=name,
                # Organization skills don't have level or usage info
                skill_level=skill_data.get('skill_level') if from_department else None,
                skill_count=skill_data.get('skill_count', 0) if from_department else 0,
                source=source
            ))

        return skills

    def _filter_by_relevance(self, skills: List[Skill], user_prompt: str, recommended_level: str) -> List[Skill]:
        """Filter and rank skills by relevance to user prompt.

        The score is the text relevance of the name, plus a level bonus when
        the skill has a level, plus a usage bonus (0.01 per use, capped at
        0.1) for department skills.

        Args:
            skills: Candidate skills.
            user_prompt: The user's goal or challenge.
            recommended_level: Level the level bonus is measured against.

        Returns:
            Skills scoring at least 0.3, best first; ties keep input order.
        """
        scored_skills = []

        for skill in skills:
            relevance_score = self._calculate_skill_relevance(skill.skill_name, user_prompt)

            # Adjust score based on skill level match
            if skill.skill_level:
                relevance_score += self._calculate_level_bonus(
                    skill.skill_level, recommended_level
                )

            # Add usage frequency bonus for department skills
            if skill.source == "department" and skill.skill_count:
                relevance_score += min(skill.skill_count * 0.01, 0.1)  # Max 0.1 bonus

            scored_skills.append((skill, relevance_score))

        # Stable sort by relevance score (descending)
        scored_skills.sort(key=lambda pair: pair[1], reverse=True)

        return [
            skill for skill, score in scored_skills
            if score >= self._RELEVANCE_THRESHOLD
        ]

    def _calculate_skill_relevance(self, skill_name: str, user_prompt: str) -> float:
        """Calculate how relevant a skill is to the user prompt.

        Args:
            skill_name: Name of the candidate skill.
            user_prompt: The user's goal or challenge.

        Returns:
            A score in [0.0, 1.0]:
            * 0.0 when either text is empty (an empty name must never count
              as a match);
            * 1.0 when the name occurs in the prompt as a whole word/phrase,
              case-insensitively (so "R" does not match "improve");
            * otherwise the larger of the word-set Jaccard overlap and the
              character-level ``SequenceMatcher`` ratio, so names without a
              literal shared word can still earn a fuzzy score.
        """
        skill_lower = (skill_name or "").strip().lower()
        prompt_lower = (user_prompt or "").strip().lower()
        if not skill_lower or not prompt_lower:
            return 0.0

        # Exact match on word boundaries ("+" and "#" count as word characters
        # so that "c" does not match inside "c++" or "c#").
        phrase = r'(?<![\w+#])' + re.escape(skill_lower) + r'(?![\w+#])'
        if re.search(phrase, prompt_lower):
            return 1.0

        # Word overlap (Jaccard index over word tokens)
        skill_words = set(self._WORD_RE.findall(skill_lower))
        prompt_words = set(self._WORD_RE.findall(prompt_lower))
        all_words = skill_words | prompt_words
        word_score = len(skill_words & prompt_words) / len(all_words) if all_words else 0.0

        # Sequence similarity
        sequence_score = SequenceMatcher(None, skill_lower, prompt_lower).ratio()

        return max(word_score, sequence_score)

    def _calculate_level_bonus(self, skill_level: str, recommended_level: str) -> float:
        """Calculate bonus score for skill level match.

        Args:
            skill_level: Level recorded for the skill.
            recommended_level: Level recommended for the user.

        Returns:
            0.2 for the same level, 0.1 when the skill is one level above the
            recommendation, 0.05 when it is one level below, otherwise 0.0
            (including levels outside Beginner/Intermediate/Advanced).
        """
        if skill_level == recommended_level:
            return 0.2  # Exact match bonus

        try:
            offset = self._LEVELS.index(skill_level) - self._LEVELS.index(recommended_level)
        except ValueError:
            # Unknown level label: no bonus rather than an error.
            return 0.0

        if offset == 1:
            return 0.1  # Prefer skills one level up from the recommendation
        if offset == -1:
            return 0.05
        return 0.0

    def _remove_duplicates(self, skills: List[Skill]) -> List[Skill]:
        """Remove duplicate skills based on skill name similarity.

        Args:
            skills: Skills in priority order.

        Returns:
            The input without any skill whose normalized name is similar to
            one kept earlier; the first occurrence wins and order is kept.
        """
        unique_skills = []
        seen_names = []

        for skill in skills:
            normalized_name = self._normalize_skill_name(skill.skill_name)
            if any(self._are_skills_similar(normalized_name, seen) for seen in seen_names):
                continue
            unique_skills.append(skill)
            seen_names.append(normalized_name)

        return unique_skills

    def _normalize_skill_name(self, skill_name: str) -> str:
        """Normalize skill name for comparison.

        Lower-cases the name, drops punctuation and collapses whitespace.
        Letters and digits of any script are kept, as are "+" and "#", so
        names such as "C++" / "C#" or non-Latin names do not collapse into the
        same (or an empty) key. If nothing would remain, the lower-cased,
        stripped original is returned instead.
        """
        lowered = (skill_name or "").lower()
        cleaned = " ".join(self._NAME_NOISE_RE.sub('', lowered).split())
        return cleaned or lowered.strip()

    def _are_skills_similar(self, name1: str, name2: str) -> bool:
        """Check if two skill names are similar (potential duplicates).

        Args:
            name1: First normalized name.
            name2: Second normalized name.

        Returns:
            True when the ``SequenceMatcher`` ratio exceeds 0.8.
        """
        if name1 == name2:
            return True  # Identical strings have ratio 1.0; skip the matcher
        similarity = SequenceMatcher(None, name1, name2).ratio()
        return similarity > self._DUPLICATE_SIMILARITY  # 80% similarity threshold

    def _generate_external_skills(self, user_prompt: str, department: str,
                                  recommended_level: str, count_needed: int,
                                  existing_skills: List[str]) -> List[Skill]:
        """Generate external skills using LLM.

        Args:
            user_prompt: The user's goal or challenge.
            department: Department name given to the LLM as context.
            recommended_level: Level assigned to every generated skill.
            count_needed: Maximum number of skills to return.
            existing_skills: Names already selected; sent to the LLM as names
                to avoid and used to reject duplicates in its reply.

        Returns:
            Up to ``count_needed`` skills with source "external" and temporary
            ids of 1000 + the reply line index. List markers and markdown
            emphasis are stripped from each line, and names similar to an
            existing or already accepted name are dropped. Returns an empty
            list when nothing is needed or the LLM call fails.
        """
        if count_needed <= 0:
            return []  # Nothing to generate; skip the LLM round-trip

        prompt_template = PromptTemplate(
            input_variables=["user_prompt", "department", "recommended_level", "count_needed", "existing_skills"],
            template="""
            Generate {count_needed} relevant skills for the following scenario:
            
            User Goal: {user_prompt}
            Department: {department}
            Recommended Level: {recommended_level}
            
            Existing Skills to Avoid: {existing_skills}
            
            Requirements:
            1. Generate exactly {count_needed} skills
            2. Skills should be relevant to the user's goal and department
            3. Skills should be appropriate for {recommended_level} level
            4. Avoid duplicating existing skills
            5. Focus on practical, actionable skills
            6. Return only skill names, one per line
            7. Use clear, professional skill names
            
            Skills:
            """
        )

        try:
            formatted_prompt = prompt_template.format(
                user_prompt=user_prompt,
                department=department,
                recommended_level=recommended_level,
                count_needed=count_needed,
                existing_skills=", ".join(existing_skills)
            )

            response = self.llm.invoke([HumanMessage(content=formatted_prompt)])

            # Normalized names that must not be repeated; grows as lines are accepted.
            known_names = [
                self._normalize_skill_name(name) for name in existing_skills if name
            ]
            external_skills = []

            for i, line in enumerate(response.content.strip().split('\n')):
                if len(external_skills) >= count_needed:
                    break

                # Drop "- ", "• ", "1. ", "2) " prefixes and markdown "**".
                skill_name = self._LIST_MARKER_RE.sub('', line.strip()).strip('-*• \t')
                if not skill_name:
                    continue

                # The LLM is asked to avoid duplicates but may not comply.
                normalized = self._normalize_skill_name(skill_name)
                if any(self._are_skills_similar(normalized, known) for known in known_names):
                    continue
                known_names.append(normalized)

                external_skills.append(Skill(
                    skill_id=1000 + i,  # Temporary ID for external skills
                    skill_name=skill_name,
                    skill_level=recommended_level,
                    skill_count=0,
                    source="external"
                ))

            return external_skills

        except Exception as e:
            logger.error("Error generating external skills: %s", e)
            return []

    def _calculate_source_statistics(self, skills: List[Skill]) -> Dict[str, int]:
        """Calculate statistics by skill source.

        Args:
            skills: Skills to count.

        Returns:
            Counts for "department", "organization" and "external" (each key
            always present); skills with any other source are not counted.
        """
        stats = {"department": 0, "organization": 0, "external": 0}

        for skill in skills:
            if skill.source in stats:
                stats[skill.source] += 1

        return stats

    def process_json_request(self, json_data: str) -> Dict:
        """Process JSON request and return JSON response.

        Args:
            json_data: JSON object text with required keys ``user_prompt``,
                ``department_info`` and ``employee_count`` and optional keys
                ``filtered_skills`` and ``organization_skills``.

        Returns:
            A JSON-serializable dict that always has the keys ``success``,
            ``skills``, ``total_skills``, ``skills_by_source`` and
            ``recommended_level``. When the request cannot be parsed or
            processed, ``success`` is False, an ``error`` message is added,
            the collections are empty and ``recommended_level`` is None.
        """
        try:
            request_data = json.loads(json_data)
            if not isinstance(request_data, dict):
                raise ValueError("Request JSON must be an object")

            required = ('user_prompt', 'department_info', 'employee_count')
            missing = [key for key in required if key not in request_data]
            if missing:
                raise ValueError(f"Missing required field(s): {', '.join(missing)}")

            # Parse request (an explicit null skills list is treated as empty)
            skills_request = SkillsRequest(
                user_prompt=request_data['user_prompt'],
                department_info=request_data['department_info'],
                employee_count=request_data['employee_count'],
                filtered_skills=request_data.get('filtered_skills') or [],
                organization_skills=request_data.get('organization_skills') or []
            )

            # Process request
            response = self.identify_skills(skills_request)

            # Convert to JSON response
            return {
                'success': True,
                'skills': [
                    {
                        'skill_id': skill.skill_id,
                        'skill_name': skill.skill_name,
                        'skill_level': skill.skill_level,
                        'skill_count': skill.skill_count,
                        'source': skill.source
                    }
                    for skill in response.skills
                ],
                'total_skills': response.total_skills,
                'skills_by_source': response.skills_by_source,
                'recommended_level': response.recommended_level
            }

        except Exception as e:
            logger.exception("Error processing skills request: %s", e)
            # Same keys as the success payload so consumers can read either.
            return {
                'success': False,
                'error': str(e),
                'skills': [],
                'total_skills': 0,
                'skills_by_source': {},
                'recommended_level': None
            }

# Example usage and testing
def main():
    """Example usage of the Skills Identification System.

    Builds an engine with the default model, sends one hard-coded sample
    request through ``process_json_request`` and prints the result. If the
    request fails, the error is printed instead of the result details, since
    a failed response is not guaranteed to carry the result fields.
    """

    # Initialize system
    skills_engine = SkillsIdentificationEngine(target_skills_count=15)

    # Sample request data
    sample_request = {
        "user_prompt": "I want to improve my team's Technical ability",
        "department_info": {
            "department_id": 171,
            "department_name": "developer",
            "matched": True,
            "inferred_department": ""
        },
        "employee_count": 5,
        "filtered_skills": [
            {"skill_id": 1, "skill_name": "Sales Techniques", "skill_level": "Intermediate", "skill_count": 15},
            {"skill_id": 2, "skill_name": "Customer Relationship Management", "skill_level": "Advanced", "skill_count": 12},
            {"skill_id": 3, "skill_name": "Lead Generation", "skill_level": "Beginner", "skill_count": 8},
            {"skill_id": 4, "skill_name": "Negotiation Skills", "skill_level": "Intermediate", "skill_count": 10},
            {"skill_id": 5, "skill_name": "Product Knowledge", "skill_level": "Advanced", "skill_count": 20}
        ],
        "organization_skills": [
            {"skill_id": 101, "skill_name": "Communication Skills"},
            {"skill_id": 102, "skill_name": "Team Leadership"},
            {"skill_id": 103, "skill_name": "Project Management"},
            {"skill_id": 104, "skill_name": "Data Analysis"},
            {"skill_id": 105, "skill_name": "Problem Solving"}
        ]
    }

    # Process request
    print("Processing Skills Identification request...")
    result = skills_engine.process_json_request(json.dumps(sample_request))

    # Display results
    print("\nSkills Identification Results:")
    print(f"Success: {result['success']}")

    if not result['success']:
        # Report the failure and stop; indexing the result fields here would
        # raise KeyError when the failure payload omits them.
        print(f"Error: {result.get('error', 'unknown error')}")
        return

    print(f"Total Skills: {result['total_skills']}")
    print(f"Recommended Level: {result['recommended_level']}")
    print(f"Skills by Source: {result['skills_by_source']}")

    print("\nIdentified Skills:")
    for i, skill in enumerate(result['skills'], 1):
        print(f"{i}. {skill['skill_name']} (Level: {skill['skill_level']}, Source: {skill['source']})")
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()