import json
import logging
from typing import Dict, List, Optional
from dataclasses import dataclass
from langchain_ollama import ChatOllama
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage

# Configure logging: basicConfig runs at import time and sets the root logger
# to INFO; `logger` is the module-level logger used by everything below.
# NOTE(review): importing this module from a larger application therefore
# touches process-wide logging configuration — confirm that is intended.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

@dataclass
class Course:
    """A single course record evaluated by the course filter.

    Instances are created in ``CourseFieldIdentifier.process_json_request``
    from the ``course_*`` keys of each incoming course object and are rendered
    into prompt text by ``CourseFieldIdentifier._format_courses_for_llm``.
    """
    # Identifier used to match the IDs returned by the LLM back to courses;
    # process_json_request always stores it as a string.
    course_id: str
    # Course title (read from the request's ``course_name`` key).
    name: str
    # One-line summary (read from ``course_short_description``); omitted from
    # the prompt text when empty.
    short_description: str
    # Long description (read from ``course_description``); omitted from the
    # prompt text when empty.
    description: str
    # Skill text (read from the request's ``skill`` key); omitted from the
    # prompt text when empty.
    skill: str

@dataclass
class CourseFilterRequest:
    """Typed form of a filtering request: a target field plus candidate courses.

    NOTE(review): nothing in the visible part of this file constructs or
    consumes this class; ``process_json_request`` works on raw dicts instead.
    """
    # Name of the professional field to filter for (e.g. "Frontend Development").
    identified_field: str
    # Candidate courses to evaluate against that field.
    courses: List[Course]

@dataclass
class CourseFilterResponse:
    """Result of ``CourseFieldIdentifier.filter_courses_by_field``.

    The same shape is returned on failure, with an empty ``matching_courses``
    list and the error text placed in ``filtering_reasoning``.
    """
    # The field the courses were filtered for, echoed back from the request.
    identified_field: str
    # Number of courses that were passed in (len of the input list).
    total_courses_input: int
    # Input courses whose course_id was among the IDs selected by the LLM.
    matching_courses: List[Course]
    # len(matching_courses), stored for convenience.
    total_matching_courses: int
    # The LLM's explanation, or an "Error in ..." message when filtering failed.
    filtering_reasoning: str

class CourseFieldIdentifier:
    """Handles course filtering based on identified field using LLM"""
    
    def __init__(self, model_name: str = "gemma3:12b"):
        """Initialize the course field identifier"""
        try:
            self.llm = ChatOllama(model=model_name)
            logger.info(f"Course Field Identifier initialized with model: {model_name}")
        except Exception as e:
            logger.error(f"Failed to initialize Course Field Identifier: {e}")
            raise
    
    def filter_courses_by_field(self, identified_field: str, courses: List[Course]) -> CourseFilterResponse:
        """
        Filter courses that match the identified field using LLM analysis
        
        Args:
            identified_field: The field identified from skill analysis (e.g., "Frontend Development")
            courses: List of all available courses
            
        Returns:
            CourseFilterResponse with matching courses
        """
        try:
            # Prepare courses data for LLM
            courses_text = self._format_courses_for_llm(courses)
            
            # Get LLM analysis
            matching_course_ids, reasoning = self._get_llm_course_analysis(
                identified_field, courses_text
            )
            
            # Filter matching courses
            matching_courses = [
                course for course in courses 
                if course.course_id in matching_course_ids
            ]
            
            response = CourseFilterResponse(
                identified_field=identified_field,
                total_courses_input=len(courses),
                matching_courses=matching_courses,
                total_matching_courses=len(matching_courses),
                filtering_reasoning=reasoning
            )
            
            logger.info(f"Filtered {len(matching_courses)} courses for {identified_field} field")
            return response
            
        except Exception as e:
            logger.error(f"Error in course filtering: {e}")
            return CourseFilterResponse(
                identified_field=identified_field,
                total_courses_input=len(courses),
                matching_courses=[],
                total_matching_courses=0,
                filtering_reasoning=f"Error in filtering: {str(e)}"
            )
    
    def _format_courses_for_llm(self, courses: List[Course]) -> str:
        """Format courses data for LLM input"""
        
        courses_formatted = []
        for course in courses:
            # Combine all course information
            full_description = f"{course.name}"
            if course.short_description:
                full_description += f" - {course.short_description}"
            if course.description:
                full_description += f" | {course.description}"
            if course.skill:
                full_description += f" | Skills: {course.skill}"
            
            courses_formatted.append(f"ID: {course.course_id} | {full_description}")
        
        return "\n".join(courses_formatted)
    
    def _get_llm_course_analysis(self, identified_field: str, courses_text: str) -> tuple:
        """Get LLM analysis of which courses match the identified field"""
        
        prompt_template = PromptTemplate(
            input_variables=["identified_field", "courses_text"],
            template="""
            You are an EXPERT course curator. Be VERY STRICT in matching courses to the field.
            
            TARGET FIELD: {identified_field}
            
            COURSES TO EVALUATE:
            {courses_text}
            
            STRICT MATCHING CRITERIA:
            
            For {identified_field} field, ONLY approve courses that:
            1. Directly teach {identified_field} skills and knowledge
            2. Use tools/technologies commonly used in {identified_field}
            3. Solve real problems that {identified_field} professionals face
            4. Build competencies essential for {identified_field} roles
            
            DEFINITELY REJECT:
            - Courses from completely different fields (e.g., Programming courses for Sales)
            - General knowledge or academic subjects not job-relevant
            - Courses too basic/generic to help {identified_field} professionals
            - Administrative, testing, or demo content
            
            EXAMPLES FOR DIFFERENT FIELDS:
            
            Sales Field - APPROVE: "Sales Techniques", "CRM Training", "Negotiation Skills", "Customer Relations"
            Sales Field - REJECT: "Python Programming", "Angular Development", "Database Administration"
            
            Frontend Development - APPROVE: "Angular", "React", "JavaScript", "HTML/CSS", "UI/UX"
            Frontend Development - REJECT: "Sales Training", "HR Management", "Accounting Principles"
            
            BE EXTREMELY SELECTIVE. When in doubt, REJECT the course.
            
            Return JSON:
            {{
                "matching_course_ids": ["id1", "id2"],
                "reasoning": "Detailed explanation of why specific courses were selected/rejected for {identified_field}"
            }}
            
            Analysis for {identified_field}:
            """
        )
        
        try:
            formatted_prompt = prompt_template.format(
                identified_field=identified_field,
                courses_text=courses_text
            )
            
            response = self.llm.invoke([HumanMessage(content=formatted_prompt)])
            
            # Parse LLM response
            analysis_result = self._parse_llm_response(response.content)
            
            matching_course_ids = analysis_result.get("matching_course_ids", [])
            reasoning = analysis_result.get("reasoning", "LLM analysis completed")
            
            logger.info(f"LLM identified {len(matching_course_ids)} matching courses")
            return matching_course_ids, reasoning
            
        except Exception as e:
            logger.error(f"Error in LLM course analysis: {e}")
            return [], f"Error in LLM analysis: {str(e)}"
    
    def _parse_llm_response(self, response_content: str) -> Dict:
        """Parse LLM response to extract course IDs and reasoning"""
        
        try:
            # Try to extract JSON from response
            import re
            json_match = re.search(r'\{.*\}', response_content, re.DOTALL)
            
            if json_match:
                parsed_response = json.loads(json_match.group())
                
                # Validate structure
                if "matching_course_ids" in parsed_response:
                    return parsed_response
            
            # Fallback: try to extract course IDs from text
            return self._fallback_parse_response(response_content)
            
        except json.JSONDecodeError as e:
            logger.warning(f"JSON parsing failed: {e}")
            return self._fallback_parse_response(response_content)
        
        except Exception as e:
            logger.error(f"Error parsing LLM response: {e}")
            return {"matching_course_ids": [], "reasoning": "Failed to parse LLM response"}
    
    def _fallback_parse_response(self, response_content: str) -> Dict:
        """Fallback parsing when JSON extraction fails"""
        
        import re
        
        # Look for course ID patterns in text
        course_id_patterns = [
            r'ID:\s*(\w+)',  # "ID: 123"
            r'Course\s*(\w+)',  # "Course 123"
            r'^\s*(\w+)\s*-',  # "123 - Course Name"
        ]
        
        matching_ids = []
        for pattern in course_id_patterns:
            matches = re.findall(pattern, response_content, re.MULTILINE)
            matching_ids.extend(matches)
        
        # Remove duplicates
        matching_ids = list(set(matching_ids))
        
        return {
            "matching_course_ids": matching_ids,
            "reasoning": "Extracted course IDs from text response (fallback parsing)"
        }
    
    def process_json_request(self, json_data: str) -> Dict:
        """
        Process JSON request and return JSON response
        
        Args:
            json_data: JSON string with identified_field and courses data
            
        Returns:
            Dictionary with filtered courses
        """
        
        try:
            request_data = json.loads(json_data)
            
            # Extract identified field
            identified_field = request_data.get('identified_field', '')
            if not identified_field:
                return {
                    'success': False,
                    'error': 'No identified_field provided',
                    'matching_courses': []
                }
            
            # Parse courses
            courses_data = request_data.get('courses', [])
            courses = []
            
            for course_data in courses_data:
                course = Course(
                    course_id=str(course_data.get('course_id', '')),
                    name=course_data.get('course_name', ''),
                    short_description=course_data.get('course_short_description', ''),
                    description=course_data.get('course_description', ''),
                    skill=course_data.get('skill', '')
                )
                courses.append(course)
            
            # Filter courses
            filter_response = self.filter_courses_by_field(identified_field, courses)
            
            # Convert to JSON response
            matching_courses_json = []
            for course in filter_response.matching_courses:
                matching_courses_json.append({
                    'course_id': course.course_id,
                    'course_name': course.name,
                    'course_short_description': course.short_description,
                    'course_description': course.description,
                    'skill': course.skill
                })
            
            return {
                'success': True,
                'identified_field': filter_response.identified_field,
                'total_courses_input': filter_response.total_courses_input,
                'matching_courses': matching_courses_json,
                'total_matching_courses': filter_response.total_matching_courses,
                'filtering_reasoning': filter_response.filtering_reasoning
            }
            
        except json.JSONDecodeError as e:
            logger.error(f"JSON decode error: {e}")
            return {
                'success': False,
                'error': f'Invalid JSON format: {e}',
                'matching_courses': []
            }
        
        except Exception as e:
            logger.error(f"Processing error: {e}")
            return {
                'success': False,
                'error': str(e),
                'matching_courses': []
            }

# Example usage and testing
def main():
    """Run a manual demo of the Course Field Identifier.

    Builds a sample catalogue, filters it once for the "Sales " field and once
    for "Data Science", and prints the outcome of each run. Requires a
    reachable Ollama instance serving the default model.
    """

    # Initialize system
    course_identifier = CourseFieldIdentifier()

    # Demo catalogue as (course_id, name, short_description, description, skill).
    # HTML closing tags use a plain "/": the JSON-style "\/" is not a valid
    # Python escape (it triggers a SyntaxWarning on recent interpreters and
    # leaves a stray backslash in the text sent to the model).
    catalogue = [
        ("32", "General Knowledge", "General Knowledge", "", ""),
        ("33", "Current Affairs", "Current Affairs", "", ""),
        ("43", "Python Programming for Complete Beginners ", "Python Programming for Complete Beginners ", "", ""),
        ("58", "Laravel Beginner", "Laravel Beginner description ", "", ""),
        ("59", "Codeigntier Beginner", "Codeigntier Beginner short desc", "", ""),
        ("60", "New CCourse ", "New COurse short desc", "", ""),
        (
            "63",
            "Angular Basic",
            "Angular is a platform and framework for building single-page client applications using HTML and TypeScript. Angular is written in TypeScript. It implements core and optional functionality as a set of TypeScript libraries that you import into your applicat",
            "<p>Angular is a platform and framework for building single-page client applications using HTML and TypeScript. Angular is written in TypeScript. It implements core and optional functionality as a set of TypeScript libraries that you import into your applications.</p><p>The architecture of an Angular application relies on certain fundamental concepts. The basic building blocks of the Angular framework are Angular components that are organized into <i>NgModules</i>. NgModules collect related code into functional sets; an Angular application is defined by a set of NgModules. An application always has at least a <i>root module</i> that enables bootstrapping, and typically has many more <i>feature modules</i>.</p><p>Components define <i>views</i>, which are sets of screen elements that Angular can choose among and modify according to your program logic and data.</p><p>Components use <i>services</i>, which provide specific functionality not directly related to views. Service providers can be <i>injected</i> into components as <i>dependencies</i>, making your code modular, reusable, and efficient.</p>",
            "",
        ),
        ("64", "Leadership Chapter 1", "Leadership Chapter 1", "", ""),
        ("65", "Sorting", "Sorting", "", ""),
        ("66", "CSS Selector", "CSS Selector", "<ul><li>CSS Selector</li><li>css</li></ul>", ""),
        (
            "68",
            "simple course",
            "simple course",
            "<ol><li><strong>Lorem Ipsum</strong> is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book.</li><li>hhgh</li><li>hjhjhj</li></ol>",
            "",
        ),
        ("69", "report testing", "report testing", "", ""),
        ("70", "Author Course", "Author Courses", "<p>Author Course</p>", ""),
        ("71", "Demo Course", "Demo Course", "<p>Demo Course</p>", ""),
        ("72", "Course By Author", "Course By Author", "<p>Course By Author</p>", ""),
        ("73", "Java Course", "Java Course", "", ""),
        ("90", "Math Quiz Course", "Math Quiz", "<p>All quiz course</p>", ""),
        ("92", "All Quiz Test", "All Quiz Test", "", ""),
        ("93", "Math Course", "Math Course", "", ""),
        ("94", "Scorm Posh Course", "Scorm Posh Course", "", ""),
        ("95", "Scorm Info Security", "Scorm Info Security", "", ""),
        ("96", "quiz analysis", "quiz analysis", "", ""),
        ("97", "quiz analysis test", "quiz analysis short desc", "<p>description</p>", ""),
        ("98", "create course", "course desc", "", ""),
        ("99", "Quiz Test Course", "Quiz Test Course", "", ""),
        ("100", "new course", "new course", "", ""),
        ("103", "Scorm Warehouse", "New Scorm Course", "", ""),
        ("104", "Leading Yourself Scorm", "Leading Yourself Scorm", "", ""),
        ("135", "Certificate test", "Certificate test", "", ""),
        ("136", "Certificate Test 2", "Certificate Test 2", "", ""),
        ("138", "ASD12", "ASD21", "", ""),
        ("141", "Tester", "Tester", "", ""),
        (
            "6",
            "Database Administration",
            "SQL and database management",
            "Learn database design, SQL queries, and database administration",
            "sql, database, administration",
        ),
    ]

    # Sample request data. Every course uses the "skill" key, which is the one
    # process_json_request reads (the former "skills": [] entries were ignored).
    sample_request = {
        "identified_field": "Sales ",
        "courses": [
            {
                "course_id": course_id,
                "course_name": name,
                "course_short_description": short_description,
                "course_description": description,
                "skill": skill,
            }
            for course_id, name, short_description, description, skill in catalogue
        ],
    }

    # Process request
    print("Processing Course Field Identification...")
    print(f"Identified Field: {sample_request['identified_field']}")
    print(f"Total Input Courses: {len(sample_request['courses'])}")
    print("-" * 50)

    result = course_identifier.process_json_request(json.dumps(sample_request))

    # Display results
    if result['success']:
        print("✅ SUCCESS")
        print(f"Field: {result['identified_field']}")
        print(f"Input Courses: {result['total_courses_input']}")
        print(f"Matching Courses: {result['total_matching_courses']}")
        print(f"Reasoning: {result['filtering_reasoning']}")

        print("\n📚 MATCHING COURSES:")
        for i, course in enumerate(result['matching_courses'], 1):
            print(f"{i}. {course['course_name']} (ID: {course['course_id']})")
            print(f"   Description: {course['course_short_description']}")
            print(f"   Skills: {course['skill']}")
            print()

    else:
        print(f"❌ ERROR: {result['error']}")

    print("=" * 60)

    # Test with different field (same courses, only the field changes)
    sample_request_2 = {**sample_request, "identified_field": "Data Science"}

    print("\nTesting with different field: Data Science")
    print("-" * 50)

    result_2 = course_identifier.process_json_request(json.dumps(sample_request_2))

    if result_2['success']:
        print("✅ SUCCESS")
        print(f"Field: {result_2['identified_field']}")
        print(f"Matching Courses: {result_2['total_matching_courses']}")

        print("\n📚 MATCHING COURSES:")
        for i, course in enumerate(result_2['matching_courses'], 1):
            print(f"{i}. {course['course_name']} (ID: {course['course_id']})")
    else:
        # Surface failures of the second run instead of ending silently.
        print(f"❌ ERROR: {result_2['error']}")

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()