from typing import List, Dict, Any
from urllib.parse import quote_plus, urljoin

import requests
from bs4 import BeautifulSoup

from mcp_base import MCPBase

class KhanAcademyMCP(MCPBase):
    """Course provider that scrapes Khan Academy search results for skill gaps.

    Every lookup is best-effort: when the page cannot be fetched or no result
    can be parsed, a single placeholder entry pointing at the search page is
    returned instead, carrying a ``note`` key that explains why.
    """

    # ``requests`` applies no timeout by default, so a stalled connection
    # would otherwise block the caller indefinitely.
    REQUEST_TIMEOUT = 10

    # Upper bound on the number of courses collected for a single skill.
    MAX_RESULTS_PER_SKILL = 3

    def __init__(self):
        # NOTE(review): MCPBase.__init__ is not invoked here; confirm the base
        # class needs no initialisation of its own.
        self.base_url = "https://www.khanacademy.org"
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

    def fetch_courses(self, user_skills, skill_gaps, max_results=5) -> List[Dict[str, Any]]:
        """Collect courses for the given skill gaps, in gap order.

        Args:
            user_skills: Accepted for interface compatibility; not used here.
            skill_gaps: Iterable of dicts, each expected to carry a
                ``'skill_name'`` entry. Gaps with a missing or empty name are
                skipped rather than raising.
            max_results: Maximum number of courses to return. A non-positive
                value yields an empty list without any network traffic.

        Returns:
            At most ``max_results`` course dicts, each tagged with the
            ``'related_skill'`` it was found for.
        """
        if max_results <= 0 or not skill_gaps:
            return []

        all_courses = []
        for gap in skill_gaps:
            skill_name = gap.get('skill_name')
            if not skill_name:
                continue

            for course in self._scrape_courses_for_skill(skill_name):
                course['related_skill'] = skill_name
                all_courses.append(course)

            # Stop issuing requests as soon as enough courses are collected.
            if len(all_courses) >= max_results:
                break

        return all_courses[:max_results]

    def _scrape_courses_for_skill(self, skill_name) -> List[Dict[str, Any]]:
        """Scrape the Khan Academy search page for one skill.

        Returns up to ``MAX_RESULTS_PER_SKILL`` parsed courses. If nothing can
        be parsed, or any error occurs, a one-element list holding a
        placeholder entry (search-page link plus a ``note``) is returned.
        """
        # quote_plus encodes spaces as '+' and escapes reserved characters
        # such as '&', '#' and '+', which would otherwise corrupt the query.
        query = quote_plus(str(skill_name))
        search_url = f"{self.base_url}/search?page_search_query={query}"

        try:
            response = requests.get(
                search_url,
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            # Primary selectors first; fall back to the alternatives only when
            # the primary set matches nothing.
            search_results = soup.select('._1lisot6l, .search-result-item, article')
            if not search_results:
                search_results = soup.select('.result-container, .search-result')

            courses = []
            for result in search_results:
                course = self._parse_search_result(result)
                if course is None:
                    continue
                courses.append(course)
                # Count only usable results toward the per-skill cap, so
                # unparseable leading entries do not shrink the output.
                if len(courses) >= self.MAX_RESULTS_PER_SKILL:
                    break

            if not courses:
                courses.append(self._fallback_course(
                    skill_name,
                    search_url,
                    "Direct course extraction failed. This is a search results page.",
                ))
            return courses

        except Exception as e:
            # Deliberately broad: this is a best-effort boundary, and callers
            # rely on always receiving a usable list instead of an exception.
            print(f"Error scraping Khan Academy for {skill_name}: {e}")
            return [self._fallback_course(
                skill_name,
                search_url,
                f"Error fetching courses: {e}",
            )]

    def _parse_search_result(self, result):
        """Turn one search-result element into a course dict.

        Returns ``None`` when the element has no title, no link, or a link
        that does not resolve to an http(s) URL.
        """
        title_element = result.select_one('h3, .title, ._z9axo')
        if not title_element:
            return None

        link_element = result.select_one('a')
        if not link_element or not link_element.has_attr('href'):
            return None

        # urljoin resolves '/path', 'path' and '//host/path' correctly and
        # leaves absolute URLs untouched.
        url = urljoin(self.base_url, link_element['href'].strip())
        if not url.startswith(('http://', 'https://')):
            # e.g. 'javascript:' or 'mailto:' links are not course pages.
            return None

        description_element = result.select_one('.description, ._1j9o38i, p')
        description = ""
        if description_element:
            description = description_element.get_text().strip()

        return {
            "title": title_element.get_text().strip(),
            "url": url,
            "description": description,
            "platform": "Khan Academy",
        }

    @staticmethod
    def _fallback_course(skill_name, search_url, note):
        """Build the placeholder entry that links to the search page itself."""
        return {
            "title": f"Khan Academy courses for {skill_name}",
            "url": search_url,
            "description": f"Browse Khan Academy courses related to {skill_name}",
            "platform": "Khan Academy",
            "note": note,
        }


if __name__ == "__main__":
    # Demo fixtures as (skill_id, skill_type, skill_name) triples; both the
    # user-skill list and the gap list are derived from them.
    sample_skills = [
        ("1", "1", "Management"),
        ("2", "2", "Problem Solving"),
        ("3", "2", "Data Analysis"),
        ("4", "2", "Critical Thinking"),
        ("5", "2", "Economics"),
    ]

    user_skills = [
        {"skill_id": sid, "skill_type": stype, "skill_name": name}
        for sid, stype, name in sample_skills
    ]

    # Every sample gap goes from level 1 to level 3.
    skill_gaps = [
        {"skill_id": sid, "skill_name": name, "current_level": 1, "target_level": 3}
        for sid, _stype, name in sample_skills
    ]

    khan_academy_mcp = KhanAcademyMCP()
    courses = khan_academy_mcp.fetch_courses(user_skills, skill_gaps)

    # Print a numbered summary; optional fields are shown only when present
    # (and, for the description, non-empty).
    print(f"Fetched {len(courses)} Courses:")
    for position, entry in enumerate(courses, start=1):
        print(f"\n{position}. {entry['title']}")
        print(f"   URL: {entry['url']}")
        if entry.get('description'):
            print(f"   Description: {entry['description']}")
        if 'related_skill' in entry:
            print(f"   Related Skill: {entry['related_skill']}")
        if 'note' in entry:
            print(f"   Note: {entry['note']}")