
import requests
import json
import sys
import os
import re
from collections import defaultdict
import concurrent.futures
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin

# ========== Configuration ==========
# Endpoint and model used for every LLM request made by this module.
OLLAMA_API_URL = "http://localhost:11434/api/generate"  # Default Ollama API URL
MODEL_NAME = "gemma3:12b"  # Specify Gemma 12B model

# Common MOOC platforms domains for validation.
# Maps a platform label to the domain (fragment) associated with it.
VALID_PLATFORMS = {
    "coursera": "coursera.org",
    "udemy": "udemy.com",
    "edx": "edx.org",
    # NOTE(review): the only value that carries a URL path; it can never be a
    # substring of a bare host name such as "www.linkedin.com".
    "linkedin learning": "linkedin.com/learning",
    "pluralsight": "pluralsight.com",
    "udacity": "udacity.com",
    "skillshare": "skillshare.com",
    # NOTE(review): the only key that is not all-lowercase; confirm whether any
    # lookup is case-sensitive before normalizing it.
    "Khan Academy": "khanacademy.org",
    "codecademy": "codecademy.com",
    "futurelearn": "futurelearn.com",
    "datacamp": "datacamp.com",
    "masterclass": "masterclass.com",
    "educative": "educative.io",
    "brilliant": "brilliant.org",
    "google": "google.com",
    "aws": "aws.amazon.com",
    "microsoft": "microsoft.com",
    "ibm": "ibm.com",
    "oracle": "oracle.com",
    "youtube": "youtube.com",
}

# List of common course durations for validation (all entries are lowercase).
COMMON_DURATIONS = [
    "self-paced", "1 hour", "2 hours", "3 hours", "4 hours", "5 hours",
    "1-2 hours", "2-3 hours", "3-4 hours", "4-5 hours", "5-10 hours",
    "1 day", "2 days", "3 days", "4 days", "5 days", "1 week", "2 weeks",
    "3 weeks", "4 weeks", "1 month", "2 months", "3 months", "6 months",
    "1-2 weeks", "2-3 weeks", "3-4 weeks", "1-2 months", "2-3 months", "3-6 months"
]

# Browser-like User-Agent sent with outgoing page requests to avoid being blocked.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# ========== Helper Functions ==========
def _skill_levels(skills):
    """Map each skill_id in *skills* to its integer level, skipping malformed entries."""
    levels = {}
    for skill in skills or []:
        if not isinstance(skill, dict) or 'skill_id' not in skill:
            continue
        try:
            level = int(skill.get('skill_type', 1))
        except (TypeError, ValueError):
            # Unparseable level (None, "Advanced", ...): use the same default
            # that applies when the key is missing altogether.
            level = 1
        levels[skill['skill_id']] = level
    return levels


def match_skills(user_skills, course_skills):
    """Calculate skill match score between user skills and course skills.

    Both arguments are iterables of dicts carrying ``skill_id`` and an optional
    ``skill_type`` level (defaults to 1). ``None``, empty lists, entries without
    a ``skill_id`` and non-numeric levels are tolerated instead of raising.

    Each course skill the user also has contributes 2 points when the levels are
    equal and 1 point when the course is exactly one level above the user.

    Returns:
        0 when either side has no usable skills, otherwise a float percentage
        (0-100) of the maximum score ``2 * number_of_course_skills``.
    """
    user_levels = _skill_levels(user_skills)
    course_levels = _skill_levels(course_skills)

    if not user_levels or not course_levels:
        return 0

    match_score = 0
    for skill_id, course_level in course_levels.items():
        user_level = user_levels.get(skill_id)
        if user_level is None:
            continue
        gap = course_level - user_level
        if gap == 0:
            match_score += 2
        elif gap == 1:
            match_score += 1

    # Normalize score (0-100%); course_levels is non-empty here.
    max_possible_score = 2 * len(course_levels)
    return (match_score / max_possible_score) * 100

def extract_course_recommendations(answer, course_mapping, user_skills):
    """Extract course recommendations from LLM answer using multiple methods.

    Args:
        answer: Free-text answer produced by the LLM.
        course_mapping: Dict of course_id -> course details; each details dict
            provides ``name`` and optionally ``skills``.
        user_skills: The user's skills, forwarded to ``match_skills``.

    Returns:
        A list of ``{"courseId", "courseName", "matchScore"}`` dicts, one per
        catalog course mentioned in the answer, without duplicates.
    """
    recommended_courses = []
    courses_found = set()  # To avoid duplicates
    answer_lower = answer.lower()  # hoisted: invariant across the catalog loop

    def add_course(course_id, course_details):
        match_score = match_skills(user_skills, course_details.get('skills', []))
        recommended_courses.append({
            "courseId": course_id,
            "courseName": course_details['name'],
            "matchScore": f"{match_score:.1f}%"
        })
        courses_found.add(course_id)

    # Method 1: Direct name matching (case-insensitive)
    for course_id, course_details in course_mapping.items():
        course_name = course_details.get('name') or ''
        # An empty name is a substring of any answer; never treat it as a hit.
        if course_name and course_name.lower() in answer_lower:
            add_course(course_id, course_details)

    # Method 2: Pattern matching for course recommendations
    # NOTE(review): the last four patterns end in a lazy group followed only by
    # an optional quote, so their capture is always empty and they currently
    # contribute nothing; they need an explicit terminator to be useful.
    recommendation_patterns = [
        r"recommend[s]?.*?[\"']?(.*?)[\"']?(?:\s*course|\s*class|\s*program|\s*training|\s*to learn|\s*for learning|\s*to master|\s*to improve|\s*to enhance|\s*to develop|\s*to advance|\s*to gain|\s*to acquire|\s*to build|\s*to strengthen|\s*to boost|\s*to increase|\s*to grow|\s*to expand|\s*to deepen|\s*to broaden|\s*to refine|\s*to polish|\s*to sharpen|\s*to hone|\s*to perfect|\s*to excel|\s*to succeed|\s*to progress|\s*to advance|\s*to move forward|\s*to get ahead|\s*to get better|\s*to get stronger|\s*to get more proficient|\s*to get more skilled|\s*to get more experienced|\s*to get more knowledgeable|\s*to get more competent|\s*to get more capable|\s*to get more adept|\s*to get more expert|\s*to get more masterful|\s*to get more accomplished|\s*to get more advanced|\s*to get more sophisticated)",
        r"suggest[s]?.*?[\"']?(.*?)[\"']?(?:\s*course|\s*class|\s*program|\s*training)",
        r"take the [\"']?(.*?)[\"']?(?:\s*course|\s*class|\s*program|\s*training)",
        r"enroll in [\"']?(.*?)[\"']?(?:\s*course|\s*class|\s*program|\s*training)",
        r"learn from [\"']?(.*?)[\"']?(?:\s*course|\s*class|\s*program|\s*training)",
        r"course[s]?:.*?[\"']?(.*?)[\"']?",
        r"course[s]? titled [\"']?(.*?)[\"']?",
        r"course[s]? called [\"']?(.*?)[\"']?",
        r"course[s]? named [\"']?(.*?)[\"']?"
    ]

    for pattern in recommendation_patterns:
        for match in re.finditer(pattern, answer, re.IGNORECASE):
            potential_course = match.group(1).strip().lower()
            if not potential_course:
                # "" is a substring of every course name; without this guard a
                # phrase such as "recommend course" would add the whole catalog.
                continue
            for course_id, course_details in course_mapping.items():
                if course_id in courses_found:
                    continue
                course_name = (course_details.get('name') or '').lower()
                if not course_name:
                    continue
                # Accept containment in either direction between the captured
                # phrase and the catalog name.
                if potential_course in course_name or course_name in potential_course:
                    add_course(course_id, course_details)

    return recommended_courses

# Function to remove course IDs from a response
def remove_course_ids_from_text(text, course_mapping):
    """Remove course IDs from the text response.

    Strips labelled mentions ("Course ID: X", "(ID: X)"), JWT-shaped tokens with
    the known header segment, and every ID that is a key of *course_mapping*,
    then tidies the whitespace and empty parentheses left behind.

    Args:
        text: The response text to clean.
        course_mapping: Mapping (or iterable) whose keys are course IDs. Keys
            that are not strings are converted with ``str``.

    Returns:
        The cleaned text.
    """
    # Replace any mentions of "Course ID: X" or similar patterns
    text = re.sub(r'Course ID:?\s*[\w\d_\-\.]+', '', text)
    text = re.sub(r'\(Course ID:?\s*[\w\d_\-\.]+\)', '', text)

    # Replace JWT-style course IDs. The payload and signature segments are
    # limited to base64url characters so the match stops at the end of the
    # token instead of swallowing prose up to the next period.
    text = re.sub(r'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9\.[\w\-]+\.[\w\-]+', '', text)

    # Replace any parenthetical mentions of course IDs
    text = re.sub(r'\(ID:?\s*[\w\d_\-\.]+\)', '', text)

    # For each course in the mapping, explicitly replace any mentions of its ID
    for raw_id in course_mapping or ():
        course_id = str(raw_id)
        if not course_id:
            continue
        for labelled in (
            f"(Course ID: {course_id})",
            f"Course ID: {course_id}",
            f"(ID: {course_id})",
            f"ID: {course_id}",
        ):
            text = text.replace(labelled, "")
        # Remove the bare ID only where it stands as a whole token, so a short
        # ID such as "1" does not turn "10 hours" into "0 hours".
        text = re.sub(rf'(?<!\w){re.escape(course_id)}(?!\w)', '', text)

    # Clean up any double spaces or empty parentheses created by the replacements
    text = re.sub(r'\(\s*\)', '', text)
    text = re.sub(r'\s{2,}', ' ', text)
    text = re.sub(r':\s*-', ':', text)

    return text

# ========== Query Ollama API ==========
def query_ollama(prompt, timeout=(10, 600)):
    """Query Ollama API with Gemma 12B model.

    Args:
        prompt: The user prompt; it is wrapped in the system/user/assistant
            scaffold below before being sent.
        timeout: Passed to ``requests.post``. The default waits up to 10 s to
            connect and up to 10 min for the (non-streamed) completion, so an
            unreachable or stalled server cannot block the caller forever.
            Pass ``None`` to wait indefinitely.

    Returns:
        The generated text on success. On any failure an error description
        starting with "Error" is returned as a string instead of raising.
    """
    headers = {
        'Content-Type': 'application/json'
    }

    payload = {
        "model": MODEL_NAME,
        "prompt": f"<system>You are a career advisor assistant specialized in recommending specific courses from a client's course catalog based on user skills and career goals.</system>\n\n<user>{prompt}</user>\n\n<assistant>",
        "stream": False,
        "options": {
            "temperature": 0.7,
            "top_p": 0.95,
            "top_k": 40,
            # Ollama's option for the generation length cap is `num_predict`;
            # `max_tokens` is not a recognized option, so the cap had no effect.
            "num_predict": 1024
        }
    }

    try:
        response = requests.post(OLLAMA_API_URL, headers=headers, json=payload, timeout=timeout)

        if response.status_code == 200:
            return response.json().get('response', '')
        return f"Error: API returned status code {response.status_code}, message: {response.text}"
    except Exception as e:
        # Boundary handler: callers expect a string, never an exception.
        return f"Error querying Ollama API: {str(e)}"

# ========== URL Validation Functions ==========
def is_valid_url(url):
    """Check whether *url* is a well-formed absolute http(s) URL.

    The check is purely syntactic: the value must be a string of at least
    10 characters, must not be the "#" placeholder, and must parse to an
    ``http``/``https`` scheme with a non-empty host. Any host is accepted;
    membership in a known-platform list is not required, so URLs from
    providers outside that list still pass.

    Returns:
        True if the URL passes the checks, False otherwise (never raises).
    """
    if not isinstance(url, str):
        return False

    # Basic URL validation
    if not url or url == "#" or len(url) < 10:
        return False

    try:
        parsed_url = urlparse(url)
    except ValueError:
        # urlparse rejects e.g. malformed IPv6 literals such as "http://[bad".
        return False

    # Only http(s) can be fetched by the verification step; other schemes are
    # rejected regardless of the host they point at.
    return parsed_url.scheme in ('http', 'https') and bool(parsed_url.netloc)

def verify_course_url(url, timeout=5):
    """Verify if a course URL is valid and accessible.

    A HEAD request is tried first. If it raises or answers with an error
    status, a streamed GET is tried as well, because servers that do not
    support HEAD usually answer it with 403/405 rather than dropping the
    connection.

    Args:
        url: The URL to check.
        timeout: Per-request timeout in seconds.

    Returns:
        A ``(is_accessible, message)`` tuple.
    """
    if not is_valid_url(url):
        return False, "Invalid URL format"

    try:
        head_response = requests.head(url, timeout=timeout, headers=HEADERS, allow_redirects=True)
        if head_response.status_code < 400:
            return True, "URL is accessible"
    except requests.RequestException:
        # Deliberately ignored: the GET below is the authoritative attempt.
        pass

    try:
        # stream=True avoids downloading the body; the context manager closes
        # the connection as soon as the status line has been read.
        with requests.get(url, timeout=timeout, headers=HEADERS, stream=True) as response:
            status_code = response.status_code
    except requests.RequestException as e:
        return False, f"Error accessing URL: {str(e)}"

    if status_code < 400:
        return True, "URL is accessible"
    return False, f"URL returned status code {status_code}"

def extract_course_info_from_url(url):
    """Scrape a page title, a course name and an instructor from *url*, best-effort.

    Returns a dict with ``title``, ``name`` and ``instructor`` (each capped at
    100 characters), or an empty dict when the page does not answer with
    status 200 or anything goes wrong while fetching or parsing it.
    """
    try:
        page = requests.get(url, headers=HEADERS, timeout=10)
        if page.status_code != 200:
            return {}

        soup = BeautifulSoup(page.text, 'html.parser')

        # <title> text, when the document has one
        raw_title = soup.title.string if soup.title else ""
        title = raw_title.strip() if raw_title else ""

        # Non-empty heading texts, in document order; the first one is used
        # as the course name
        heading_texts = [
            text
            for text in (tag.text.strip() for tag in soup.find_all(['h1', 'h2', 'h3']))
            if text
        ]

        # Look for a text node mentioning an instructor-like label and pull
        # the words that follow the label out of its parent's text
        instructor = ""
        label_nodes = soup.find_all(string=re.compile(r'instructor|teacher|professor|author|taught by', re.I))
        for node in label_nodes:
            container = node.parent
            if not (container and container.text):
                continue
            found = re.search(
                r'(?:instructor|teacher|professor|author|taught by)[:\s]+([\w\s]+)',
                container.text.strip(),
                re.I,
            )
            if found:
                instructor = found.group(1).strip()
                break

        if heading_texts:
            name = heading_texts[0][:100]
        else:
            name = title[:100]

        return {
            "title": title[:100],
            "name": name,
            "instructor": instructor[:100]
        }
    except Exception:
        return {}

# (substring looked for in the lowercased platform label, search URL prefix);
# checked in order, first hit wins.
_PLATFORM_SEARCH_URLS = (
    ("coursera", "https://www.coursera.org/search?query="),
    ("udemy", "https://www.udemy.com/courses/search/?q="),
    ("edx", "https://www.edx.org/search?q="),
    ("linkedin", "https://www.linkedin.com/learning/search?keywords="),
    ("pluralsight", "https://www.pluralsight.com/search?q="),
)


def fix_url(url, platform, course_name=None):
    """Attempt to fix or standardize URLs based on platform.

    Args:
        url: The URL to repair; may be empty, ``None`` or the "#" placeholder.
        platform: Human-readable platform label, e.g. "Coursera".
        course_name: Optional course title. When given it is used as the search
            query of a generated search URL; otherwise the platform label is
            used, as before.

    Returns:
        * a platform search URL (or a generic Google search) when no usable URL
          was supplied; the query is URL-encoded;
        * otherwise the URL itself, prefixed with ``https://`` when it has no
          http(s) scheme (the scheme check is case-insensitive, and
          protocol-relative ``//host/...`` URLs get ``https:``).
    """
    from urllib.parse import quote_plus

    url = (url or "").strip()
    platform = (platform or "").strip()
    course_name = (course_name or "").strip()

    if not url or url == "#":
        platform_lower = platform.lower()
        query = course_name or platform_lower
        for keyword, search_prefix in _PLATFORM_SEARCH_URLS:
            if keyword in platform_lower:
                return search_prefix + quote_plus(query)

        # Generic search if platform doesn't match known ones
        terms = " ".join(part for part in (course_name, platform, "online course") if part)
        return "https://www.google.com/search?q=" + quote_plus(terms)

    if url.startswith("//"):
        return "https:" + url

    # Check if the URL has a proper scheme, if not add https://
    if not url.lower().startswith(('http://', 'https://')):
        url = 'https://' + url

    return url

def is_valid_duration(duration_str):
    """Check if a duration string is in a valid format.

    A string is accepted when it contains a number followed by a time unit
    (hours, minutes, days, weeks, months, years or their abbreviations), one of
    the free-form phrases ("self-paced", "on-demand", "flexible", "at your own
    pace"), or any entry of ``COMMON_DURATIONS``. Matching is case-insensitive.

    Returns:
        True if the string looks like a duration, False otherwise (including
        for empty or ``None`` input).
    """
    if not duration_str:
        return False

    duration_str = duration_str.lower()

    # The negative lookahead stops one-letter abbreviations from matching the
    # start of an unrelated word: "3 modules" and "5 dollars" are not durations.
    unit = r'(?:hour|hr|h|minute|min|m|day|d|week|wk|w|month|mth|mo|year|yr)s?'
    duration_patterns = (
        rf'\d\s*{unit}(?![a-z])',  # also covers ranges such as "2-3 weeks"
        r'self[- ]paced',
        r'on[- ]demand',
        r'flexible',
        r'at your own pace',
    )
    if any(re.search(pattern, duration_str) for pattern in duration_patterns):
        return True

    # Finally accept anything listed verbatim in the configured common durations.
    return any(common.lower() in duration_str for common in COMMON_DURATIONS)

# ========== Online Course Recommendations ==========
def get_online_course_recommendations(user_skills, user_query):
    """Generate online course recommendations based on user skills and query with improved validation.

    Args:
        user_skills: Iterable of skill dicts; entries with a ``skill_name`` are
            listed in the prompt together with their ``skill_type`` level
            (default '1'). ``None`` is treated as "no skills".
        user_query: The user's free-text question, embedded in the prompt.

    Returns:
        The list produced by ``parse_online_course_recommendations_improved``
        for the model's answer, or ``[]`` if anything raises along the way.
    """
    try:
        skill_levels = [
            f"{skill.get('skill_name', '')}: {skill.get('skill_type', '1')}"
            for skill in (user_skills or [])
            if 'skill_name' in skill
        ]

        # Define top platforms to focus on
        top_platforms = ["Coursera", "Udemy", "edX", "LinkedIn Learning", "Pluralsight"]
        platform_str = ", ".join(top_platforms)

        # Construct an improved prompt for online course recommendations
        prompt = f"""
You are an expert career advisor specializing in online course recommendations. Your task is to recommend real, currently available online courses that match the user's needs.

User query: "{user_query}"

User skills and levels (1=Beginner, 2=Intermediate, 3=Advanced):
{', '.join(skill_levels)}

Please recommend 5 specific, real online courses from well-known platforms ({platform_str}) that would help this person advance their career based on their skills and query.

IMPORTANT REQUIREMENTS:
1. ONLY recommend courses that CURRENTLY EXIST and are ACTIVE (not archived or retired)
2. Include EXACT course names as they appear on the platform
3. Include ACCURATE and SPECIFIC URLs that lead directly to the course page
4. For each course, you MUST provide:
   - The exact course name
   - The platform it's available on (Coursera, Udemy, edX, LinkedIn Learning, etc.)
   - The instructor or organization name
   - The approximate course duration
   - The COMPLETE, DIRECT URL to the course (must start with http:// or https://)
   - A brief explanation of why this course is relevant (2-3 sentences)

Format your response using this exact structure:
```
COURSE: [Full Exact Course Name]
PLATFORM: [Platform Name]
INSTRUCTOR: [Instructor/Organization Name]
DURATION: [Course Duration]
URL: [Complete Direct URL]
REASON: [Explanation of relevance]
```

Repeat this format for each recommendation. Focus on quality over quantity - it's better to provide 3 accurate recommendations than 5 with errors.
"""

        # Query the Ollama API for online course recommendations
        response = query_ollama(prompt)

        # Parse the response to extract course recommendations with validation
        return parse_online_course_recommendations_improved(response)
    except Exception as e:
        # Diagnostics go to stderr: main() emits its JSON results on stdout.
        print(f"Error getting online course recommendations: {str(e)}", file=sys.stderr)
        return []

_COURSE_FIELD_NAMES = ('name', 'platform', 'instructor', 'duration', 'url', 'reason')

# Primary format: the six labels in the order requested from the model. Each
# label is matched exactly once (the value runs lazily up to the next label);
# the reason runs up to the next "COURSE:" or the end of the text.
_STRUCTURED_COURSE_PATTERN = (
    r'COURSE:\s*(.*?)\s*PLATFORM:\s*(.*?)\s*INSTRUCTOR:\s*(.*?)\s*DURATION:\s*(.*?)'
    r'\s*URL:\s*(.*?)\s*REASON:\s*(.*?)(?=COURSE:|$)'
)

# Alternative format: one "Label: value" per line, with synonymous labels.
_ALTERNATIVE_COURSE_PATTERN = r'(?:Course|Title):\s*(.*?)(?:\n|$).*?(?:Platform|Provider):\s*(.*?)(?:\n|$).*?(?:Instructor|Author|By):\s*(.*?)(?:\n|$).*?(?:Duration|Length):\s*(.*?)(?:\n|$).*?(?:URL|Link):\s*(.*?)(?:\n|$).*?(?:Reason|Why):\s*(.*?)(?=(?:Course|Title):|$)'

# Last resort: "<name>: ... Platform: <platform> ... URL: <url>" list items.
_NUMBERED_LIST_PATTERN = r'(?:\d+\.|-)?\s*([^:\n]+):\s*.*?(?:Platform|Available on):\s*([^,\n]+).*?(?:URL|Link):\s*(\S+)'


def _clean_field(value):
    """Trim whitespace plus Markdown bold / code-fence characters around a captured field."""
    return value.strip(' \t\r\n*`').strip()


def _extract_labelled_courses(pattern, response):
    """Return one field dict per match of a six-group labelled-record pattern."""
    courses = []
    for match in re.finditer(pattern, response, re.DOTALL | re.IGNORECASE):
        fields = dict(zip(_COURSE_FIELD_NAMES, (_clean_field(group) for group in match.groups())))
        # Only include courses with at least a name and platform
        if fields['name'] and fields['platform']:
            courses.append(fields)
    return courses


def parse_online_course_recommendations_improved(response):
    """Parse and validate online course recommendations with improved reliability.

    Three extraction strategies are tried in turn until one yields records: the
    structured COURSE/PLATFORM/... format, a line-based format with synonymous
    labels, and a loose list format. Every record is then passed to
    ``validate_and_fix_course`` (up to 5 in parallel); results keep the order in
    which the model listed them. If fewer than 3 courses survive validation,
    entries from ``get_backup_recommendations`` are appended until there are 5.

    Args:
        response: Raw text returned by the model (``None`` is treated as empty).

    Returns:
        A list of at most 5 validated course dicts.
    """
    response = response or ""

    course_data = _extract_labelled_courses(_STRUCTURED_COURSE_PATTERN, response)

    # If the structured approach failed, try alternative parsing
    if not course_data:
        course_data = _extract_labelled_courses(_ALTERNATIVE_COURSE_PATTERN, response)

    # If we still don't have structured data, extract as much as possible
    if not course_data:
        for match in re.finditer(_NUMBERED_LIST_PATTERN, response, re.DOTALL):
            course_data.append({
                'name': match.group(1).strip(),
                'platform': match.group(2).strip(),
                'instructor': "Not specified",
                'duration': "Varies",
                'url': match.group(3).strip(),
                'reason': "Relevant to your career goals and skills."
            })

    # Validate and fix the course data concurrently
    online_courses = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        pending = [
            (course, executor.submit(validate_and_fix_course, course))
            for course in course_data
        ]

        # Collect in submission order (not completion order) so the model's
        # ranking is preserved and the output is deterministic.
        for course, future in pending:
            try:
                valid_course = future.result()
            except Exception as e:
                # Diagnostics go to stderr: main() emits its JSON on stdout.
                print(f"Error validating course {course.get('name')}: {str(e)}", file=sys.stderr)
                continue
            if valid_course:
                online_courses.append(valid_course)

    # If after validation we still don't have enough courses, top up with the
    # static backup recommendations.
    if len(online_courses) < 3:
        for course in get_backup_recommendations():
            if len(online_courses) >= 5:
                break
            online_courses.append(course)

    # Return the top 5 validated courses
    return online_courses[:5]

def validate_and_fix_course(course):
    """Validate and fix a single course entry.

    Args:
        course: Dict with ``name``, ``platform``, ``instructor``, ``duration``,
            ``url`` and ``reason``; missing or ``None`` values count as empty.

    Returns:
        A normalized course dict (fields length-capped, ``valid_url`` telling
        whether the final URL answered successfully), or ``None`` when the
        entry has no name or platform or when validation raises.
    """
    try:
        name = (course.get('name') or '').strip()
        platform = (course.get('platform') or '').strip()
        instructor = (course.get('instructor') or '').strip()
        duration = (course.get('duration') or '').strip()
        url = (course.get('url') or '').strip()
        reason = (course.get('reason') or '').strip()

        # Skip invalid courses
        if not name or not platform:
            return None

        # Remember whether the entry came with its own URL: when it did not,
        # fix_url substitutes a search page, which says nothing about the course.
        has_own_url = bool(url) and url != "#"

        # Fix URL if needed
        url = fix_url(url, platform)

        # Validate URL
        url_valid, _ = verify_course_url(url)

        # If the course's own URL is reachable, try to extract additional
        # information from the page.
        if url_valid and has_own_url:
            additional_info = extract_course_info_from_url(url)

            # Use extracted info if current fields are empty or generic
            if additional_info.get('name') and len(name) < 10:
                name = additional_info.get('name')

            if additional_info.get('instructor') and (not instructor or instructor == "Not specified"):
                instructor = additional_info.get('instructor')

        # Ensure duration is in a sensible format
        if not is_valid_duration(duration):
            duration = "Self-paced"

        # Ensure reason is meaningful
        if not reason or len(reason) < 20:
            reason = f"This course on {platform} covers topics relevant to your career goals and skill development needs."

        # Create the validated course object
        return {
            'name': name[:150],  # Limit length
            'platform': platform[:50],
            'instructor': instructor[:100] if instructor else "Not specified",
            'duration': duration[:50],
            'url': url,
            'reason': reason[:300],  # Limit reason length
            'valid_url': url_valid
        }
    except Exception as e:
        # Diagnostics go to stderr: main() emits its JSON results on stdout.
        print(f"Error in course validation: {str(e)}", file=sys.stderr)
        return None

def get_backup_recommendations():
    """Build the static fallback list of five well-known courses.

    Every call returns freshly built dicts with the keys ``name``, ``platform``,
    ``instructor``, ``duration``, ``url``, ``reason`` and ``valid_url`` (always
    ``True``), so callers may mutate the result freely.
    """
    columns = ('name', 'platform', 'instructor', 'duration', 'url', 'reason')
    rows = (
        (
            "Google Data Analytics Professional Certificate",
            "Coursera",
            "Google",
            "6 months",
            "https://www.coursera.org/professional-certificates/google-data-analytics",
            "This comprehensive program covers data analysis fundamentals and provides a professional certificate recognized in the industry. It's an excellent choice for developing analytical skills.",
        ),
        (
            "The Web Developer Bootcamp",
            "Udemy",
            "Colt Steele",
            "63.5 hours",
            "https://www.udemy.com/course/the-web-developer-bootcamp/",
            "This bestselling course covers front-end and back-end web development, teaching HTML, CSS, JavaScript, Node, and more. It's perfect for building a foundation in web development.",
        ),
        (
            "Machine Learning",
            "Coursera",
            "Stanford University & Andrew Ng",
            "11 weeks",
            "https://www.coursera.org/learn/machine-learning",
            "This foundational course by Andrew Ng is perfect for building strong machine learning fundamentals. It covers supervised learning, unsupervised learning, and best practices.",
        ),
        (
            "Excel Skills for Business Specialization",
            "Coursera",
            "Macquarie University",
            "6 months",
            "https://www.coursera.org/specializations/excel",
            "Excel is critical for many professional roles, and this specialization takes you from basics to advanced analytics features that can significantly improve productivity.",
        ),
        (
            "Introduction to Python Programming",
            "edX",
            "Georgia Tech",
            "5 weeks",
            "https://www.edx.org/learn/python/georgia-institute-of-technology-introduction-to-python-programming",
            "Python is one of the most in-demand programming languages, and this course provides a comprehensive introduction applicable to many fields including data science and automation.",
        ),
    )

    backups = []
    for row in rows:
        entry = dict(zip(columns, row))
        entry['valid_url'] = True
        backups.append(entry)
    return backups


# ========== Main Function ==========
def _first_user_info(user_data):
    """Return the info dict of the first user in *user_data*, or {} if there is none."""
    if not isinstance(user_data, dict):
        return {}
    first_entry = next(iter(user_data.values()), None)
    return first_entry if isinstance(first_entry, dict) else {}


def _skill_level_label(skill_type):
    """Map a skill_type code (1/2/3, as str or int) to a level name; pass anything else through."""
    labels = {"1": "Beginner", "2": "Intermediate", "3": "Advanced"}
    return labels.get(str(skill_type), skill_type)


def _course_name_lines(course_ids, course_mapping):
    """Render one "- <name>" prompt line per course id ("Unknown Course" for unmapped ids)."""
    lines = []
    for course_id in course_ids:
        course_name = course_mapping.get(course_id, {}).get('name', "Unknown Course")
        lines.append(f"- {course_name}\n")
    return lines


def _build_career_prompt(user_query, user_skills, completed_courses, assigned_courses,
                         courses_data, course_mapping):
    """Assemble the career-advice prompt sent to the LLM (course IDs are deliberately left out)."""
    parts = [f"""
You are an expert career advisor. Your task is to provide personalized career guidance and recommend specific courses from the client's course catalog.

User query: "{user_query}"

User skills:
"""]

    # Add skills from user data if available
    for skill in user_skills:
        skill_name = skill.get('skill_name', 'Unknown')
        skill_level_text = _skill_level_label(skill.get('skill_type', 'Unknown'))
        parts.append(f"- {skill_name} (Level: {skill_level_text})\n")

    if completed_courses:
        parts.append("\nCompleted Courses:\n")
        parts.extend(_course_name_lines(completed_courses, course_mapping))

    if assigned_courses:
        parts.append("\nAssigned Courses:\n")
        parts.extend(_course_name_lines(assigned_courses, course_mapping))

    # Available courses are listed by name only, so the model has no IDs to echo back
    parts.append("\nAvailable Courses (please recommend from this list):\n")
    for course in courses_data:
        course_name = course['name']
        course_skills = course.get('skills', [])

        skill_text = ""
        if course_skills:
            skill_names = [f"{skill.get('skill_name', '')} (Level: {skill.get('skill_type', '')})" for skill in course_skills]
            skill_text = f" - Skills: {', '.join(skill_names)}"

        parts.append(f"- {course_name}{skill_text}\n")

    parts.append("""
Based on the user's skills, completed courses, and assigned courses, provide career advice and recommend specific courses from the available list.

Please follow these guidelines:
1. Analyze the user's current skills and identify potential career paths.
2. Identify skill gaps for these career paths.
3. Recommend 3-5 specific courses from the available list that would help bridge these skill gaps.
4. For each recommended course, explain why it's relevant to the user's career goals.
5. Structure your recommendations in a clear format, mentioning the exact course names from the available list.
6. Provide a learning path that sequences these courses in a logical order.
7. DO NOT include or mention any course IDs in your response, ONLY use course names.

IMPORTANT: When recommending courses, use the EXACT course names from the available list provided above. Do not include course IDs or any identifiers other than the course name.

Format your response as follows:
1. Career Analysis: [Brief analysis of user's current skills and potential career paths]
2. Skill Gaps: [Identify skill gaps for these career paths]
3. Recommended Courses:
   - [Exact Course Name 1]: [Why this course is recommended]
   - [Exact Course Name 2]: [Why this course is recommended]
   - [Additional courses as needed]
4. Learning Path: [Sequence these courses in a logical order]
5. Additional Career Advice: [Any other relevant career guidance]

Keep your answer professional, specific, and motivating. Remember: NEVER include course IDs in your response.
""")

    return "".join(parts)


def main(payload=None):
    """Generate career advice and course recommendations and print them as JSON.

    Args:
        payload: Optional dict with the keys ``client_id``, ``user_query`` (or
            ``userQuery``), ``user_data`` (or ``userData``) and
            ``client_all_courses_data``. When ``None``, the payload is read as
            JSON from the file named by ``sys.argv[1]``.

    Returns:
        None. The result is printed to stdout as a JSON object with the keys
        ``answer``, ``recommended_courses`` and
        ``online_course_recommendations``; on failure a JSON object with a
        single ``error`` key is printed instead.

    Only the first entry of ``user_data`` is used, for both the greeting name
    and the skills / completed / assigned course lists.
    """
    try:
        # Check if payload is provided directly
        if payload is None:
            # Get file path from command line argument
            if len(sys.argv) <= 1:
                print(json.dumps({"error": "No file path provided"}))
                return

            file_path = sys.argv[1]

            if not os.path.exists(file_path):
                print(json.dumps({"error": f"File not found: {file_path}"}))
                return

            # Read the JSON payload from file; explicit UTF-8 avoids
            # locale-dependent decoding of the payload.
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    payload = json.load(f)
            except json.JSONDecodeError as e:
                print(json.dumps({"error": f"Invalid JSON in file: {str(e)}"}))
                return
            except Exception as e:
                print(json.dumps({"error": f"Error reading file: {str(e)}"}))
                return

        # Valid JSON may still be a list/string/number; everything below needs a dict
        if not isinstance(payload, dict):
            print(json.dumps({"error": "Invalid payload: expected a JSON object"}))
            return

        # Extract data from payload, accepting the camelCase key variants as fallback
        client_id = payload.get('client_id', 3)  # Default to 3 if not provided
        user_query = payload.get('user_query') or payload.get('userQuery') or ''
        user_data = payload.get('user_data') or payload.get('userData') or {}
        client_all_courses_data = payload.get('client_all_courses_data', {})

        # Use one and the same user record for the greeting and for the skill data
        user_info = _first_user_info(user_data)
        user_name = user_info.get('user_name', 'User')
        user_skills = user_info.get('skills') or []
        completed_courses = user_info.get('completedCourses') or []
        assigned_courses = user_info.get('assignedCourses') or []

        # Load course data from payload
        try:
            courses_data = client_all_courses_data.get(str(client_id), [])
            course_mapping = {course['courseId']: course for course in courses_data}
        except Exception as e:
            print(json.dumps({"error": f"Error loading client data: {str(e)}"}))
            return

        # Generate a detailed prompt for the LLM
        prompt = _build_career_prompt(
            user_query, user_skills, completed_courses, assigned_courses,
            courses_data, course_mapping,
        )

        # Get answer from Ollama (Gemma 12B)
        answer = query_ollama(prompt)

        # Additional post-processing to ensure no course IDs in the answer
        answer = remove_course_ids_from_text(answer, course_mapping)

        # Extract course recommendations from the answer
        recommended_courses = extract_course_recommendations(answer, course_mapping, user_skills)

        # Limit to top 5 recommendations
        recommended_courses = recommended_courses[:5]

        # Add reasoning and scoring to recommendations
        for course in recommended_courses:
            course_id = course["courseId"]
            course_name = course["courseName"]
            match_score = course.get("matchScore", "N/A")

            # Add reasoning based on the course details
            course_details = course_mapping.get(course_id, {})
            course_skills = course_details.get('skills', [])
            skill_names = [skill.get('skill_name', '') for skill in course_skills]
            skill_names = [name for name in skill_names if name]

            if skill_names:
                reason = f"{course_name} is recommended because it covers essential skills such as {', '.join(skill_names)}, which are crucial for your career goals."
            else:
                # No skills listed for the course: avoid the dangling "such as ," sentence
                reason = f"{course_name} is recommended because it is relevant to your career goals."

            course["reason"] = reason
            course["score"] = match_score

        # Get online course recommendations
        online_courses = get_online_course_recommendations(user_skills, user_query)

        # Format each online course to match the expected structure
        formatted_online_courses = [
            {
                "courseId": f"online_{position}",  # Use a special ID format for online courses
                "courseName": course['name'],
                "platform": course['platform'],
                "instructor": course['instructor'],
                "duration": course['duration'],
                "matchScore": "N/A",  # Not applicable for online courses
                "reason": course['reason'],
                "score": "N/A",  # Not applicable for online courses
                "isOnlineCourse": True,  # Flag to identify online courses
                "url": course['url'],  # Include the URL
            }
            for position, course in enumerate(online_courses, start=1)
        ]

        # Format the final output
        final_output = {
            "answer": f"Hi {user_name},\n\n{answer}",
            "recommended_courses": recommended_courses,
            "online_course_recommendations": formatted_online_courses
        }

        # Print the JSON output
        print(json.dumps(final_output, indent=2))

    except Exception as e:
        # Top-level boundary: report the failure as JSON on stdout and keep
        # the traceback on stderr for debugging.
        import traceback
        print(json.dumps({"error": str(e)}, indent=2))
        traceback.print_exc()

if __name__ == "__main__":
    # Sample skills for the built-in demo user, as (skill_id, skill_type, skill_name).
    _sample_skills = [
        ("6", "3", "Leadership"),
        ("7", "1", "Management"),
        ("9", "2", "Communication"),
        ("242", "3", "Project Management"),
        ("26", "2", "Teamwork"),
        ("8", "1", "Problem Solving"),
        ("5", "2", "Time Management"),
        ("190", "2", "Analytical Skills"),
        ("70", "1", "Creativity"),
        ("224", "1", "Adaptability"),
        ("252", "1", "Innovation"),
        ("234", "2", "Strategic Thinking"),
        ("241", "1", "Decision Making"),
    ]

    # Built-in payload used when the script is started without a file argument.
    default_payload = {
        "client_id": 3,
        "user_query": "Suggest suitable career path for my skills and completed courses.",
        "user_data": {
            "4": {
                "user_name": "Kunal Jain",
                "managerId": "13962",
                "assignedCourses": ["6", "5", "62", "98", "82", "291", "455", "433", "353", "580"],
                "completedCourses": ["111", "114"],
                "skills": [
                    {"skill_id": sid, "skill_type": level, "skill_name": label}
                    for sid, level, label in _sample_skills
                ],
            }
        },
        "client_all_courses_data": {
            "3": [
                {
                    "courseId": "43",
                    "name": "Python Programming for Complete Beginners",
                    "short_description": "Python Programming for Complete Beginners",
                    "description": "",
                    "skills": [],
                },
                {
                    "courseId": "58",
                    "name": "Laravel Beginner",
                    "short_description": "Laravel Beginner description",
                    "description": "",
                    "skills": [],
                },
                # Add more courses as needed
            ]
        },
    }

    # Without a command-line argument fall back to the sample payload;
    # otherwise let main() read the payload from the file named in sys.argv[1].
    if len(sys.argv) <= 1:
        main(default_payload)
    else:
        main()