import json
import re
from typing import List, Optional
import pandas as pd
from langchain_groq import ChatGroq
from langchain_ollama import ChatOllama
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain
from typing import List

from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator,ValidationError


class SimpleOnlineCourseRecommendation(BaseModel):
    """Schema for one recommended course that is attributed to a learning platform."""
    # Platform the course is attributed to; defaults to an empty string when absent.
    platform: str = Field(default="")
    # Course title (required).
    course_name: str
    # Free-text justification for recommending the course (required).
    reason_to_recommend: str
    # NOTE: the 0-100 range below is only a convention; no validator in this class enforces it.
    relevance_score: int  # Should be 0 to 100
    # Course URL or other locator; defaults to an empty string when absent.
    link: str = Field(default="")
from typing import List
class SimpleOfflineCourseRecommendation(BaseModel):
    """Schema for one recommended course without platform or link information.

    The same three fields are quoted verbatim inside the prompt built by
    ``recommend_courses``; keep both in sync when changing this class.
    """
    # Course title (required).
    course_name: str
    # Free-text justification for recommending the course (required).
    reason_to_recommend: str
    # Integer relevance rating; the prompt in recommend_courses asks for 0-100,
    # but nothing in this class enforces a range.
    relevance_score: int  

class CourseRecommendationsOutput_Offline(BaseModel):
    """Container schema: a list of ``SimpleOfflineCourseRecommendation`` items."""
    # Recommended courses, each validated as SimpleOfflineCourseRecommendation.
    courses: List[SimpleOfflineCourseRecommendation]

class CourseRecommendationOutput(BaseModel):
    """Container schema: a list of ``SimpleOnlineCourseRecommendation`` items.

    This is the model that ``parse_simple_llm_response`` validates against.
    """
    # Recommended courses, each validated as SimpleOnlineCourseRecommendation.
    recommendations: List[SimpleOnlineCourseRecommendation]

def parse_simple_llm_response(llm_response: str) -> "CourseRecommendationOutput":
    """Parse an LLM reply as JSON and validate it as a ``CourseRecommendationOutput``.

    Args:
        llm_response: Raw JSON text. It must decode to a JSON object whose
            members satisfy ``CourseRecommendationOutput``.

    Returns:
        The validated ``CourseRecommendationOutput`` instance.

    Raises:
        ValueError: If the text is not valid JSON, if it decodes to something
            other than a JSON object, or if pydantic validation fails. The
            underlying exception (when there is one) is chained as
            ``__cause__`` so the original traceback is not lost.
    """
    # Keep each try body minimal so every failure maps to exactly one message.
    try:
        parsed_json = json.loads(llm_response)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON format: {e}") from e

    # ``**parsed_json`` below needs a mapping; a top-level list/number/string
    # would otherwise escape as an unrelated TypeError.
    if not isinstance(parsed_json, dict):
        raise ValueError(
            "Invalid JSON format: expected a JSON object, "
            f"got {type(parsed_json).__name__}"
        )

    try:
        return CourseRecommendationOutput(**parsed_json)
    except ValidationError as ve:
        raise ValueError(f"Pydantic validation error:\n{ve}") from ve

def excel_to_json(skills_file, skills_column, courses_file, name_column, description_column,
                  output_file="DataIb1.json"):
    """
    Reads skills from one Excel file and courses (with name and description) from another,
    then saves them as a structured JSON file.

    Column lookups are case-insensitive and ignore surrounding whitespace: both the
    spreadsheet headers and the requested column names are lower-cased and stripped
    before they are compared.

    Args:
        skills_file (str): Path to the Excel file containing skills.
        skills_column (str): The column name that contains the skills.
        courses_file (str): Path to the Excel file containing courses.
        name_column (str): The column name for course names.
        description_column (str): The column name for course descriptions.
        output_file (str | None): Where to write the JSON document. Defaults to
            "DataIb1.json" (the previously hard-coded location). Pass None or an
            empty string to skip writing and only get the returned string.

    Returns:
        str: JSON string with the keys "organization_skills" (unique, non-null
        skill values) and "courses" (list of {"name", "description"} objects for
        rows where both values are present).

    Raises:
        ValueError: If a requested column is missing from its spreadsheet.
    """

    # Read the skills file
    df_skills = pd.read_excel(skills_file)
    df_skills.columns = df_skills.columns.str.lower().str.strip()
    skills_column = skills_column.lower().strip()

    if skills_column not in df_skills.columns:
        raise ValueError(f"Column '{skills_column}' not found in skills file. Available columns: {list(df_skills.columns)}")

    skills = df_skills[skills_column].dropna().unique().tolist()

    # Read the courses file
    df_courses = pd.read_excel(courses_file)
    df_courses.columns = df_courses.columns.str.lower().str.strip()
    name_column = name_column.lower().strip()
    description_column = description_column.lower().strip()

    if name_column not in df_courses.columns or description_column not in df_courses.columns:
        raise ValueError(f"Columns '{name_column}' or '{description_column}' not found in courses file. Available columns: {list(df_courses.columns)}")

    # Remove rows where either the name or the description is missing.
    courses = df_courses.dropna(subset=[name_column, description_column])

    # Pull the two columns out directly instead of using iterrows(): iterrows()
    # builds one Series per row (slow) and coerces the whole row to a common
    # dtype, whereas tolist() keeps each column's own values as Python scalars.
    courses_list = [
        {"name": name, "description": description}
        for name, description in zip(
            courses[name_column].tolist(),
            courses[description_column].tolist(),
        )
    ]

    # Create final JSON structure
    data = {
        "organization_skills": skills,
        "courses": courses_list,
    }

    # Serialise once and reuse the text for both the file and the return value.
    json_text = json.dumps(data, indent=4)

    if output_file:
        with open(output_file, "w", encoding="utf-8") as json_file:
            json_file.write(json_text)

    return json_text  # Return JSON string for reference


def _parse_skill_response(response_text):
    """Convert "Goal:/Problem:" lines followed by "Skills:" lines into a dict of skill lists."""
    extracted_skills = {}
    current_item = None
    for raw_line in response_text.strip().split('\n'):
        line = raw_line.strip()
        if line.startswith(("Goal:", "Problem:")):
            # The whole line (prefix included) is used as the dictionary key.
            current_item = line
            extracted_skills[current_item] = []
        elif line.startswith("Skills:") and current_item:
            candidates = (skill.strip() for skill in line[len("Skills:"):].split(','))
            # Drop blanks produced by a trailing comma or an empty "Skills:" line,
            # otherwise '' would be reported as a skill.
            extracted_skills[current_item] = [skill for skill in candidates if skill]
            current_item = None  # Reset for the next goal/problem
    return extracted_skills


def TNA(groq_api_key, goals, problems):
    """
    Analyzes multiple business goals and problems using an LLM to extract required skills.

    The goals and problems are rendered as bullet lists into a single prompt, sent to a
    local Ollama chat model ("gemma3:12b", temperature 0), and the textual reply is parsed
    line by line.

    Args:
        groq_api_key (str): The API key for the Groq service. Currently unused: the
            model is created with ChatOllama, so no Groq call is made. The parameter is
            kept so existing callers keep working.
        goals (list): A list of business goals (strings).
        problems (list): A list of business problems (strings).

    Returns:
        dict: A dictionary where keys are the "Goal: ..." / "Problem: ..." lines exactly
              as they appear in the model's reply, and values are lists of extracted
              skills. A goal/problem line with no following "Skills:" line maps to [].
    """
    # NOTE: an earlier variant used ChatGroq(model="llama-3.1-8b-instant", ...,
    # groq_api_key=groq_api_key); the local Ollama model below replaced it.
    llm = ChatOllama(
        model="gemma3:12b",
        temperature=0,
    )

    skill_extraction_prompt = PromptTemplate(
        input_variables=["goals_str", "problems_str"],
        template = """
            You are an industry expert with over 10 years of experience in analyzing business goals and identifying the most critical skills required to achieve them.

            Given the following business goals:
            {goals_str}

            And the following business problems:
            {problems_str}

            Analyze the requirements for each goal and problem and provide a structured list of essential skills needed to address them effectively.

            For each goal and problem, clearly indicate which one it pertains to.

            - Ensure the list of skills for each goal/problem is precise, diverse, and highly relevant.
            - Provide the skills in a comma-separated format on a line starting with "Skills:" immediately following the Goal or Problem statement.
            - Do not include explanations, extra text, or other formatting besides the Goal/Problem statement and the "Skills:" line.

            **Output Format Example:**
            Goal: Increase customer retention
            Skills: Customer Service, Communication, Problem-Solving

            Problem: High employee turnover
            Skills: Employee Engagement, Leadership, Compensation Strategy
            """
    )

    goals_str = "\n".join(f"- {g}" for g in goals)
    problems_str = "\n".join(f"- {p}" for p in problems)

    skill_extraction_chain = skill_extraction_prompt | llm
    # Named chain_input rather than ``input`` to avoid shadowing the builtin.
    chain_input = {
        "goals_str": goals_str,
        "problems_str": problems_str,
    }
    response = skill_extraction_chain.invoke(chain_input)
    print(response)

    return _parse_skill_response(response.content)

def find_skill_gaps(required_skills_dict, organization_skills):
    """
    Analyzes skill gaps for multiple goals/problems.

    Matching is exact string equality (case- and whitespace-sensitive).

    Args:
        required_skills_dict (dict): A dictionary where keys are goals/problems
                                      and values are lists of required skills.
        organization_skills (iterable): The organization's existing skills. A set
                                      is typical, but any iterable (list, tuple, ...)
                                      is accepted.

    Returns:
        dict: For each goal/problem key, a dict with
              "existing_skills" (required skills the organization has) and
              "missing_skills" (required skills it lacks). Both lists are
              de-duplicated and keep the order of the required-skills input, so the
              result is deterministic.
    """
    # Normalise once so list/tuple input works and membership tests are O(1).
    known_skills = set(organization_skills)

    analysis_results = {}
    for item, required_skills in required_skills_dict.items():
        # dict.fromkeys de-duplicates while preserving first-seen order.
        unique_required = list(dict.fromkeys(required_skills))
        analysis_results[item] = {
            "existing_skills": [s for s in unique_required if s in known_skills],
            "missing_skills": [s for s in unique_required if s not in known_skills],
        }
    return analysis_results



def recommend_online_courses(groq_api_key, goal_or_problem, missing_skills, item_index=3):
    """
    Ask the LLM for online course recommendations for one goal/problem.

    One entry is picked from each of ``goal_or_problem`` and ``missing_skills`` (the
    same position in both), substituted into a prompt that requests a strict-JSON
    report, and sent to a local Ollama chat model ("gemma3:12b", temperature 0).

    Args:
        groq_api_key (str): Unused by this function (the model is created with
            ChatOllama); kept for interface compatibility.
        goal_or_problem (sequence): Goals/problems; element ``item_index`` is used.
        missing_skills (sequence): Missing skills per goal/problem, aligned by position
            with ``goal_or_problem``; element ``item_index`` is used and is formatted
            into the prompt as-is.
        item_index (int): Position of the entry to analyse. Defaults to 3, the value
            that used to be hard-coded.

    Returns:
        The object returned by the chain's ``invoke`` call; callers in this file read
        its ``.content`` attribute for the model's text.

    Raises:
        ValueError: If ``goal_or_problem`` or ``missing_skills`` is empty/None.
        IndexError: If ``item_index`` is out of range for either argument.

    NOTE(review): the JSON layout requested by the prompt ("course_recommendations",
    "why_recommended", "level", ...) does not match the fields of
    CourseRecommendationOutput ("recommendations", "reason_to_recommend"), so the reply
    will not validate with parse_simple_llm_response as-is - confirm which is intended.
    """
    # Validate inputs
    if not goal_or_problem or not missing_skills:
        raise ValueError("Goal/problem and missing skills must be provided")

    # Resolve the selected entry up front so a bad index fails fast with a clear
    # message, before any prompt or model object is created.
    try:
        goal_or_problem_str = goal_or_problem[item_index]
        missing_skills_str = missing_skills[item_index]
    except IndexError as exc:
        raise IndexError(
            f"item_index {item_index} is out of range "
            f"(goal_or_problem has {len(goal_or_problem)} entries, "
            f"missing_skills has {len(missing_skills)})"
        ) from exc

    # Doubled braces in the template are literal JSON braces; single-brace names are
    # the placeholders filled from input_data below.
    course_recommendation_prompt = PromptTemplate(
        input_variables=["goal_or_problem", "missing_skills_str"],
        template="""
You are a professional learning and development advisor.

Given the user's business goal or problem, and their identified missing skills, output the recommendation report in **STRICT JSON FORMAT**.

### JSON Output Format:
{{
  "goal_or_problem": "string",
  "missing_skills": ["list", "of", "skills"],
  "course_recommendations": [
    {{
      "platform": "Platform name",
      "course_name": "Course title",
      "level": "Beginner / Intermediate / Advanced",
      "why_recommended": "Reason why the course is useful",
      "relevance_score": integer (0-100),
      "link": "Course URL or 'Available on [Platform]'"
    }}
  ],
  "suggested_order": [
    "Course 1 name",
    "Course 2 name",
    "Course 3 name"
  ]
}}

**Rules:**
- Only return valid JSON, no markdown, no headings.
- Use double quotes `"` around all keys and string values.
- Ensure the JSON is syntactically correct (ready for `json.loads()`).
- If a field is missing, use an empty string "" or empty array [].

---

### Inputs:

Goal / Problem:
{goal_or_problem}

Missing Skills:
{missing_skills_str}
"""
    )

    llm = ChatOllama(
        model="gemma3:12b",
        temperature=0,
    )

    # Pipe prompt to LLM
    online_course_extraction_chain = course_recommendation_prompt | llm

    # Keys must match the template's placeholder names.
    input_data = {
        "goal_or_problem": goal_or_problem_str,
        "missing_skills_str": missing_skills_str,
    }

    return online_course_extraction_chain.invoke(input_data)

def recommend_courses(groq_api_key, goal_or_problem, missing_skills, courses_available, item_index=3):
    """
    Ask the LLM to pick the most relevant courses from a supplied course catalogue.

    One entry is picked from each of ``goal_or_problem`` and ``missing_skills`` (the
    same position in both) and, together with ``courses_available``, substituted into
    a prompt that asks for at most five courses as JSON. The prompt is sent to a local
    Ollama chat model ("gemma3:12b", temperature 0).

    Parameters:
    -----------
    groq_api_key : str
        Unused by this function (the model is created with ChatOllama); kept for
        interface compatibility.

    goal_or_problem : sequence
        Goals/problems; element ``item_index`` is used.

    missing_skills : sequence
        Missing skills per goal/problem, aligned by position with ``goal_or_problem``;
        element ``item_index`` is used and is formatted into the prompt as-is.

    courses_available : str
        Course catalogue text inserted under "Available Courses" in the prompt
        (the script in this file passes a JSON string of name/description records).

    item_index : int, optional
        Position of the entry to analyse. Defaults to 3, the value that used to be
        hard-coded.

    Returns:
    --------
    The object returned by the chain's ``invoke`` call; its ``.content`` attribute
    holds the model's text.

    Raises:
    -------
    ValueError
        If ``goal_or_problem`` or ``missing_skills`` is empty/None.
    IndexError
        If ``item_index`` is out of range for either argument.

    Example:
    --------
    >>> goals = ["Improve data analysis capabilities in marketing team"]
    >>> skills = [["Advanced Excel data manipulation", "Marketing data visualization"]]
    >>> catalogue = '[{"name": "Excel 101", "description": "Spreadsheet basics"}]'
    >>> reply = recommend_courses("", goals, skills, catalogue, item_index=0)  # doctest: +SKIP
    >>> print(reply.content)  # doctest: +SKIP
    """
    # Validate inputs
    if not goal_or_problem or not missing_skills:
        raise ValueError("Goal/problem and missing skills must be provided")

    # Resolve the selected entry up front so a bad index fails fast with a clear
    # message, before any prompt or model object is created.
    try:
        goal_or_problem_str = goal_or_problem[item_index]
        missing_skills_str = missing_skills[item_index]
    except IndexError as exc:
        raise IndexError(
            f"item_index {item_index} is out of range "
            f"(goal_or_problem has {len(goal_or_problem)} entries, "
            f"missing_skills has {len(missing_skills)})"
        ) from exc

    print("goal and missing skill set")
    print(goal_or_problem_str)
    print(missing_skills_str)

    # The class text at the end of the template mirrors SimpleOfflineCourseRecommendation;
    # it is shown to the model as a description of the expected fields.
    course_recommendation_prompt_simplified = PromptTemplate(
        input_variables=["goal_or_problem", "missing_skills_str", "course_data_json"],
        template="""
You are a professional learning and development advisor.

You are given:
- A business goal
- A list of missing skills
- A database of available courses (with name and description)

### Business Goal:
{goal_or_problem}

### Missing Skills:
{missing_skills_str}

### Available Courses:
{course_data_json}

Your task is to:
1. Evaluate each course for how well it helps achieve the goal and address the missing skills.
2. Select the most relevant courses (maximum 5).
3. For each course, return ONLY the following fields:
   - "course_name": string
   - "reason_to_recommend": string
   - "relevance_score": integer (0-100)

Don't give any extra text . Just return JSON Output.
Here is the pydantic class to validate the output that you give. Make sure the check always passes.

class SimpleOfflineCourseRecommendation(BaseModel):
    
    course_name: str
    reason_to_recommend: str
    relevance_score: int


"""
    )

    llm = ChatOllama(
        model="gemma3:12b",
        temperature=0,
    )

    online_course_extraction_chain = course_recommendation_prompt_simplified | llm

    # Keys must match the template's placeholder names.
    input_data = {
        "goal_or_problem": goal_or_problem_str,
        "missing_skills_str": missing_skills_str,
        "course_data_json": courses_available,
    }

    return online_course_extraction_chain.invoke(input_data)

# Example Usage
if __name__ == "__main__":
    import os

    # Credentials must not live in source code: read the key from the environment.
    # NOTE: any key that was previously committed here should be treated as leaked
    # and rotated. The functions below currently ignore the key (they use Ollama).
    groq_api_key = os.environ.get("GROQ_API_KEY", "")

    business_goals = [
        "Increase sales in the new Southeast Asia market by 20% in the next fiscal year.",
        "Improve the efficiency of our marketing campaigns.",
    ]
    business_problems = [
        "High customer churn rate.",
        "Lack of effective internal communication.",
    ]

    # All goals first, then all problems. The previous
    # ``for i, j in business_goals, business_problems`` loop produced this same order,
    # but only worked because each list happened to contain exactly two entries.
    list_goal_prob = [*business_goals, *business_problems]

    skills_file = "UserSkillDataIB.xlsx"  # Change this to your actual file path
    skills_column = "competency_name"  # Change this to the actual column name in your Excel file
    courses_file = "CourseDataIB.xlsx"
    courses_column = "name"
    description_column = "short_description"

    # excel_to_json returns the JSON text (and also writes it to disk); parse it once
    # and take both the skills and the course catalogue from the parsed result.
    json_output = json.loads(
        excel_to_json(skills_file, skills_column, courses_file, courses_column, description_column)
    )
    organization_skills = set(json_output["organization_skills"])

    # Course catalogue as JSON text, ready to be passed to recommend_courses:
    #   recommend_courses(groq_api_key, list_goal_prob, all_missing_skills, course_data_json)
    course_data_json = json.dumps(json_output["courses"], ensure_ascii=False, indent=2)

    # 1. Extract Required Skills using LLM
    required_skills_for_all = TNA(groq_api_key, business_goals, business_problems)
    print("Required Skills for Goals and Problems:")
    for item, skills in required_skills_for_all.items():
        print(f"{item}: {skills}")

    # 2. Identify Skill Gaps
    skill_gap_analysis = find_skill_gaps(required_skills_for_all, organization_skills)
    print("\nSkill Gap Analysis:")
    for item, analysis in skill_gap_analysis.items():
        print(f"\n--- {item} ---")
        print("Existing Skills:", analysis["existing_skills"])
        print("Missing Skills:", analysis["missing_skills"])

    # One list of missing skills per goal/problem, in the order the LLM reported them.
    # recommend_online_courses pairs this list with list_goal_prob by position, which
    # assumes the LLM answered goals first, then problems - TODO confirm.
    all_missing_skills = [analysis["missing_skills"] for analysis in skill_gap_analysis.values()]
    print("all_missing_skills")
    print(all_missing_skills)
    print("===============================================================================")

    # 3. Ask for online course recommendations
    online_course = recommend_online_courses(groq_api_key, list_goal_prob, all_missing_skills)
    print(online_course.content)
    print("//////////////////////////////////////////////////////////////////////////////////////////////")
    print(online_course)

    # Remove an optional Markdown code fence around the JSON. ``str.strip("```json")``
    # is not suitable: its argument is a *set of characters*, so it would also eat
    # leading/trailing 'j', 's', 'o', 'n' characters of the payload itself.
    reply_text = online_course.content
    fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", reply_text, re.DOTALL)
    raw_response = fenced.group(1) if fenced else reply_text.strip()
    print(raw_response)
    
#print(list_goal_prob)