import json
import re
from typing import List, Optional
import pandas as pd
from langchain_groq import ChatGroq
from langchain_ollama import ChatOllama
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain
from typing import List
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator,ValidationError
from typing import List
import json

class SimpleOnlineCourseRecommendation(BaseModel):
    """One online course suggestion.

    Used as the item type of ``CourseRecommendationOutput.recommendations``.
    """

    # Optional; falls back to an empty string when the field is absent.
    platform: str = Field(default="")
    # Required.
    course_name: str
    # Required.
    reason_to_recommend: str
    # Required integer; the 0-100 range below is a convention only, this
    # model does not enforce any bounds.
    relevance_score: int  # Should be 0 to 100
    # Optional; falls back to an empty string when the field is absent.
    link: str = Field(default="")

class SimpleOfflineCourseRecommendation(BaseModel):
    """One course suggestion carrying only a name, a reason and a score.

    Used as the item type of ``CourseRecommendationsOutput_Offline.courses``.
    All three fields are required.
    """
    
    course_name: str
    reason_to_recommend: str
    # Plain integer; no bounds are enforced by this model.
    relevance_score: int  

class CourseRecommendationsOutput_Offline(BaseModel):
    """Container model holding a list of ``SimpleOfflineCourseRecommendation``."""

    # Required list; each element is validated as an offline recommendation.
    courses: List[SimpleOfflineCourseRecommendation]

class CourseRecommendationOutput(BaseModel):
    """Container model holding a list of ``SimpleOnlineCourseRecommendation``.

    ``parse_simple_llm_response`` builds this model from decoded JSON.
    """

    # Required list; each element is validated as an online recommendation.
    recommendations: List[SimpleOnlineCourseRecommendation]

def parse_simple_llm_response(llm_response: str) -> CourseRecommendationOutput:
    """Decode a JSON reply and validate it as a ``CourseRecommendationOutput``.

    Args:
        llm_response: JSON text whose top level is an object accepted by
            ``CourseRecommendationOutput`` (i.e. with a "recommendations" list).

    Returns:
        The validated ``CourseRecommendationOutput`` instance.

    Raises:
        ValueError: If the text is not valid JSON, if its top level is not a
            JSON object, or if pydantic validation fails. The original
            exception is attached as ``__cause__`` where one exists.
    """
    try:
        payload = json.loads(llm_response)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON format: {e}") from e

    # ``**payload`` needs a mapping; without this guard a top-level list or
    # scalar would escape as a TypeError instead of the ValueError callers expect.
    if not isinstance(payload, dict):
        raise ValueError(
            "Invalid JSON format: expected a JSON object at the top level, "
            f"got {type(payload).__name__}"
        )

    try:
        return CourseRecommendationOutput(**payload)
    except ValidationError as ve:
        raise ValueError(f"Pydantic validation error:\n{ve}") from ve

def excel_to_json(skills_file, skills_column, courses_file, name_column,
                  description_column, id_column, output_path="DataIb1.json"):
    """Combine a skills workbook and a courses workbook into one JSON document.

    Column headers in both workbooks, and the column names passed in, are
    compared after lower-casing and stripping surrounding whitespace.

    Args:
        skills_file: Anything ``pandas.read_excel`` accepts for the skills sheet.
        skills_column: Header of the column holding the organisation's skills.
        courses_file: Anything ``pandas.read_excel`` accepts for the courses sheet.
        name_column: Header of the course-name column.
        description_column: Header of the course-description column.
        id_column: Header of the course-id column.
        output_path: Where the JSON document is written. Defaults to
            "DataIb1.json" in the current working directory.

    Returns:
        The JSON document as a string (4-space indented) with the keys
        "organization_skills" (unique non-null skills) and "courses"
        (a list of {"name", "description", "courseId"} objects).

    Raises:
        ValueError: If any requested column is missing from its workbook.
    """
    def _normalise(label):
        # str() first: a numeric or otherwise non-string header would break
        # (or turn into NaN under) the pandas ``.str`` accessor.
        return str(label).lower().strip()

    # Read and normalize skills file
    df_skills = pd.read_excel(skills_file)
    df_skills.columns = [_normalise(col) for col in df_skills.columns]
    skills_column = _normalise(skills_column)

    if skills_column not in df_skills.columns:
        raise ValueError(f"Column '{skills_column}' not found in skills file. Available columns: {list(df_skills.columns)}")

    skills = df_skills[skills_column].dropna().unique().tolist()

    # Read and normalize courses file
    df_courses = pd.read_excel(courses_file)
    df_courses.columns = [_normalise(col) for col in df_courses.columns]
    name_column = _normalise(name_column)
    description_column = _normalise(description_column)
    id_column = _normalise(id_column)

    required_columns = [name_column, description_column, id_column]
    for col in required_columns:
        if col not in df_courses.columns:
            raise ValueError(f"Column '{col}' not found in courses file. Available columns: {list(df_courses.columns)}")

    # Rows missing any of the three required fields are dropped entirely.
    courses = df_courses.dropna(subset=required_columns)

    courses_list = [
        {
            "name": row[name_column],
            "description": row[description_column],
            "courseId": row[id_column],
        }
        for _, row in courses.iterrows()
    ]

    data = {
        "organization_skills": skills,
        "courses": courses_list,
    }

    # Serialise once, before opening the file, so a serialisation failure
    # cannot leave a truncated file behind and the file matches the return value.
    payload = json.dumps(data, indent=4)
    with open(output_path, "w", encoding="utf-8") as json_file:
        json_file.write(payload)

    return payload

def string_data_to_json(skills, course_names, course_descriptions,
                        output_path="DataFromString.json"):
    """Build the skills/courses JSON document from in-memory values.

    Each of the first three arguments may be a comma-separated string (split
    on commas, pieces stripped, empty pieces dropped) or an already-built
    list, which is used unchanged.

    Args:
        skills: Organisation skills.
        course_names: Course names, positionally paired with the descriptions.
        course_descriptions: Course descriptions.
        output_path: Where the JSON document is written. Defaults to
            "DataFromString.json" in the current working directory.

    Returns:
        The JSON document as a string (4-space indented) with the keys
        "organization_skills" and "courses" (list of {"name", "description"}).

    Raises:
        ValueError: If the numbers of names and descriptions differ.
    """
    def _as_list(value):
        # Only strings are split; any other value is passed through untouched.
        if isinstance(value, str):
            return [piece.strip() for piece in value.split(",") if piece.strip()]
        return value

    skills = _as_list(skills)
    course_names = _as_list(course_names)
    course_descriptions = _as_list(course_descriptions)

    # Validate matching lengths
    if len(course_names) != len(course_descriptions):
        raise ValueError("The number of course names and descriptions must be the same.")

    data = {
        "organization_skills": skills,
        "courses": [
            {"name": name, "description": desc}
            for name, desc in zip(course_names, course_descriptions)
        ],
    }

    # Serialise once so the file content and the returned string are identical.
    payload = json.dumps(data, indent=4)
    with open(output_path, "w", encoding="utf-8") as json_file:
        json_file.write(payload)

    return payload

def TNA_Goals(goals):
    """Ask the local chat model which skills each business goal requires.

    Args:
        goals: Text describing the business goals; it is substituted into the
            prompt as-is.

    Returns:
        dict mapping each heading line of the reply (a line starting with
        "Goal:" or "Problem:", kept verbatim including the prefix) to the list
        of skills found on the "Skills:" line that follows it. A heading with
        no "Skills:" line maps to an empty list.
    """
    llm = ChatOllama(
        model="gemma3:12b",
        temperature=0,
        # other params...
    )

    skill_extraction_prompt = PromptTemplate(
        input_variables=["goals_str"],
        template = """
            You are an industry expert with over 10 years of experience in analyzing business goals and identifying the most critical skills required to achieve them.

            Given the following business goals:
            {goals_str}

            

            Analyze the requirements for each goal  and provide a structured list of essential skills needed to address them effectively.

            For each goal , clearly indicate which one it pertains to.

            - Ensure the list of skills for each goal/problem is precise, diverse, and highly relevant.
            - Provide the skills in a comma-separated format on a line starting with "Skills:" immediately following the Goal  statement.
            - Do not include explanations, extra text, or other formatting besides the Goal statement and the "Skills:" line.

            **Output Format Example:**
            Goal: Increase customer retention
            Skills: Customer Service, Communication, Problem-Solving

            Problem: High employee turnover
            Skills: Employee Engagement, Leadership, Compensation Strategy
            """
    )

    skill_extraction_chain = skill_extraction_prompt | llm
    response = skill_extraction_chain.invoke({"goals_str": goals})

    extracted_skills = {}
    current_item = None
    for raw_line in response.content.strip().split('\n'):
        line = raw_line.strip()
        if line.startswith(("Goal:", "Problem:")):
            current_item = line
            extracted_skills[current_item] = []
        elif line.startswith("Skills:") and current_item:
            # Drop empty pieces so a bare "Skills:" line or a trailing comma
            # does not produce '' entries that later count as missing skills.
            extracted_skills[current_item] = [
                skill.strip()
                for skill in line[len("Skills:"):].split(',')
                if skill.strip()
            ]
            current_item = None  # Reset for the next goal/problem
    return extracted_skills

def TNA_Problems(problems):
    """Ask the local chat model which skills each business problem requires.

    Args:
        problems: Text describing the business problems; it is substituted
            into the prompt as-is.

    Returns:
        dict mapping each heading line of the reply (a line starting with
        "Goal:" or "Problem:", kept verbatim including the prefix) to the list
        of skills found on the "Skills:" line that follows it. A heading with
        no "Skills:" line maps to an empty list.
    """
    llm = ChatOllama(
        model="gemma3:12b",
        temperature=0,
        # other params...
    )

    # NOTE(review): the prompt still mentions goals and shows a "Goal:" example
    # although this function is about problems; left untouched because changing
    # it alters model output.
    skill_extraction_prompt = PromptTemplate(
        input_variables=["problems_str"],
        template = """
            You are an industry expert with over 10 years of experience in analyzing business problems and identifying the most critical skills required to achieve them.

         

            Given the following business problems:
            {problems_str}

            Analyze the requirements for each  problem and provide a structured list of essential skills needed to address them effectively.

            For each goal and problem, clearly indicate which one it pertains to.

            - Ensure the list of skills for each problem is precise, diverse, and highly relevant.
            - Provide the skills in a comma-separated format on a line starting with "Skills:" immediately following the Problem statement.
            - Do not include explanations, extra text, or other formatting besides the Problem statement and the "Skills:" line.

            **Output Format Example:**
            Goal: Increase customer retention
            Skills: Customer Service, Communication, Problem-Solving

            Problem: High employee turnover
            Skills: Employee Engagement, Leadership, Compensation Strategy
            """
    )

    skill_extraction_chain = skill_extraction_prompt | llm
    response = skill_extraction_chain.invoke({"problems_str": problems})

    extracted_skills = {}
    current_item = None
    for raw_line in response.content.strip().split('\n'):
        line = raw_line.strip()
        if line.startswith(("Goal:", "Problem:")):
            current_item = line
            extracted_skills[current_item] = []
        elif line.startswith("Skills:") and current_item:
            # Drop empty pieces so a bare "Skills:" line or a trailing comma
            # does not produce '' entries that later count as missing skills.
            extracted_skills[current_item] = [
                skill.strip()
                for skill in line[len("Skills:"):].split(',')
                if skill.strip()
            ]
            current_item = None  # Reset for the next goal/problem
    return extracted_skills
    
def find_skill_gaps_goals(required_skills_dict, organization_skills):
    """Split each goal's required skills into those present and those missing.

    Matching is exact (case- and whitespace-sensitive).

    Args:
        required_skills_dict: Mapping of item label -> iterable of required skills.
        organization_skills: Iterable of skills the organisation already has;
            ``None`` is treated as "no skills".

    Returns:
        dict mapping each item label to
        ``{"existing_skills": [...], "missing_skills": [...]}``. Both lists are
        de-duplicated and keep the order in which skills first appear in the
        requirement list, so the output is deterministic.
    """
    organization_skills_set = set(
        () if organization_skills is None else organization_skills
    )

    analysis_results = {}
    for item, required_skills in required_skills_dict.items():
        # dict.fromkeys de-duplicates while preserving first-seen order
        # (a plain set would make the output order arbitrary).
        unique_required = list(dict.fromkeys(required_skills))
        analysis_results[item] = {
            "existing_skills": [
                skill for skill in unique_required if skill in organization_skills_set
            ],
            "missing_skills": [
                skill for skill in unique_required if skill not in organization_skills_set
            ],
        }

    return analysis_results

def find_skill_gaps_problems(required_skills_dict, organization_skills):
    """Split each problem's required skills into those present and those missing.

    Matching is exact (case- and whitespace-sensitive).

    Args:
        required_skills_dict: Mapping of item label -> iterable of required skills.
        organization_skills: Iterable of skills the organisation already has;
            ``None`` is treated as "no skills".

    Returns:
        dict mapping each item label to
        ``{"existing_skills": [...], "missing_skills": [...]}``. Both lists are
        de-duplicated and keep the order in which skills first appear in the
        requirement list, so the output is deterministic.
    """
    organization_skills_set = set(
        () if organization_skills is None else organization_skills
    )

    analysis_results = {}
    for item, required_skills in required_skills_dict.items():
        # dict.fromkeys de-duplicates while preserving first-seen order
        # (a plain set would make the output order arbitrary).
        unique_required = list(dict.fromkeys(required_skills))
        analysis_results[item] = {
            "existing_skills": [
                skill for skill in unique_required if skill in organization_skills_set
            ],
            "missing_skills": [
                skill for skill in unique_required if skill not in organization_skills_set
            ],
        }

    return analysis_results

def recommend_online_courses_goal(goal, missing_skills):
    """Ask the local chat model for online courses that close a goal's skill gaps.

    Args:
        goal: The business goal; substituted into the prompt as-is.
        missing_skills: The missing skills; substituted into the prompt as-is
            (a non-string value is rendered with its string form).

    Returns:
        Whatever the prompt-to-model chain's ``invoke`` returns. The reply is
        not parsed or validated here.

    Raises:
        ValueError: If ``goal`` or ``missing_skills`` is empty/falsy.
    """
    # Validate inputs
    if not goal or not missing_skills:
        raise ValueError("Goal/problem and missing skills must be provided")

    # The free-text prompt that used to be built here was never sent to the
    # model (and its "{{...}}" placeholders were escaped), so it was removed.
    # Doubled braces below are literal braces in the rendered prompt.
    course_recommendation_prompt2 = PromptTemplate(
    input_variables=["goal", "missing_skills_str"],
    template="""
You are a professional learning and development advisor.

Given the user's business goal , and their identified missing skills, output the recommendation report in **STRICT JSON FORMAT**.

### JSON Output Format:

{{
Goal:{{
  "goalTitle": "string",
  "missing_skills": ["list", "of", "skills"],
  "course_recommendations_online": [
    {{
      "platform": "Platform name",
      "course_name": "Course title",
      "level": "Beginner / Intermediate / Advanced",
      "why_recommended": "Reason why the course is useful",
      "relevance_score": integer (0-100),
      "link": "Course URL or 'Available on [Platform]'"
    }}
  ]
  
  }}
}}

**Rules:**
- Only return valid JSON, no markdown, no headings.
- Use double quotes `"` around all keys and string values.
- Ensure the JSON is syntactically correct (ready for `json.loads()`).
- If a field is missing, use an empty string "" or empty array [].
- Only give those courses which relevance_score more than 75

---

### Inputs:

Goal / Problem:
{goal}

Missing Skills:
{missing_skills_str}
"""
)

    llm = ChatOllama(
        model="gemma3:12b",
        temperature=0,
    )

    online_course_extraction_chain = course_recommendation_prompt2 | llm
    return online_course_extraction_chain.invoke(
        {
            "goal": goal,
            "missing_skills_str": missing_skills,
        }
    )

def recommend_online_courses_problems(problem, missing_skills):
    """Ask the local chat model for online courses that address a problem's skill gaps.

    Args:
        problem: The business problem; substituted into the prompt as-is.
        missing_skills: The missing skills; substituted into the prompt as-is
            (a non-string value is rendered with its string form).

    Returns:
        Whatever the prompt-to-model chain's ``invoke`` returns. The reply is
        not parsed or validated here.

    Raises:
        ValueError: If ``problem`` or ``missing_skills`` is empty/falsy.
    """
    # Validate inputs
    if not problem or not missing_skills:
        raise ValueError("Problem and missing skills must be provided")

    # The free-text prompt that used to be built here was never sent to the
    # model (and its "{{...}}" placeholders were escaped), so it was removed.
    # NOTE(review): the template below labels the schema "Goal:" and spells the
    # title key "problemTtile"; both are kept verbatim because consumers of the
    # raw reply may rely on that key - confirm before correcting.
    course_recommendation_prompt2 = PromptTemplate(
    input_variables=["problem", "missing_skills_str"],
    template="""
You are a professional learning and development advisor.

Given the user's business problem , and their identified missing skills, output the recommendation report in **STRICT JSON FORMAT**.

### JSON Output Format:
Goal:
{{
  "problemTtile": "string",
  "missing_skills": ["list", "of", "skills"],
  "course_recommendations_online": [
    {{
      "platform": "Platform name",
      "course_name": "Course title",
      "level": "Beginner / Intermediate / Advanced",
      "why_recommended": "Reason why the course is useful",
      "relevance_score": integer (0-100),
      "link": "Course URL or 'Available on [Platform]'"
    }}
  ],
  "suggested_order": [
    "Course 1 name",
    "Course 2 name",
    "Course 3 name"
  ]
}}

**Rules:**
- Only return valid JSON, no markdown, no headings.
- Use double quotes `"` around all keys and string values.
- Ensure the JSON is syntactically correct (ready for `json.loads()`).
- If a field is missing, use an empty string "" or empty array [].
- Only give those courses which relevance_score more than 75

---

### Inputs:

Goal / Problem:
{problem}

Missing Skills:
{missing_skills_str}
"""
)

    llm = ChatOllama(
        model="gemma3:12b",
        temperature=0,
    )

    online_course_extraction_chain = course_recommendation_prompt2 | llm
    return online_course_extraction_chain.invoke(
        {
            "problem": problem,
            "missing_skills_str": missing_skills,
        }
    )

def recommend_offline_courses_goal(goal, missing_skills,courses_available):
    """Ask the local chat model to pick courses from a supplied catalogue for a goal.

    Args:
        goal: The business goal; substituted into the prompt as-is.
        missing_skills: The missing skills; substituted into the prompt as-is.
        courses_available: The course catalogue shown to the model;
            substituted into the prompt as-is.

    Returns:
        Whatever the prompt-to-model chain's ``invoke`` returns. The reply is
        not parsed or validated here.

    Raises:
        ValueError: If ``goal`` or ``missing_skills`` is empty/falsy.
    """
    # Validate inputs
    if not goal or not missing_skills:
        raise ValueError("Goal and missing skills must be provided")

    # Doubled braces in the template are literal braces in the rendered prompt.
    # NOTE(review): the task list asks for "reason_to_recommend" while the JSON
    # example uses "why_recommended"/"link"; kept verbatim - confirm which one
    # downstream parsing expects.
    course_recommendation_prompt_simplified = PromptTemplate(
    input_variables=["goal", "missing_skills_str", "course_data_json"],
    template="""
You are a professional learning and development advisor.

You are given:
- A business goal
- A list of missing skills
- A database of available courses (with name and description)

### Business Goal:
{goal}

### Missing Skills:
{missing_skills_str}

### Available Courses:
{course_data_json}

Your task is to:
1. Evaluate each course for how well it helps achieve the goal and address the missing skills.
2. Select the most relevant courses (maximum 5).
3. For each course, return ONLY the following fields:
   - "course_name": string
   - "reason_to_recommend": string
   - "relevance_score": integer (0-100)

Don't give any extra text . Just return JSON Output.
Here is the JSON class to validate the output that you give. Make sure the check always passes.

class SimpleOfflineCourseRecommendation(BaseModel):
    
    ### JSON Output Format:

{{
Goal:{{
  "goaltitle": "string",
  "missing_skills": ["list", "of", "skills"],
  "course_recommendations_offline": [
    {{
      
      "course_name": "Course title",
      
      "why_recommended": "Reason why the course is useful",
      "relevance_score": integer (0-100),
      "link": "Course URL or 'Available on [Platform]'"
    }}
  ],
  "suggested_order": [
    "Course 1 name",
    "Course 2 name",
    "Course 3 name"
  ]
  }}
}}

**Rules:**
- Only return valid JSON, no markdown, no headings.
- Use double quotes `"` around all keys and string values.
- Ensure the JSON is syntactically correct (ready for `json.loads()`).
- If a field is missing, use an empty string "" or empty array [].
- Only give those courses which relevance_score more than 75


"""

        )

    llm = ChatOllama(
    model="gemma3:12b",
    temperature=0,
    )

    # Debug prints of the goal and skills were removed so caller data is no
    # longer echoed to stdout on every call.
    offline_course_extraction_chain = course_recommendation_prompt_simplified | llm
    return offline_course_extraction_chain.invoke(
        {
            "goal": goal,
            "missing_skills_str": missing_skills,
            "course_data_json": courses_available,
        }
    )

def recommend_offline_courses_problems(problem, missing_skills,courses_available):
    """Ask the local chat model to pick courses from a supplied catalogue for a problem.

    Args:
        problem: The business problem; substituted into the prompt as-is.
        missing_skills: The missing skills; substituted into the prompt as-is.
        courses_available: The course catalogue shown to the model;
            substituted into the prompt as-is.

    Returns:
        Whatever the prompt-to-model chain's ``invoke`` returns. The reply is
        not parsed or validated here.

    Raises:
        ValueError: If ``problem`` or ``missing_skills`` is empty/falsy.
    """
    # Validate inputs
    if not problem or not missing_skills:
        raise ValueError("Problem and missing skills must be provided")

    # Doubled braces in the template are literal braces in the rendered prompt.
    # NOTE(review): the template labels the schema "Goal:", spells the title key
    # "problemTtile" and repeats a stray class header; kept verbatim because
    # consumers of the raw reply may rely on the current output - confirm
    # before correcting.
    course_recommendation_prompt_simplified = PromptTemplate(
    input_variables=["problem", "missing_skills_str", "course_data_json"],
    template="""
You are a professional learning and development advisor.

You are given:
- A business problem
- A list of missing skills
- A database of available courses (with name and description)

### Business Goal:
{problem}

### Missing Skills:
{missing_skills_str}

### Available Courses:
{course_data_json}

Your task is to:
1. Evaluate each course for how well it helps achieve the goal and address the missing skills.
2. Select the most relevant courses (maximum 5).
3. For each course, return ONLY the following fields:
   - "course_name": string
   - "reason_to_recommend": string
   - "relevance_score": integer (0-100)

Don't give any extra text . Just return JSON Output.
Here is the pydantic class to validate the output that you give. Make sure the check always passes.

class SimpleOfflineCourseRecommendation(BaseModel):
    
    class SimpleOfflineCourseRecommendation(BaseModel):
    
    ### JSON Output Format:
Goal:
{{
  "problemTtile": "string",
  "missing_skills": ["list", "of", "skills"],
  "course_recommendations_offline": [
    {{
      
      "course_name": "Course title",
      
      "why_recommended": "Reason why the course is useful",
      "relevance_score": integer (0-100),
      "link": "Course URL or 'Available on [Platform]'"
    }}
  ],
  "suggested_order": [
    "Course 1 name",
    "Course 2 name",
    "Course 3 name"
  ]
}}

**Rules:**
- Only return valid JSON, no markdown, no headings.
- Use double quotes `"` around all keys and string values.
- Ensure the JSON is syntactically correct (ready for `json.loads()`).
- If a field is missing, use an empty string "" or empty array [].
- Only give those courses which relevance_score more than 75


"""

        )

    llm = ChatOllama(
    model="gemma3:12b",
    temperature=0,
    )

    # The constant debug print that used to run here carried no information
    # and was removed.
    offline_course_extraction_chain = course_recommendation_prompt_simplified | llm
    return offline_course_extraction_chain.invoke(
        {
            "problem": problem,
            "missing_skills_str": missing_skills,
            "course_data_json": courses_available,
        }
    )

def recommend_online_offline_courses_problems(problem, missing_skills,courses_available):
    """Get online and catalogue-based course recommendations for a problem and merge them.

    Two prompts are sent to the local chat model (catalogue-based first, then
    online); both replies are decoded as JSON and combined.

    Args:
        problem: The business problem; substituted into both prompts as-is.
        missing_skills: The missing skills. Sent to the model as-is; for the
            returned report a comma-separated string is split into a list and
            a list of lists is flattened.
        courses_available: The course catalogue shown to the model in the
            catalogue-based prompt.

    Returns:
        dict of the form ``{"Problem": {"problemTitle": str,
        "missing_skills": list, "course_recommendations_offline": list,
        "course_recommendations_online": list}}``. The title comes from the
        online reply when present, otherwise from the catalogue-based reply.

    Raises:
        ValueError: If ``problem`` or ``missing_skills`` is empty/falsy, or if
            a model reply is not a JSON object (``json.JSONDecodeError``, a
            ``ValueError`` subclass, for undecodable text).
    """
    # Validate inputs
    if not problem or not missing_skills:
        raise ValueError("Problem and missing skills must be provided")

    # Both templates now spell the title key "problemTitle": the previous
    # "problemTtile" spelling never matched the key read back below, so the
    # merged title was always empty.
    course_recommendation_prompt2 = PromptTemplate(
    input_variables=["problem", "missing_skills_str"],
    template="""
You are a professional learning and development advisor.

Given the user's business problem , and their identified missing skills, output the recommendation report in **STRICT JSON FORMAT**.

### JSON Output Format:
Problem:
{{
  "problemTitle": "string",
  "course_recommendations_online": [
    {{
      "platform": "Platform name",
      "course_name": "Course title",
      "level": "Beginner / Intermediate / Advanced",
      "why_recommended": "Reason why the course is useful",
      "relevance_score": integer (0-100),
      "link": "Course URL or 'Available on [Platform]'"
    }}
  ],
  "suggested_order": [
    "Course 1 name",
    "Course 2 name",
    "Course 3 name"
  ]
}}

**Rules:**
- Only return valid JSON, no markdown, no headings.
- Use double quotes `"` around all keys and string values.
- Ensure the JSON is syntactically correct (ready for `json.loads()`).
- If a field is missing, use an empty string "" or empty array [].
- Only give those courses which relevance_score more than 75

---

### Inputs:

Goal / Problem:
{problem}

Missing Skills:
{missing_skills_str}
"""
)

    course_recommendation_prompt_simplified = PromptTemplate(
    input_variables=["problem", "missing_skills_str", "course_data_json"],
    template="""
You are a professional learning and development advisor.

You are given:
- A business problem
- A list of missing skills
- A database of available courses (with name and description)

### Business Goal:
{problem}

### Missing Skills:
{missing_skills_str}

### Available Courses:
{course_data_json}

Your task is to:
1. Evaluate each course for how well it helps achieve the goal and address the missing skills.
2. Select the most relevant courses (maximum 5).
3. For each course, return ONLY the following fields:
   - "course_name": string
   - "course_id" : string
   - "reason_to_recommend": string
   - "relevance_score": integer (0-100)

Don't give any extra text . Just return JSON Output.
Here is the pydantic class to validate the output that you give. Make sure the check always passes.

class SimpleOfflineCourseRecommendation(BaseModel):
    
    class SimpleOfflineCourseRecommendation(BaseModel):
    
    ### JSON Output Format:
Problem:
{{
  "problemTitle": "string",
  "missing_skills": ["list", "of", "skills"],
  "course_recommendations_offline": [
    {{
      
      "course_name": "Course title",
      "course_id" : Course ID from course_data_json,
      "reason_to_recommend": "Reason why the course is useful",
      "relevance_score": integer (0-100),
      
    }}
  ],
  "suggested_order": [
    "Course 1 name",
    "Course 2 name",
    "Course 3 name"
  ]
}}

**Rules:**
- Only return valid JSON, no markdown, no headings.
- Use double quotes `"` around all keys and string values.
- Ensure the JSON is syntactically correct (ready for `json.loads()`).
- If a field is missing, use an empty string "" or empty array [].
- Only give those courses which relevance_score more than 75


"""

        )

    def _load_report(raw_text):
        # Decode the JSON object embedded in a model reply. Slicing from the
        # first "{" to the last "}" copes with markdown fences and with a
        # leading "Problem:" label, which the previous character-set based
        # ``.strip("```json")`` could not remove.
        start = raw_text.find("{")
        end = raw_text.rfind("}")
        candidate = raw_text[start:end + 1] if 0 <= start < end else raw_text.strip()
        report = json.loads(candidate)
        if not isinstance(report, dict):
            raise ValueError(
                f"Expected a JSON object from the model, got {type(report).__name__}"
            )
        # Some replies nest the report under a single wrapper key.
        for wrapper_key in ("Problem", "Goal"):
            inner = report.get(wrapper_key)
            if isinstance(inner, dict):
                return inner
        return report

    def _title_of(report):
        # Accept the spellings seen in earlier prompt revisions as well.
        for key in ("problemTitle", "problemTtile", "problemtitle"):
            if report.get(key):
                return report[key]
        return ""

    llm = ChatOllama(
        model="gemma3:12b",
        temperature=0,
    )

    # Catalogue-based request first, then the online one (same order as before).
    offline_course_extraction_chain = course_recommendation_prompt_simplified | llm
    recommendations_offline = offline_course_extraction_chain.invoke(
        {
            "problem": problem,
            "missing_skills_str": missing_skills,
            "course_data_json": courses_available,
        }
    )

    online_course_extraction_chain = course_recommendation_prompt2 | llm
    recommendations_online = online_course_extraction_chain.invoke(
        {
            "problem": problem,
            "missing_skills_str": missing_skills,
        }
    )

    recommendations_offline_data = _load_report(recommendations_offline.content)
    recommendations_online_data = _load_report(recommendations_online.content)

    course_recommendations_offline = recommendations_offline_data.get("course_recommendations_offline", [])
    course_recommendations_online = recommendations_online_data.get("course_recommendations_online", [])

    # Online title preferred, catalogue-based title as fallback.
    final_problem_title = _title_of(recommendations_online_data) or _title_of(recommendations_offline_data)

    # Normalise missing_skills to a flat list for the report.
    if isinstance(missing_skills, str):
        missing_skills_list = [skill.strip() for skill in missing_skills.split(',') if skill.strip()]
    elif isinstance(missing_skills, list) and all(isinstance(item, list) for item in missing_skills):
        missing_skills_list = [skill for sublist in missing_skills for skill in sublist]
    else:
        missing_skills_list = missing_skills

    return {
        "Problem": {
            "problemTitle": final_problem_title,
            "missing_skills": missing_skills_list,
            "course_recommendations_offline": course_recommendations_offline,
            "course_recommendations_online": course_recommendations_online,
        }
    }

def recommend_online_offline_courses_goal(goal, missing_skills, courses_available):
    """Recommend online and catalogue ("offline") courses for a business goal.

    Two prompts are sent to a local Ollama chat model (``gemma3:12b``,
    temperature 0): one asks for online courses, the other asks the model to
    pick courses out of ``courses_available``. Both replies are parsed as JSON
    and merged into a single structure, which is also printed.

    Args:
        goal: Business goal; interpolated into both prompts.
        missing_skills: Missing skills; interpolated into both prompts as
            given. In the returned structure a comma-separated string is split
            into a list and a list of lists is flattened.
        courses_available: Course catalogue, interpolated into the offline
            prompt as ``course_data_json``.

    Returns:
        dict: ``{"Goal": {"goalTitle", "missing_skills",
        "course_recommendations_offline", "course_recommendations_online"}}``.

    Raises:
        ValueError: If ``goal`` or ``missing_skills`` is empty, or if a reply
            decodes to something other than a JSON object.
        json.JSONDecodeError: If a reply contains no parseable JSON (this is a
            ``ValueError`` subclass).
    """
    # Validate inputs
    if not goal or not missing_skills:
        raise ValueError("Goal/problem and missing skills must be provided")

    def _strip_code_fence(reply_text):
        """Return ``reply_text`` without a surrounding Markdown code fence."""
        # str.strip("```json") removes any of the characters `, j, s, o, n from
        # both ends rather than the literal prefix, so the fence is removed
        # explicitly as a prefix/suffix instead.
        text = reply_text.strip()
        if text.startswith("```"):
            # Opening fence plus an optional language tag such as "json".
            text = re.sub(r"^```[A-Za-z]*", "", text)
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    def _load_json_object(text):
        """Decode ``text`` as JSON, tolerating prose around the object."""
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            # Fall back to the outermost {...} span; re-raise the original
            # error when there is no such span to try.
            start, end = text.find("{"), text.rfind("}")
            if start == -1 or end <= start:
                raise
            return json.loads(text[start:end + 1])

    def _section_fields(payload, wrapper_key):
        """Return the fields of ``payload[wrapper_key]`` with lower-cased keys.

        The wrapper key is matched case-insensitively; when it is absent the
        top-level object itself is treated as the section.
        """
        if not isinstance(payload, dict):
            raise ValueError(
                f"Expected a JSON object from the LLM, got {type(payload).__name__}"
            )
        fields = {str(key).lower(): value for key, value in payload.items()}
        nested = fields.get(wrapper_key)
        if isinstance(nested, dict):
            fields = {str(key).lower(): value for key, value in nested.items()}
        return fields

    # NOTE: both prompts show the payload nested under an unquoted `Goal` key
    # and spell the title differently ("goalTitle" vs "goaltitle"), which is
    # why the replies are unwrapped and read case-insensitively below.
    course_recommendation_prompt2 = PromptTemplate(
        input_variables=["goal", "missing_skills_str"],
        template="""
You are a professional learning and development advisor.

Given the user's business goal , and their identified missing skills, output the recommendation report in **STRICT JSON FORMAT**.

### JSON Output Format:

{{
Goal:{{
  "goalTitle": "string",
  "course_recommendations_online": [
    {{
      "platform": "Platform name",
      "course_name": "Course title",
      "level": "Beginner / Intermediate / Advanced",
      "why_recommended": "Reason why the course is useful",
      "relevance_score": integer (0-100),
      "link": "Course URL or 'Available on [Platform]'"
    }}
  ]
  
  }}
}}

**Rules:**
- Only return valid JSON, no markdown, no headings.
- Use double quotes `"` around all keys and string values.
- Ensure the JSON is syntactically correct (ready for `json.loads()`).
- If a field is missing, use an empty string "" or empty array [].
- Only give those courses which relevance_score more than 75

---

### Inputs:

Goal / Problem:
{goal}

Missing Skills:
{missing_skills_str}
""",
    )

    course_recommendation_prompt_simplified = PromptTemplate(
        input_variables=["goal", "missing_skills_str", "course_data_json"],
        template="""
You are a professional learning and development advisor.

You are given:
- A business goal
- A list of missing skills
- A database of available courses (with name and description)

### Business Goal:
{goal}

### Missing Skills:
{missing_skills_str}

### Available Courses:
{course_data_json}

Your task is to:
1. Evaluate each course for how well it helps achieve the goal and address the missing skills.
2. Select the most relevant courses (maximum 5).
3. For each course, return ONLY the following fields:
   - "course_name": string
   - "course_id" : string
   - "reason_to_recommend": string
   - "relevance_score": integer (0-100)

Don't give any extra text . Just return JSON Output.
Here is the JSON class to validate the output that you give. Make sure the check always passes.

class SimpleOfflineCourseRecommendation(BaseModel):
    
    ### JSON Output Format:

{{
Goal:{{
  "goaltitle": "string",
  "course_recommendations_offline": [
    {{
      
      "course_name": "Course title",
      "course_id" : Course ID from course_data_json,
      "why_recommended": "Reason why the course is useful",
      "relevance_score": integer (0-100),
      "link": "Course URL or 'Available on [Platform]'"
    }}
  ],
  "suggested_order": [
    "Course 1 name",
    "Course 2 name",
    "Course 3 name"
  ]
  }}
}}

**Rules:**
- Only return valid JSON, no markdown, no headings.
- Use double quotes `"` around all keys and string values.
- Ensure the JSON is syntactically correct (ready for `json.loads()`).
- If a field is missing, use an empty string "" or empty array [].
- Only give those courses which relevance_score more than 75


""",
    )

    llm = ChatOllama(
        model="gemma3:12b",
        temperature=0,
    )

    # Pipe each prompt into the model.
    online_course_extraction_chain = course_recommendation_prompt2 | llm
    offline_course_extraction_chain = course_recommendation_prompt_simplified | llm

    input_data_online = {
        "goal": goal,
        "missing_skills_str": missing_skills,
    }
    input_data_offline = {
        "goal": goal,
        "missing_skills_str": missing_skills,
        "course_data_json": courses_available,
    }

    # The offline (catalogue) request is issued first, then the online one.
    recommendations_offline = offline_course_extraction_chain.invoke(input_data_offline)
    recommendations_online = online_course_extraction_chain.invoke(input_data_online)

    recommendations_offline_data = _load_json_object(
        _strip_code_fence(recommendations_offline.content)
    )
    print("=============================================================================Offlieneeee Goal Start ")
    print(recommendations_offline_data)
    print("=============================================================================Offlieneeee Goal End")
    recommendations_online_data = _load_json_object(
        _strip_code_fence(recommendations_online.content)
    )

    # Extract relevant data; both replies may or may not carry the "Goal" wrapper.
    offline_fields = _section_fields(recommendations_offline_data, "goal")
    course_recommendations_offline = offline_fields.get("course_recommendations_offline", [])
    goal_title_offline = offline_fields.get("goaltitle", "")

    online_fields = _section_fields(recommendations_online_data, "goal")
    goal_title_online = online_fields.get("goaltitle", "")
    course_recommendations_online = online_fields.get("course_recommendations_online", [])

    # Choose goal title (online preferred)
    final_goal_title = goal_title_online or goal_title_offline

    # Convert missing_skills to a flat list if it is not one already
    if isinstance(missing_skills, str):
        missing_skills_list = [skill.strip() for skill in missing_skills.split(',')]
    elif isinstance(missing_skills, list) and all(isinstance(item, list) for item in missing_skills):
        # Flatten the list of lists
        missing_skills_list = [skill for sublist in missing_skills for skill in sublist]
    else:
        missing_skills_list = missing_skills

    # Merge into final structure
    final_output = {
        "Goal": {
            "goalTitle": final_goal_title,
            "missing_skills": missing_skills_list,
            "course_recommendations_offline": course_recommendations_offline,
            "course_recommendations_online": course_recommendations_online,
        }
    }

    print(json.dumps(final_output, indent=2))
    return final_output

def recommend_online_offline_courses_problems1(problem, missing_skills, courses_available):
    """Recommend online and catalogue ("offline") courses for a business problem.

    Two prompts are sent to a local Ollama chat model (``gemma3:12b``,
    temperature 0): one asks for online courses, the other asks the model to
    pick courses out of ``courses_available``. Both replies are parsed as JSON
    and merged into a single structure, which is also printed.

    Args:
        problem: Business problem; interpolated into both prompts.
        missing_skills: Missing skills; interpolated into both prompts as
            given. In the returned structure a comma-separated string is split
            into a list and a list of lists is flattened.
        courses_available: Course catalogue, interpolated into the offline
            prompt as ``course_data_json``.

    Returns:
        dict: ``{"Problem": {"goalTitle", "missing_skills",
        "course_recommendations_offline", "course_recommendations_online"}}``.

    Raises:
        ValueError: If ``problem`` or ``missing_skills`` is empty, or if a
            reply decodes to something other than a JSON object.
        json.JSONDecodeError: If a reply contains no parseable JSON (this is a
            ``ValueError`` subclass).
    """
    # Validate inputs
    if not problem or not missing_skills:
        raise ValueError("Goal/problem and missing skills must be provided")

    def _strip_code_fence(reply_text):
        """Return ``reply_text`` without a surrounding Markdown code fence."""
        # str.strip("```json") removes any of the characters `, j, s, o, n from
        # both ends rather than the literal prefix, so the fence is removed
        # explicitly as a prefix/suffix instead.
        text = reply_text.strip()
        if text.startswith("```"):
            # Opening fence plus an optional language tag such as "json".
            text = re.sub(r"^```[A-Za-z]*", "", text)
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    def _load_json_object(text):
        """Decode ``text`` as JSON, tolerating prose around the object."""
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            # Fall back to the outermost {...} span; re-raise the original
            # error when there is no such span to try.
            start, end = text.find("{"), text.rfind("}")
            if start == -1 or end <= start:
                raise
            return json.loads(text[start:end + 1])

    def _section_fields(payload, wrapper_key):
        """Return the fields of ``payload[wrapper_key]`` with lower-cased keys.

        The wrapper key is matched case-insensitively; when it is absent the
        top-level object itself is treated as the section.
        """
        if not isinstance(payload, dict):
            raise ValueError(
                f"Expected a JSON object from the LLM, got {type(payload).__name__}"
            )
        fields = {str(key).lower(): value for key, value in payload.items()}
        nested = fields.get(wrapper_key)
        if isinstance(nested, dict):
            fields = {str(key).lower(): value for key, value in nested.items()}
        return fields

    # NOTE: the online prompt nests its payload under `Problem` and the offline
    # one under `problem`, with differently cased title keys, which is why the
    # replies are unwrapped and read case-insensitively below.
    course_recommendation_prompt2 = PromptTemplate(
        input_variables=["problem", "missing_skills_str"],
        template="""
You are a professional learning and development advisor.

Given the user's business goals , and their identified missing problems, output the recommendation report in **STRICT JSON FORMAT**.

### JSON Output Format:

{{
Problem:{{
  "problemTitle": "string",
  "course_recommendations_online": [
    {{
      "platform": "Platform name",
      "course_name": "Course title",
      "level": "Beginner / Intermediate / Advanced",
      "why_recommended": "Reason why the course is useful",
      "relevance_score": integer (0-100),
      "link": "Course URL or 'Available on [Platform]'"
    }}
  ]
  
  }}
}}

**Rules:**
- Only return valid JSON, no markdown, no headings.
- Use double quotes `"` around all keys and string values.
- Ensure the JSON is syntactically correct (ready for `json.loads()`).
- If a field is missing, use an empty string "" or empty array [].
- Only give those courses which relevance_score more than 75

---

### Inputs:

Problem:
{problem}

Missing Skills:
{missing_skills_str}
""",
    )

    course_recommendation_prompt_simplified = PromptTemplate(
        input_variables=["problem", "missing_skills_str", "course_data_json"],
        template="""
You are a professional learning and development advisor.

You are given:
- A business problem
- A list of missing skills
- A database of available courses (with name and description)

### Business Problem:
{problem}

### Missing Skills:
{missing_skills_str}

### Available Courses:
{course_data_json}

Your task is to:
1. Evaluate each course for how well it helps achieve the problem and address the missing skills.
2. Select the most relevant courses (maximum 5).
3. For each course, return ONLY the following fields:
   - "course_name": string
   - "reason_to_recommend": string
   - "relevance_score": integer (0-100)

Don't give any extra text . Just return JSON Output.
Here is the JSON class to validate the output that you give. Make sure the check always passes.

class SimpleOfflineCourseRecommendation(BaseModel):
    
    ### JSON Output Format:

{{
problem:{{
  "problemtitle": "string",
  "course_recommendations_offline": [
    {{
      
      "course_name": "Course title",
      
      "why_recommended": "Reason why the course is useful",
      "relevance_score": integer (0-100),
      "link": "Course URL or 'Available on [Platform]'"
    }}
  ],
  "suggested_order": [
    "Course 1 name",
    "Course 2 name",
    "Course 3 name"
  ]
  }}
}}

**Rules:**
- Only return valid JSON, no markdown, no headings.
- Use double quotes `"` around all keys and string values.
- Ensure the JSON is syntactically correct (ready for `json.loads()`).
- If a field is missing, use an empty string "" or empty array [].
- Only give those courses which relevance_score more than 75


""",
    )

    llm = ChatOllama(
        model="gemma3:12b",
        temperature=0,
    )

    # Pipe each prompt into the model.
    online_course_extraction_chain = course_recommendation_prompt2 | llm
    offline_course_extraction_chain = course_recommendation_prompt_simplified | llm

    input_data_online = {
        "problem": problem,
        "missing_skills_str": missing_skills,
    }
    input_data_offline = {
        "problem": problem,
        "missing_skills_str": missing_skills,
        "course_data_json": courses_available,
    }

    # The offline (catalogue) request is issued first, then the online one.
    recommendations_offline = offline_course_extraction_chain.invoke(input_data_offline)
    recommendations_online = online_course_extraction_chain.invoke(input_data_online)

    offline_reply_text = recommendations_offline.content
    online_reply_text = recommendations_online.content
    print("recommendations_online_data1")
    print(online_reply_text)

    offline_json_text = _strip_code_fence(offline_reply_text)
    online_json_text = _strip_code_fence(online_reply_text)
    print("recommendations_online_data2")
    print(online_json_text)

    recommendations_offline_data = _load_json_object(offline_json_text)
    recommendations_online_data = _load_json_object(online_json_text)

    # Extract relevant data; both replies may or may not carry the wrapper key.
    offline_fields = _section_fields(recommendations_offline_data, "problem")
    course_recommendations_offline = offline_fields.get("course_recommendations_offline", [])
    problem_title_offline = offline_fields.get("problemtitle", "")

    online_fields = _section_fields(recommendations_online_data, "problem")
    problem_title_online = online_fields.get("problemtitle", "")
    course_recommendations_online = online_fields.get("course_recommendations_online", [])

    # Choose the title (online preferred)
    final_title = problem_title_online or problem_title_offline

    # Convert missing_skills to a flat list if it is not one already
    if isinstance(missing_skills, str):
        missing_skills_list = [skill.strip() for skill in missing_skills.split(',')]
    elif isinstance(missing_skills, list) and all(isinstance(item, list) for item in missing_skills):
        # Flatten the list of lists
        missing_skills_list = [skill for sublist in missing_skills for skill in sublist]
    else:
        missing_skills_list = missing_skills

    # Merge into final structure.
    # NOTE(review): the title is emitted as "goalTitle" even though the section
    # is "Problem"; the key is kept unchanged so existing consumers of this
    # structure keep working - confirm whether "problemTitle" is wanted.
    final_output = {
        "Problem": {
            "goalTitle": final_title,
            "missing_skills": missing_skills_list,
            "course_recommendations_offline": course_recommendations_offline,
            "course_recommendations_online": course_recommendations_online,
        }
    }

    print(json.dumps(final_output, indent=2))
    return final_output
# Example usage: runs the problem-driven skill-gap analysis on the sample
# spreadsheets and prints the resulting course recommendations.
if __name__ == "__main__":
    import os

    # Credentials are read from the environment so that no secret lives in the
    # source tree. NOTE(review): any key that was ever committed in this file
    # should be treated as compromised and rotated.
    groq_api_key = os.environ.get("GROQ_API_KEY", "")

    business_goals = [
        "Increase sales in the new Southeast Asia market by 20% in the next fiscal year."
    ]
    business_problems = [
        "Lack of effective internal communication."
    ]

    skills_file = "UserSkillDataIB.xlsx"  # Change this to your actual file path
    skills_column = "competency_name"  # Change this to the actual column name in your Excel file
    courses_file = "CourseDataIB.xlsx"
    courses_column = "name"
    description_column = "short_description"
    id_column = 'lms_course_id'
    json_output = excel_to_json(
        skills_file, skills_column, courses_file, courses_column, description_column, id_column
    )
    print("json_output")
    print(json_output)
    # If json_output is a string, parse it first
    if isinstance(json_output, str):
        json_output = json.loads(json_output)

    # Serialise the course catalogue for interpolation into the prompts
    course_data = json_output["courses"]
    course_data_json = json.dumps(course_data, ensure_ascii=False, indent=2)

    # Load the organisation's existing skills
    with open("DataIb1.json", "r", encoding="utf-8") as f:
        data = json.load(f)
    organization_skills = set(data["organization_skills"])

    # 1. Extract required skills for the problem using the LLM
    problems = business_problems[0]
    required_skills_problem = TNA_Problems(problems)
    print("SDASDASDASDAD")
    print(required_skills_problem)
    print("Required Skills for Goals and Problems:")

    # 2. Identify skill gaps
    skill_gap_analysis_problem = find_skill_gaps_problems(required_skills_problem, organization_skills)
    print(skill_gap_analysis_problem)
    print(type(skill_gap_analysis_problem))

    # Recover the goal/problem text from "Goal: ..." / "Problem: ..." keys.
    # The prefix is sliced by its own length and the remainder stripped, so a
    # key without a space after the colon does not lose its first character.
    goal = ""
    problem = ""
    for key in skill_gap_analysis_problem:
        lowered_key = key.lower()
        if lowered_key.startswith("goal:"):
            goal = key[len("goal:"):].strip()
        elif lowered_key.startswith("problem:"):
            problem = key[len("problem:"):].strip()

    print("goal:", f'"{goal}"')
    print("problem:", f'"{problem}"')

    # Fall back to the original statement when no key carried a "problem:"
    # prefix, so an empty problem is never handed to the recommender.
    if not problem:
        problem = problems

    all_missing_skills_problem = [
        analysis["missing_skills"] for analysis in skill_gap_analysis_problem.values()
    ]

    # 3. Recommend courses. The goals are joined into plain text so the prompt
    # receives the statement itself rather than the repr of a Python list.
    abc = recommend_online_offline_courses_goal(
        "\n".join(business_goals), all_missing_skills_problem, course_data_json
    )
    xyz = recommend_online_offline_courses_problems(problem, all_missing_skills_problem, course_data_json)

    print("Start")
    print(abc)
    print("Start2")
    print(xyz)