import os
import time
import random
import re
import json
import requests
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from crawl4ai import AsyncWebCrawler
import asyncio
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser, PydanticOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader, UnstructuredPowerPointLoader
from PyPDF2 import PdfReader
from langchain_experimental.text_splitter import SemanticChunker
from pydantic import BaseModel, Field
from typing import List, Optional, Dict
from urllib.parse import urlparse
from langchain.chains import LLMChain
import shutil
from collections import defaultdict

# Groq API key. Read from the GROQ_API_KEY environment variable so the secret
# is never committed to source control; empty string when the variable is unset.
# NOTE(review): the key previously hard-coded here should be treated as leaked
# and revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
# Embedding model name; presumably intended for OllamaEmbeddings, but it is not
# referenced by the code visible in this file -- TODO confirm.
OLLAMA_MODEL = "nomic-embed-text"



def TNA(groq_api_key, goal, problem):
    """Ask the Groq-hosted LLM which skills a business goal/problem calls for.

    Args:
        groq_api_key: API key forwarded to ``ChatGroq``.
        goal: Business goal(s); substituted into the prompt's ``{goal}`` slot.
        problem: Business problem(s); substituted into the ``{problem}`` slot.

    Returns:
        The model's answer, stripped of surrounding whitespace and split on
        ``", "``. The pieces are not cleaned here: the prompt asks the model
        to emit "Goal:", "Problem:" and "Skills:" lines, so header text can
        still be present in the returned items.
    """
    prompt_text = """
        You are an industry expert with over 10 years of experience in analyzing business goals and identifying the most critical skills required to achieve them.

        Given the business goal: "{goal}" and the business problem: "{problem}", analyze the requirements and provide a structured list of essential skills needed to address the issue effectively.

        - Ensure the list is precise, diverse, and highly relevant.
        - If there are 5 goals, provide at least 20 distinct skills in total.
        - Do not include explanations, extra text, or formatting.

        **Output Format:**  
        Goal: [Business Goal]  
        Problem: [Business Problem]  
        Skills: Data Analysis, Machine Learning, Python, Cybersecurity, Cloud Computing
        """

    chat_model = ChatGroq(
        model="llama-3.1-8b-instant",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        groq_api_key=groq_api_key,
    )
    prompt = PromptTemplate(
        input_variables=["goal", "problem"],
        template=prompt_text,
    )

    chain = LLMChain(llm=chat_model, prompt=prompt)
    raw_answer = chain.run(goal=goal, problem=problem)
    return raw_answer.strip().split(", ")


def clear_skill(req):
    """Turn the raw pieces returned by ``TNA`` into a clean list of skill names.

    ``TNA`` splits the model's answer on ``", "``, which glues the
    "Goal: ..." / "Problem: ..." header lines onto the first skill and can
    scatter header text containing commas over several items. Filtering out
    whole items that mention a header therefore discards real skills. This
    function re-joins the pieces with the same separator to restore the
    original lines and then reads the text line by line.

    Args:
        req: Iterable of string fragments, typically the return value of
            ``TNA``. An already-clean list of skill names is also accepted.

    Returns:
        List of skill names in order of appearance, whitespace-stripped,
        with header lines, the "Skills:" label and empty entries removed.
        Duplicates are kept.
    """
    # Joining with ", " is the exact inverse of the split performed in TNA.
    text = ", ".join(req)

    skills = []
    for line in text.splitlines():
        _, marker, tail = line.rpartition("Skills:")
        if marker:
            # Keep only what follows the label, even if header text precedes
            # it on the same line.
            skill_text = tail
        elif re.search(r'Goal:|Problem:', line):
            # Pure header line: nothing on it is a skill.
            continue
        else:
            skill_text = line
        skills.extend(name.strip() for name in skill_text.split(",") if name.strip())
    return skills

def find_skill_gaps(required_skills):
    """Compare required skills with the organization's skills in ``data.json``.

    Only the "organization_skills" entry of ``data.json`` (read from the
    current working directory) is used; the file does not need a "courses"
    entry.

    Args:
        required_skills: Iterable of hashable skill names.

    Returns:
        A 4-tuple ``("Existing_skills:", existing, "missing_skills:", missing)``
        where ``existing`` is the set of required skills the organization
        already has and ``missing`` is the set it lacks. The two string
        labels are part of the tuple.

    Raises:
        FileNotFoundError: If ``data.json`` does not exist.
        json.JSONDecodeError: If ``data.json`` is not valid JSON.
        KeyError: If ``data.json`` has no "organization_skills" entry.
    """
    with open("data.json", "r") as file:
        data = json.load(file)

    # Sets remove duplicates and make the comparisons simple set algebra.
    required_skills_set = set(required_skills)
    organization_skills = set(data["organization_skills"])

    missing_skills = required_skills_set - organization_skills
    existing_skills = required_skills_set & organization_skills
    return "Existing_skills:", existing_skills, "missing_skills:", missing_skills

def find_skill_gaps_and_recommend_courses(required_skills, organization_skills_data="data.json"):
    """Find skill gaps and the courses that teach the missing skills.

    Args:
        required_skills: Iterable of hashable skill names.
        organization_skills_data: Path to a JSON file that may contain an
            "organization_skills" list and a "courses" list. A course is
            considered only if it is a dict with a "teaches_skills" entry.

    Returns:
        ``(existing_skills, missing_skills, recommended_courses)``: two sets
        and a list. ``recommended_courses`` holds each course that teaches at
        least one missing skill, once, in the order the courses appear in the
        file. If the file is missing or is not valid JSON, an error is
        printed and ``(set(), set(required_skills), [])`` is returned.
    """
    try:
        with open(organization_skills_data, "r") as file:
            data = json.load(file)
    except FileNotFoundError:
        print(f"Error: {organization_skills_data} not found.")
        return set(), set(required_skills), []
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from {organization_skills_data}.")
        return set(), set(required_skills), []

    organization_skills = set(data.get("organization_skills", []))
    courses = data.get("courses", [])

    required_skills_set = set(required_skills)
    missing_skills = required_skills_set - organization_skills
    existing_skills = required_skills_set & organization_skills

    # Walk the courses (a list with a stable order) rather than the set of
    # missing skills: set iteration order for strings changes between runs,
    # which made the order of the recommendations unpredictable.
    recommended_courses = []
    for course in courses:
        if not isinstance(course, dict) or "teaches_skills" not in course:
            continue
        taught = course["teaches_skills"]
        teaches_missing = any(skill in taught for skill in missing_skills)
        if teaches_missing and course not in recommended_courses:  # Avoid duplicates
            recommended_courses.append(course)

    return existing_skills, missing_skills, recommended_courses



def extract_course_keywords(groq_api_key):
    """Ask the Groq-hosted LLM for the core skills taught by the listed courses.

    Course names are read from the "courses" entry of ``data.json`` in the
    current working directory and are expected to be strings (they are
    joined with ``", "`` for the prompt).

    Args:
        groq_api_key: API key forwarded to ``ChatGroq``.

    Returns:
        The model's answer with every "Skills:" label removed, stripped and
        split on ``", "``.
    """
    with open("data.json", "r") as file:
        data = json.load(file)
    # dict.fromkeys() removes duplicates like set() but keeps the file order.
    # A set of strings iterates in a different order from run to run, which
    # changed the prompt text and undermined the temperature=0 setting.
    courses = list(dict.fromkeys(data["courses"]))

    llm = ChatGroq(
        model="llama-3.1-8b-instant",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        groq_api_key=groq_api_key,
    )

    course_extraction_prompt = PromptTemplate(
        input_variables=["courses"],
        template="""
        You are an expert in learning and education. Extract the **core skills/keywords** from the following course names.

        **Courses:**  
        {courses}

        - Extract key **skills** related to each course.
        - Ensure skills are precise and relevant.
        - Do not include explanations, extra text, or formatting.

        **Output Format:**  
        Skills: Data Analysis, Cybersecurity, Python, Cloud Computing
        """
    )

    course_extraction_chain = LLMChain(llm=llm, prompt=course_extraction_prompt)
    response = course_extraction_chain.run(courses=", ".join(courses))

    # Drop the "Skills:" label requested by the prompt, then split the list.
    extracted_course_skills = response.replace("Skills:", "").strip().split(", ")
    return extracted_course_skills


def extract_course1(skills,groq_api_key):
    """Ask the Groq-hosted LLM to match skills to courses and save the result.

    Course names are read from the "courses" entry of ``data.json`` in the
    current working directory and are expected to be strings. The loaded
    data plus a "matched_courses" entry is written to ``data_output.json``.

    Args:
        skills: Iterable of skill-name strings, joined with ``", "`` for the
            prompt.
        groq_api_key: API key forwarded to ``ChatGroq``.

    Returns:
        List of non-empty, stripped entries from the model's answer. The
        prompt requests one "Skill: Course" pair per line, so a multi-line
        answer yields one entry per line; a single-line answer is split on
        ``", "`` instead.
    """
    with open("data.json", "r") as file:
        data = json.load(file)
    # dict.fromkeys() removes duplicates like set() but keeps the file order.
    # A set of strings iterates in a different order from run to run, which
    # changed the prompt text and undermined the temperature=0 setting.
    courses = list(dict.fromkeys(data["courses"]))

    llm = ChatGroq(
        model="llama-3.1-8b-instant",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        groq_api_key=groq_api_key,
    )

    course_extraction_prompt = PromptTemplate(
        input_variables=["skills", "courses"],
        template="""
        You are an expert in education and curriculum design. Your task is to **match skills with the most relevant courses** and rank them based on importance.

        **Required Skills:**  
        {skills}

        **Available Courses:**  
        {courses}

        - Identify the most suitable courses for each skill.
        - Prioritize courses that cover multiple relevant skills.
        - Ensure the most useful courses appear first.
        - Do not include explanations, extra text, or formatting.

        **Output Format:**  
        Security: Cloud Security Training  
        Data Analysis: Python for Data Science  
        Ethical Hacking: Ethical Hacking Basics  
        """
    )

    course_extraction_chain = LLMChain(llm=llm, prompt=course_extraction_prompt)
    response = course_extraction_chain.run(skills=", ".join(skills), courses=", ".join(courses))

    # The prompt asks for one pair per line, so split on newlines when the
    # answer has them; fall back to the comma-separated form otherwise.
    cleaned = response.replace("Relevant Courses:", "").strip()
    separator = "\n" if "\n" in cleaned else ", "
    relevant_courses = [entry.strip() for entry in cleaned.split(separator) if entry.strip()]

    data["matched_courses"] = relevant_courses
    with open("data_output.json", "w") as file:
        json.dump(data, file, indent=4)
    return relevant_courses

def extract_relevant_courses(skills, groq_api_key):
    """Map each skill to one course chosen by the Groq-hosted LLM.

    Courses come from the "courses" entry of ``data.json`` in the current
    working directory and are sent to the model one per line. Only answer
    lines of the form ``Skill: Course`` whose course text exactly matches
    an available course are kept. The loaded data plus a
    "skill_course_mapping" entry is written to ``data_output.json``.

    Args:
        skills: Iterable of skill-name strings, joined with ``", "`` for the
            prompt.
        groq_api_key: API key forwarded to ``ChatGroq``.

    Returns:
        Dict mapping skill name to course name.
    """
    with open("data.json", "r") as source:
        payload = json.load(source)
    available = list(payload["courses"])

    model = ChatGroq(
        model="llama-3.1-8b-instant",
        temperature=0,
        groq_api_key=groq_api_key,
    )

    matching_prompt = PromptTemplate(
        input_variables=["skills", "courses"],
        template="""
        You are an expert in matching technical skills with educational courses. For the following skills, identify the most relevant course from the provided list.

        **Skills needed:**
        {skills}

        **Available courses:**
        {courses}

        For each skill, return EXACTLY ONE most relevant course in this format:
        Skill: Course Name

        For example:
        Python: Python for Data Science and Machine Learning
        Database: PostgreSQL and MySQL Database Mastery
        Cloud: AWS Certified Solutions Architect Course

        Include ONLY the skill-course pairs with no additional text or explanations.
        """
    )

    chain = LLMChain(llm=model, prompt=matching_prompt)
    answer = chain.run(skills=", ".join(skills), courses="\n".join(available))

    # Keep only "skill: course" lines whose course is a known course name.
    pairs = {}
    for raw_line in answer.strip().split('\n'):
        name, colon, title = raw_line.partition(':')
        if not colon:
            continue
        title = title.strip()
        if title in available:
            pairs[name.strip()] = title

    # Persist the mapping alongside the original data.
    payload["skill_course_mapping"] = pairs
    with open("data_output.json", "w") as sink:
        json.dump(payload, sink, indent=4)

    return pairs

def extract_course(skills, groq_api_key):
    """Ask the Groq-hosted LLM for a JSON mapping of skills to relevant courses.

    Courses come from the "courses" entry of ``data.json`` in the current
    working directory and are expected to be strings.

    Args:
        skills: Iterable of skill-name strings, joined with ``", "`` for the
            prompt.
        groq_api_key: API key forwarded to ``ChatGroq``.

    Returns:
        The model's raw answer as a string. It is not parsed or validated
        here, so callers must extract and decode the JSON themselves.
    """
    with open("data.json", "r") as file:
        data = json.load(file)

    courses = data["courses"]  # Keeping courses as a list

    llm = ChatGroq(
        model="llama-3.1-8b-instant",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        groq_api_key=groq_api_key,
    )

    # The example object in the template must itself be valid JSON, since the
    # prompt asks the model to answer strictly in that format. Doubled braces
    # are literal braces for the template formatter.
    course_extraction_prompt = PromptTemplate(
        input_variables=["skills", "courses"],
        template="""
    You are an expert in education and curriculum design. Your task is to match skills with the most relevant courses from the provided list.

    **Skills to Match:**  
    {skills}

    **Available Courses:**  
    {courses}

    **Guidelines:**  
    - Each skill should have at least one **most relevant** course.  
    - Do **not** assign the same set of courses to multiple skills unless absolutely necessary.  
    - Avoid assigning irrelevant courses (e.g., UI/UX design to communication skills).  
    - Ensure the output is **strictly in valid JSON format** with the following structure:

    ```json
    {{
        "Skill Name": ["Most Relevant Course 1", "Most Relevant Course 2","Most Relevant Course 3","Most Relevant Course 4"],
        "Another Skill": ["Most Relevant Course"]
    }}
    ```
    """
    )

    course_extraction_chain = LLMChain(llm=llm, prompt=course_extraction_prompt)
    response = course_extraction_chain.run(skills=", ".join(skills), courses=", ".join(courses))

    return response

def find_skill_and_course_gaps(required_skills, extracted_course_skills):
    """Report which required skills are covered by the organization and by courses.

    The organization's skills are read from the "organization_skills" entry
    of ``data.json`` in the current working directory.

    Args:
        required_skills: Iterable of hashable skill names that are needed.
        extracted_course_skills: Iterable of hashable skill names that the
            available courses cover.

    Returns:
        Dict of four sets of skill names:
        "Existing Skills" (required and held by the organization),
        "Missing Skills" (required but not held),
        "Relevant Courses" (required skills that appear among the course skills),
        "Missing Courses" (required skills that do not).
    """
    with open("data.json", "r") as handle:
        org_data = json.load(handle)

    needed = set(required_skills)
    taught = set(extracted_course_skills)
    owned = set(org_data["organization_skills"])

    return {
        "Existing Skills": needed.intersection(owned),
        "Missing Skills": needed.difference(owned),
        "Relevant Courses": needed.intersection(taught),
        "Missing Courses": needed.difference(taught),
    }

def match_courses_to_skills(courses, missing_skills):
    """Group missing skills under the courses whose names mention them.

    A skill matches a course when the lower-cased skill is a substring of
    the lower-cased course name.

    Args:
        courses: Iterable of course-name strings.
        missing_skills: Iterable of skill-name strings.

    Returns:
        ``defaultdict(list)`` mapping each course with at least one match to
        its matching skills, in the order the skills were iterated. Courses
        without a match get no entry.
    """
    matches = defaultdict(list)

    for title in courses:
        haystack = title.lower()
        hits = [skill for skill in missing_skills if skill.lower() in haystack]
        if hits:
            matches[title].extend(hits)

    return matches



if __name__ == "__main__":
    # Demo run: extract skills for a set of goals/problems, compare them with
    # the organization's skills in data.json and list courses whose names
    # mention a missing skill.

    # The module-level GROQ_API_KEY is used; no second copy of the secret is
    # kept in this script section.
    if not GROQ_API_KEY:
        raise SystemExit("GROQ_API_KEY is not configured.")

    industry = "Infrastructure"  # not used by the code visible in this file

    goal = [
        "Want to improve my tech team",
        "Enhancing Cybersecurity & Compliance",
        "Driving Innovation & Technology Adoption",
        "Scaling Cloud Infrastructure",
        "Improving IT Cost Efficiency",
        "Enhancing Customer Experience with AI",
        "Optimizing Software Development Lifecycle",
        "Increasing Revenue Through Digital Transformation",
        "Improving Data-Driven Decision Making",
        "Ensuring Compliance with Industry Regulations",
    ]

    problem = [
        "My tech team is not able to work efficiently",
        "Cybersecurity Threats",
        "Talent Acquisition and Retention",
        "Software Development and Deployment Challenges",
        "Integration of Emerging Technologies",
        "High Operational Costs in Cloud Usage",
        "Inefficient Customer Support Systems",
        "Delays in Software Releases",
        "Lack of Skilled Data Analysts",
        "Outdated Security and Compliance Policies",
    ]

    tna = TNA(GROQ_API_KEY, goal, problem)
    clear = clear_skill(tna)
    find_skill_gaps1 = find_skill_gaps(clear)

    print(find_skill_gaps1)
    with open('data.json', 'r') as file:
        data = json.load(file)
    courses = data.get("courses", [])

    # find_skill_gaps returns ("Existing_skills:", set, "missing_skills:", set).
    # Only the final set holds skill names to match; passing the whole tuple
    # made the matcher call .lower() on a set and fail.
    missing_skills = find_skill_gaps1[3]

    # Match courses to missing skills
    matched_courses = match_courses_to_skills(courses, missing_skills)

    # Print results
    for course, skills in matched_courses.items():
        print(f"Course: {course}\n  Matches Skills: {', '.join(skills)}\n")

    print("Cleaning Complete")
    # LLM-based alternatives to the substring matcher above are available in
    # this file: extract_course_keywords, extract_course and
    # extract_relevant_courses.
    print("Hello")