import os
import time
import random
import re
import json
import requests
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from crawl4ai import AsyncWebCrawler
import asyncio
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser, PydanticOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader, UnstructuredPowerPointLoader
from PyPDF2 import PdfReader
from langchain_experimental.text_splitter import SemanticChunker
from pydantic import BaseModel, Field
from typing import List, Optional, Dict
from urllib.parse import urlparse
from langchain.docstore.document import Document

import shutil

# SECURITY: API keys must never be committed to source control. The key is
# read from the GROQ_API_KEY environment variable and falls back to an empty
# string when the variable is unset.
# NOTE(review): any key that was previously committed here should be revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
# Name of the Ollama embedding model.
OLLAMA_MODEL = "nomic-embed-text"

def tna(Goals,Challenges, skills_db, groq_api_key, model_name="mixtral-8x7b-32768", temperature=0):
    """Ask a Groq chat model for a Training Need Analysis against a skills database.

    Args:
        Goals: Business goals, substituted into the prompt as-is.
        Challenges: Business challenges, substituted into the prompt as-is.
        skills_db: Existing skills database, substituted into the prompt as-is.
        groq_api_key: API key handed to ``ChatGroq``.
        model_name: Groq model identifier.
        temperature: Sampling temperature handed to ``ChatGroq``.

    Returns:
        A ``(skill_tree, token_consumed)`` tuple: the content of the model
        response and the ``total_tokens`` figure from its response metadata.
        ``(None, 0)`` is returned when the model call fails; the token count
        is ``0`` when the response carries no usable token-usage metadata.
    """
    # Initialize the chat model
    chat = ChatGroq(temperature=temperature, groq_api_key=groq_api_key, model_name=model_name)

    # Prompt template with three placeholders filled in below
    system_prompt = PromptTemplate(
        input_variables=["skills_db", "Goals","Challenges"],
        template="""
        You are an AI assistant specializing in Training Need Analysis (TNA). 
        Given a company's business goals, challenges, and existing skills database, 
        suggest skill areas for development. 

        **Existing Skills Database:**
        {skills_db}

        **Business Goals:**
        {Goals}

        **Challenges:**
        {Challenges}

        **Objective:**
        - Identify skill gaps.
        - Recommend relevant skill enhancements.
        - Provide justifications for each recommendation.
        - You can also give skills 
        - You may also suggest skills outside the existing database if they align with the business goals and challenges. But make sure you write there that these are exclusive from your skills
        - SUggest minimum 10 skills
        Return your response in a structured format:
        - **Recommended Skills**
        - **JSON Output**
        - **Why These Skills Are Important**
        """
    )

    # Render the template into the final prompt string
    prompt = system_prompt.format(Goals=Goals,Challenges=Challenges,skills_db=skills_db)

    # Send the formatted prompt to ChatGroq
    try:
        output = chat.invoke(prompt)
        skill_tree = output.content
    except Exception as e:
        print(f"An error occurred during chat invocation: {e}")
        return None, 0

    # Token accounting is best-effort: missing usage metadata must not throw
    # away a response that was generated successfully.
    try:
        token_consumed = output.response_metadata['token_usage']['total_tokens']
    except (AttributeError, KeyError, TypeError):
        token_consumed = 0

    return skill_tree, token_consumed



def tna2(Goals,Challenges, groq_api_key, model_name="mixtral-8x7b-32768", temperature=0):
    """Ask a Groq chat model for a Training Need Analysis with course suggestions.

    Unlike a skills-database-driven analysis, the prompt here contains only
    the goals and challenges and additionally asks the model for courses.

    Args:
        Goals: Business goals, substituted into the prompt as-is.
        Challenges: Business challenges, substituted into the prompt as-is.
        groq_api_key: API key handed to ``ChatGroq``.
        model_name: Groq model identifier.
        temperature: Sampling temperature handed to ``ChatGroq``.

    Returns:
        A ``(skill_tree, token_consumed)`` tuple: the content of the model
        response and the ``total_tokens`` figure from its response metadata.
        ``(None, 0)`` is returned when the model call fails; the token count
        is ``0`` when the response carries no usable token-usage metadata.
    """
    # Initialize the chat model
    chat = ChatGroq(temperature=temperature, groq_api_key=groq_api_key, model_name=model_name)

    # Prompt template with two placeholders filled in below
    system_prompt = PromptTemplate(
        input_variables=["Goals","Challenges"],
        template="""
        You are an AI assistant specializing in Training Need Analysis (TNA). 
        Given a company's business goals, challenges suggest skill areas for development. 

       
        **Business Goals:**
        {Goals}

        **Challenges:**
        {Challenges}

        **Objective:**
        - Identify skill gaps.
        - Recommend relevant skill enhancements.
        - Provide justifications for each recommendation.
        - You can also give skills 
        - SUggest minimum 10 skills
        - ALso provide courses to improve skills 
        - minimum 2 couses with each skills
        Return your response in a structured format:
        - **Recommended Skills**
        - **JSON Output**
        - **Why These Skills Are Important**
        """
    )

    # Render the template into the final prompt string
    prompt = system_prompt.format(Goals=Goals,Challenges=Challenges)

    # Send the formatted prompt to ChatGroq
    try:
        output = chat.invoke(prompt)
        skill_tree = output.content
    except Exception as e:
        print(f"An error occurred during chat invocation: {e}")
        return None, 0

    # Token accounting is best-effort: missing usage metadata must not throw
    # away a response that was generated successfully.
    try:
        token_consumed = output.response_metadata['token_usage']['total_tokens']
    except (AttributeError, KeyError, TypeError):
        token_consumed = 0

    return skill_tree, token_consumed

def _extract_items(raw):
    """Return the individual item names contained in *raw*.

    A string is scanned for double-quoted substrings; any other iterable is
    treated as an already-split collection and each element is stringified.
    """
    if isinstance(raw, str):
        return re.findall(r'"(.*?)"', raw)
    return [str(item) for item in raw]


def create_and_save_embeddings(skills, courses, client_id):
    """Build FAISS indexes for a client's skills and courses and save them to disk.

    The indexes are written to ``my_embeddings/<client_id>/skills_faiss`` and
    ``my_embeddings/<client_id>/courses_faiss``; the folder is created when
    it does not exist.

    Args:
        skills: Either a string of double-quoted skill names or an iterable
            of skill names.
        courses: Either a string of double-quoted course names or an iterable
            of course names.
        client_id: Identifier used (as a string) for the per-client folder.

    NOTE(review): an input that yields no items is handed to
    ``FAISS.from_documents`` unchanged -- confirm how FAISS treats an empty
    document list.
    """
    client_id = str(client_id)
    embedding_folder = os.path.join("my_embeddings", client_id)
    os.makedirs(embedding_folder, exist_ok=True)

    # Initialize embedding model
    embeddings = OllamaEmbeddings(model='nomic-embed-text')

    # One document per skill / course name
    skill_docs = [Document(page_content=skill) for skill in _extract_items(skills)]
    course_docs = [Document(page_content=course) for course in _extract_items(courses)]

    # Create FAISS index for skills
    skill_db = FAISS.from_documents(skill_docs, embedding=embeddings)
    skill_db.save_local(os.path.join(embedding_folder, "skills_faiss"))

    # Create FAISS index for courses
    course_db = FAISS.from_documents(course_docs, embedding=embeddings)
    course_db.save_local(os.path.join(embedding_folder, "courses_faiss"))

    print(f"Saved FAISS indexes for skills and courses in {embedding_folder}")

def _query_parts(value):
    """Return *value* as a list of strings suitable for joining into a query.

    A plain string is kept whole (joining it directly would put a space
    between every character); any other iterable is stringified element-wise.
    """
    if isinstance(value, str):
        return [value]
    return [str(part) for part in value]


def retrieve_skills_and_courses(challenges, goals, client_id, top_k=10):
    """Look up the skills and courses most similar to the given challenges and goals.

    Loads the FAISS indexes stored under ``my_embeddings/<client_id>`` and
    runs one similarity search per index with a query built from all
    challenges and goals joined by spaces.

    Args:
        challenges: A string or an iterable of challenge descriptions.
        goals: A string or an iterable of goal descriptions.
        client_id: Identifier used (as a string) for the per-client folder.
        top_k: Number of matches requested from each index.

    Returns:
        A ``(matched_skills, matched_courses)`` tuple of lists holding the
        ``page_content`` of each match.
    """
    client_id = str(client_id)
    embedding_folder = os.path.join("my_embeddings", client_id)

    # Load FAISS indexes
    # SECURITY: allow_dangerous_deserialization=True lets FAISS unpickle the
    # stored index; only load index folders this application wrote itself.
    embeddings = OllamaEmbeddings(model='nomic-embed-text')
    skill_db = FAISS.load_local(os.path.join(embedding_folder, "skills_faiss"), embeddings,allow_dangerous_deserialization=True)
    course_db = FAISS.load_local(os.path.join(embedding_folder, "courses_faiss"), embeddings,allow_dangerous_deserialization=True)

    # Convert challenges & goals into a single query string
    query = " ".join(_query_parts(challenges) + _query_parts(goals))
    print('quesry', query)
    # Retrieve top-k matching skills
    skill_matches = skill_db.similarity_search(query, k=top_k)
    print("SKill_match",skill_matches)
    matched_skills = [match.page_content for match in skill_matches]
    print("matched_skills",matched_skills)
    # Retrieve top-k matching courses
    course_matches = course_db.similarity_search(query, k=top_k)
    matched_courses = [match.page_content for match in course_matches]
    print("matched_courses",matched_courses)

    return matched_skills, matched_courses


def generate_tna_report(challenges, goals, matched_skills, matched_courses, groq_api_key, model_name="mixtral-8x7b-32768", temperature=0):
    """Ask a Groq chat model to turn retrieved skills and courses into a final TNA report.

    Args:
        challenges: Organization challenges, substituted into the prompt as-is.
        goals: Organization goals, substituted into the prompt as-is.
        matched_skills: Candidate skill gaps, substituted into the prompt as-is.
        matched_courses: Candidate courses, substituted into the prompt as-is.
        groq_api_key: API key handed to ``ChatGroq``.
        model_name: Groq model identifier.
        temperature: Sampling temperature handed to ``ChatGroq``.

    Returns:
        The raw text content of the model response. The prompt asks for JSON,
        but the text is returned unparsed and unvalidated.

    Errors raised by the model call are not caught here.
    """
    chat = ChatGroq(temperature=temperature, groq_api_key=groq_api_key, model_name=model_name)

    # Define system prompt; doubled braces are literal braces in the template
    system_prompt = PromptTemplate(
        input_variables=["challenges", "goals", "matched_skills", "matched_courses"],
        template="""
        You are an AI assistant performing Training Need Analysis (TNA). 
        Given an organization's challenges, goals, and the retrieved skills & courses, 
        finalize the **most critical** skill gaps and training recommendations.

        Give as many courses and skills you can give

        **Challenges:**
        {challenges}

        **Goals:**
        {goals}

        **Potential Skill Gaps Identified:**
        {matched_skills}

        **Matching Courses:**
        {matched_courses}

        **Output the final recommendation in JSON format:**
        ```json
        {{
            "Recommended Skills": [...],
            "Skill-Course Mapping": {{
                "Skill1": ["Course1", "Course2"],
                "Skill2": ["Course3"]
            }},
            "Why These Skills Are Important": {{
                "Skill1": "Justification...",
                "Skill2": "Justification..."
            }}
        }}
        ```
        """
    )

    # Format the prompt
    prompt = system_prompt.format(
        challenges=challenges,
        goals=goals,
        matched_skills=matched_skills,
        matched_courses=matched_courses
    )

    # Get response from LLM
    response = chat.invoke(prompt)

    # Hand back the response text as-is (no JSON parsing is done here)
    return response.content

def run_tna_pipeline(challenges, goals, client_id, groq_api_key):
    """Run retrieval followed by report generation for one client.

    First fetches the skills and courses matching the challenges and goals
    via ``retrieve_skills_and_courses``, then passes them to
    ``generate_tna_report`` and returns whatever that function returns.
    """
    # Step 1: FAISS lookup of candidate skills and courses
    skills_found, courses_found = retrieve_skills_and_courses(
        challenges, goals, client_id
    )

    # Step 2: LLM-generated recommendations from those candidates
    return generate_tna_report(
        challenges, goals, skills_found, courses_found, groq_api_key
    )

if __name__ == "__main__":
    GROQ_API_KEY = "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"  # Replace with your actual Groq API key
    industry = "Infrastructure"
    #skill_tree,token_consumed  = generate_skill_tree(prompt_text=industry, groq_api_key=GROQ_API_KEY)
    skillset ='''
      "Budgeting",
      "Cost Estimation",
    
      "Civil Engineering",
      "Structural Analysis",
   
      "AutoCAD",
      "Revit",
    
      "Communication",
      "Leadership",
      "Teamwork",
      "Urban Design",
      "Water Supply and Distribution"
    '''
    # skills = '''
    # "Python",
    # "JavaScript",
    # "SQL",
    # "C++",
    # "React.js",
    # "Node.js",
    # "Django",
    # "Flask",
    # "TensorFlow",
    # "PyTorch",
    # "Docker",
    # "Kubernetes",
    # "AWS",
    # "Google Cloud Platform (GCP)",
    # "RESTful APIs",
    # "GraphQL",
    # "Git",
    # "CI/CD Pipelines",
    # "Cybersecurity Principles",
    # "Data Visualization"
    # '''

    skills = [
    "Python",
    "JavaScript",
    "SQL",
    "C++",
    "React.js",
    "Node.js",
    "Django",
    "Flask",
    "TensorFlow",
    "PyTorch",
    "Docker",
    "Kubernetes",
    "AWS",
    "Google Cloud Platform (GCP)",
    "RESTful APIs",
    "GraphQL",
    "Git",
    "CI/CD Pipelines",
    "Cybersecurity Principles",
    "Data Visualization",
    "Machine Learning",
    "Deep Learning",
    "NLP",
    "Data Science",
    "Algorithms",
    "Data Engineering",
    "Cloud Computing",
    "Agile Methodology",
    "Scrum",
    "Jupyter Notebooks",
    "Data Wrangling",
    "Big Data",
    "Apache Spark",
    "Hadoop",
    "MySQL",
    "PostgreSQL",
    "MongoDB",
    "SQLite",
    "NoSQL",
    "Microsoft SQL Server",
    "Elasticsearch",
    "Apache Kafka",
    "Pandas",
    "NumPy",
    "Scikit-learn",
    "Matplotlib",
    "Seaborn",
    "Plotly",
    "Tableau",
    "Power BI",
    "Excel",
    "Google Sheets",
    "R",
    "Hadoop Distributed File System (HDFS)",
    "MapReduce",
    "Data Mining",
    "Cloud Storage",
    "Data Pipeline Development",
    "ETL",
    "Automated Testing",
    "Unit Testing",
    "Integration Testing",
    "Test-Driven Development (TDD)",
    "Behavior-Driven Development (BDD)",
    "Scrum Master",
    "Lean Methodology",
    "Kanban",
    "Microservices Architecture",
    "Serverless Architecture",
    "DevOps",
    "Automation",
    "Jenkins",
    "Ansible",
    "Chef",
    "Puppet",
    "Terraform",
    "Virtualization",
    "Linux",
    "Windows Server",
    "Bash",
    "PowerShell",
    "Vim",
    "Zsh",
    "AWS Lambda",
    "S3",
    "EC2",
    "EKS",
    "RDS",
    "SQS",
    "DynamoDB",
    "VPC",
    "IAM",
    "CloudFormation",
    "GCP Compute Engine",
    "Google Kubernetes Engine (GKE)",
    "Google Cloud Storage",
    "Google BigQuery",
    "Google Pub/Sub",
    "Google Cloud Functions",
    "Azure",
    "Azure DevOps",
    "Azure Kubernetes Service (AKS)",
    "Azure Functions",
    "Azure Blob Storage",
    "Azure SQL Database",
    "Azure Active Directory",
    "Cloud Security",
    "Multi-cloud Solutions",
    "Google Analytics",
    "Firebase",
    "OAuth 2.0",
    "JWT",
    "Single Sign-On (SSO)",
    "LDAP",
    "Identity and Access Management (IAM)",
    "Virtual Private Network (VPN)",
    "Penetration Testing",
    "Ethical Hacking",
    "Network Security",
    "Firewalls",
    "Web Application Security",
    "Social Engineering",
    "Cryptography",
    "Digital Certificates",
    "Application Security",
    "Security Information and Event Management (SIEM)",
    "Vulnerability Management",
    "Compliance Frameworks",
    "GDPR",
    "HIPAA",
    "PCI DSS",
    "NIST",
    "CIS Controls",
    "ISO 27001",
    "OWASP",
    "Machine Learning Algorithms",
    "Supervised Learning",
    "Unsupervised Learning",
    "Reinforcement Learning",
    "Deep Neural Networks",
    "Convolutional Neural Networks (CNN)",
    "Recurrent Neural Networks (RNN)",
    "Long Short-Term Memory (LSTM)",
    "Generative Adversarial Networks (GANs)",
    "Autoencoders",
    "Transfer Learning",
    "Natural Language Processing (NLP)",
    "Text Mining",
    "Sentiment Analysis",
    "Topic Modeling",
    "Named Entity Recognition (NER)",
    "Word2Vec",
    "GloVe",
    "BERT",
    "GPT",
    "T5",
    "XLNet",
    "Text Classification",
    "Speech Recognition",
    "Machine Translation",
    "Chatbots",
    "Voice Assistants",
    "Image Recognition",
    "Object Detection",
    "Image Classification",
    "Semantic Segmentation",
    "Computer Vision",
    "OpenCV",
    "Dlib",
    "Image Processing",
    "Augmented Reality (AR)",
    "Virtual Reality (VR)",
    "Self-driving Cars",
    "Robotics",
    "Autonomous Systems",
    "IoT",
    "Edge Computing",
    "Blockchain",
    "Smart Contracts",
    "Ethereum",
    "Solidity",
    "Bitcoin",
    "Cryptocurrency",
    "DeFi",
    "NFTs",
    "Cryptographic Hashing",
    "Distributed Ledger Technology",
    "Supply Chain Blockchain",
    "Artificial Intelligence (AI)",
    "AI Ethics",
    "AI Safety",
    "AI Governance",
    "AI Explainability",
    "AI for Social Good",
    "AI in Healthcare",
    "AI in Finance",
    "AI in Education",
    "AI in Marketing",
    "AI in Gaming",
    "Recommender Systems",
    "Collaborative Filtering",
    "Content-Based Filtering",
    "Matrix Factorization",
    "Neural Collaborative Filtering",
    "Data Augmentation",
    "Hyperparameter Tuning",
    "Model Evaluation",
    "Cross-Validation",
    "Bias-Variance Tradeoff",
    "Overfitting",
    "Underfitting",
    "Dimensionality Reduction",
    "Principal Component Analysis (PCA)",
    "t-SNE",
    "Linear Regression",
    "Logistic Regression",
    "Decision Trees",
    "Random Forests",
    "Gradient Boosting Machines (GBM)",
    "XGBoost",
    "LightGBM",
    "CatBoost",
    "K-Nearest Neighbors (KNN)",
    "Support Vector Machines (SVM)",
    "Naive Bayes",
    "K-Means Clustering",
    "Hierarchical Clustering",
    "DBSCAN",
    "Latent Dirichlet Allocation (LDA)",
    "Association Rule Learning",
    "Time Series Forecasting",
    "ARIMA",
    "Prophet",
    "Seasonality",
    "Autoregressive Models",
    "Exponential Smoothing",
    "Anomaly Detection",
    "Ensemble Learning",
    "Bagging",
    "Boosting",
    "Stacking",
    "Hyperparameter Optimization",
    "Model Deployment",
    "Model Monitoring",
    "Model Retraining",
    "Model Drift",
    "Feature Engineering",
    "Feature Selection",
    "Feature Scaling",
    "Data Imputation",
    "Text Preprocessing",
    "Image Augmentation",
    "Data Augmentation",
    "API Development",
    "API Documentation",
    "OpenAPI",
    "Swagger",
    "API Versioning",
    "GraphQL APIs",
    "Serverless APIs",
    "FastAPI",
    "Flask API",
    "Django Rest Framework (DRF)",
    "REST API Design",
    "API Authentication",
    "OAuth",
    "JWT Authentication",
    "API Rate Limiting",
    "API Throttling",
    "API Caching",
    "API Security",
    "API Testing",
    "Postman",
    "SoapUI",
    "JMeter",
    "Load Testing",
    "Stress Testing",
    "Performance Testing",
    "Functional Testing",
    "End-to-End Testing",
    "Browser Automation",
    "Selenium",
    "Cypress",
    "Puppeteer",
    "Appium",
    "Test Automation",
    "UI Testing",
    "Mobile Testing",
    "Cross-Browser Testing",
    "Load Balancing",
    "Nginx",
    "Apache HTTP Server",
    "HAProxy",
    "Web Servers",
    "CDN",
    "Web Security",
    "SSL/TLS",
    "Content Security Policy (CSP)",
    "Cross-Origin Resource Sharing (CORS)",
    "Two-Factor Authentication (2FA)",
    "Content Management Systems (CMS)",
    "WordPress",
    "Drupal",
    "Joomla",
    "Magento",
    "Shopify",
    "Wix",
    "Squarespace",
    "Web Design",
    "User Experience (UX) Design",
    "User Interface (UI) Design",
    "Wireframing",
    "Prototyping",
    "Figma",
    "Sketch",
    "Adobe XD",
    "Photoshop",
    "Illustrator",
    "InVision",
    "Material Design",
    "Responsive Design",
    "Web Accessibility (WCAG)",
    "Web Performance Optimization",
    "Progressive Web Apps (PWA)",
    "Search Engine Optimization (SEO)",
    "Content Marketing",
    "Digital Marketing",
    "Google Ads",
    "Facebook Ads",
    "Email Marketing",
    "Social Media Marketing",
    "Influencer Marketing",
    "Marketing Automation",
    "Conversion Rate Optimization (CRO)",
    "Customer Relationship Management (CRM)",
    "Salesforce",
    "HubSpot",
    "Zoho CRM",
    "Marketing Analytics",
    "A/B Testing",
    "Split Testing",
    "Customer Segmentation",
    "Lead Generation",
    "Customer Acquisition",
    "Customer Retention",
    "E-commerce",
    "Shopify Development",
    "Magento Development",
    "WooCommerce",
    "Dropshipping",
    "Supply Chain Management",
    "Logistics",
    "Inventory Management",
    "Project Management",
    "Trello",
    "Jira",
    "Asana",
    "Basecamp",
    "Monday.com",
    "Microsoft Project",
    "ClickUp",
    "Task Management",
    "Risk Management",
    "Stakeholder Management",
    "Budgeting",
    "Resource Allocation",
    "Time Management",
    "Communication Skills",
    "Presentation Skills",
    "Problem-Solving",
    "Critical Thinking",
    "Team Collaboration",
    "Leadership",
    "Conflict Resolution",
    "Customer Service",
    "Negotiation Skills",
    "Sales",
    "Business Development",
    "Strategic Planning",
    "Business Analysis",
    "Financial Modeling",
    "Forecasting",
    "Budgeting",
    "Cost Management",
    "Corporate Finance",
    "Venture Capital",
    "Private Equity",
    "Investment Banking",
    "Mergers and Acquisitions (M&A)",
    "Due Diligence",
    "Financial Reporting",
    "Audit",
    "Taxation",
    "Compliance",
    "Accounting",
    "Data Privacy",
    "Risk Analysis",
    "Financial Accounting",
    "Management Accounting",
    "Financial Statements",
    "ERP Systems",
    "SAP",
    "Oracle ERP",
    "NetSuite",
    "QuickBooks",
    "Xero",
    "Sage Accounting",
    "Public Speaking",
    "Writing",
    "Technical Writing",
    "Copywriting",
    "Content Creation",
    "Editing",
    "Blogging",
    "Proofreading",
    "Social Media Content",
    "Video Editing",
    "Adobe Premiere Pro",
    "Final Cut Pro",
    "DaVinci Resolve",
    "Film Production",
    "Podcasting",
    "Photography",
    "Videography",
    "Animation",
    "2D Animation",
    "3D Animation",
    "Motion Graphics",
    "Illustration",
    "Graphic Design",
    "Branding",
    "Logo Design",
    "Typography",
    "Print Design",
    "Packaging Design",
    "UI/UX Prototyping",
    "App Design",
    "Web Development",
    "Mobile Development",
    "Game Development",
    "Unity",
    "Unreal Engine",
    "C#",
    "Swift",
    "Objective-C",
    "Kotlin",
    "Android Development",
    "iOS Development",
    "Xcode",
    "Android Studio",
    "React Native",
    "Flutter",
    "SwiftUI",
    "Backend Development",
    "Frontend Development",
    "Full Stack Development",
    "HTML5",
    "CSS3",
    "SASS",
    "LESS",
    "Bootstrap",
    "Tailwind CSS",
    "Material UI",
    "JQuery",
    "Vue.js",
    "Angular",
    "Ember.js",
    "Backbone.js",
    "JavaScript Frameworks",
    "WebAssembly",
    "WebRTC",
    "WebSockets",
    "Web APIs",
    "Canvas API",
    "Geolocation API",
    "Fetch API",
    "IndexedDB",
    "Service Workers",
    "Web Scraping",
    "BeautifulSoup",
    "Selenium WebDriver",
    "Scrapy",
    "Puppeteer",
    "Requests",
    "HTTP Requests",
    "API Calls",
    "GraphQL Queries",
    "Database Queries",
    "Data Modeling",
    "ER Diagrams",
    "Relational Databases",
    "Non-relational Databases",
    "Data Backup",
    "Data Replication",
    "Data Migration",
    "Database Indexing",
    "Database Sharding",
    "Data Consistency",
    "Data Integrity",
    "ACID Principles",
    "CAP Theorem",
    "Database Transactions",
    "ACID Compliance",
    "Database Normalization",
    "SQL Joins",
    "SQL Subqueries",
    "SQL Views",
    "SQL Stored Procedures",
    "SQL Triggers",
    "Database Constraints",
    "Foreign Keys",
    "Primary Keys",
    "Indexes",
    "NoSQL Databases",
    "Cassandra",
    "Redis",
    "Memcached",
    "Graph Databases",
    "Neo4j",
    "OrientDB",
    "ArangoDB",
    "Time-Series Databases",
    "InfluxDB",
    "Prometheus",
    "Telegraf",
    "Grafana",
    "TimescaleDB",
    "Data Lakes",
    "Data Warehousing",
    "ETL Pipelines",
    "Data Transformation",
    "Data Loading",
    "Data Extraction",
    "Apache NiFi",
    "Talend",
    "Fivetran",
    "Airflow",
    "DBT",
    "Apache Flume",
    "Kafka Streams",
    "AWS Glue",
    "Google Cloud Dataflow",
    "Microsoft Azure Data Factory",
    "Data Quality",
    "Data Lineage",
    "Data Cataloging",
    "Data Governance",
    "Data Privacy Regulations",
    "Data Encryption",
    "Tokenization",
    "Masking",
    "Access Control",
    "Data Auditing",
    "Data Compliance",
    "GDPR Compliance",
    "HIPAA Compliance",
    "Data Stewardship",
    "Metadata Management",
    "Data Archiving",
    "Cloud Data Storage",
    "Data Access Management",
    "Batch Processing",
    "Real-Time Data Processing",
    "Stream Processing",
    "Event-Driven Architecture",
    "Data Fusion",
    "Data Integration",
    "Message Queues",
    "Event Streams",
    "Serverless Data Pipelines",
    "Data Analysis",
    "Data Profiling",
    "Data Metrics",
    "Data Visualization Tools",
    "Power BI",
    "Tableau",
    "QlikView",
    "Looker",
    "Google Data Studio",
    "Kibana",
    "Grafana",
    "D3.js",
    "Plotly",
    "ggplot2",
    "Vega-Lite",
    "Business Intelligence",
    "Data Dashboards",
    "Reports",
    "Data Insights",
    "Predictive Analytics",
    "Prescriptive Analytics",
    "Descriptive Analytics",
    "Analytics Automation",
    "Advanced Analytics",
    "Data Analytics Platforms",
    "Sales Analytics",
    "Marketing Analytics",
    "Financial Analytics",
    "Customer Analytics",
    "Supply Chain Analytics",
    "Healthcare Analytics",
    "Sports Analytics",
    "Retail Analytics",
    "HR Analytics",
    "Public Sector Analytics",
    "AI-driven Analytics",
    "Predictive Modeling",
    "AI Chatbots",
    "Intelligent Assistants",
    "Cognitive Computing",
    "Human-Computer Interaction",
    "Speech Synthesis",
    "Speech Processing",
    "Natural Language Generation (NLG)",
    "Text-to-Speech",
    "Voice Synthesis",
    "Multimodal AI",
    "AI for Business Intelligence",
    "AI in Marketing",
    "AI in Sales",
    "AI in HR",
    "AI in Finance",
    "AI in Manufacturing",
    "AI in Logistics",
    "AI in Healthcare",
    "AI in Education",
    "AI in Retail",
    "AI in Customer Service",
    "AI in Legal Services",
    "AI in Insurance",
    "AI in Energy",
    "AI for Automation",
    "AI for Security",
    "AI for Fraud Detection",
    "AI for Healthcare Diagnosis",
    "AI for Risk Management",
    "AI for Predictive Maintenance",
    "AI for Personalization",
    "AI for Analytics",
    "AI for Content Creation",
    "AI for Data Science",
    "AI for Image Processing",
    "AI for Speech Recognition",
    "AI for Natural Language Understanding (NLU)",
    "AI for Data Mining",
    "AI for Knowledge Discovery",
    "AI for Search Optimization",
    "AI for Data Integration",
    "AI for Cybersecurity",
    "AI for Financial Forecasting",
    "AI for Decision Support",
    "AI for Smart Cities",
    "AI for Autonomous Vehicles",
    "AI for Virtual Assistants",
    "AI for Chatbots",
    "AI for Document Automation",
    "AI for Legal Compliance",
    "AI for Credit Scoring",
    "AI for Customer Insights",
    "AI for Marketing Optimization",
    "AI for Campaign Management",
    "AI for Social Media Marketing",
    "AI for Digital Advertising",
    "AI for Sentiment Analysis",
    "AI for Speech Analytics",
    "AI for Text Analytics",
    "AI for Social Media Monitoring",
    "AI for Email Marketing",
    "AI for Ecommerce Optimization",
    "AI for Fraud Prevention",
    "AI for Personalized Recommendations",
    "AI for Healthcare Operations",
    "AI for Customer Retention",
    "AI for Financial Modeling",
    "AI for Real Estate Analysis",
    "AI for Property Valuation",
    "AI for Wealth Management",
    "AI for Portfolio Optimization",
    "AI for Risk Assessment",
    "AI for Anti-Money Laundering (AML)",
    "AI for Tax Fraud Detection",
    "AI for Healthcare Fraud Detection",
    "AI for Autonomous Drones",
    "AI for Smart Manufacturing",
    "AI for Process Automation",
    "AI for Predictive Analytics in Marketing",
    "AI for Personalized Learning",
    "AI for Content Moderation",
    "AI for Sentiment Monitoring",
    "AI for Learning Management Systems",
    "AI for Knowledge Management",
    "AI for Virtual Classrooms",
    "AI for Gamification",
    "AI for Online Education",
    "AI for Skill Development",
    "AI for Employee Engagement",
    "AI for Performance Management",
    "AI for Organizational Development",
    "AI for HR Automation",
    "AI for Resume Screening",
    "AI for Recruiting",
    "Python",
    "JavaScript",
    "SQL",
    "C++",
    "React.js",
    "Node.js",
    "Django",
    "Flask",
    "TensorFlow",
    "PyTorch",
    "Docker",
    "Kubernetes",
    "AWS",
    "Google Cloud Platform (GCP)",
    "RESTful APIs",
    "GraphQL",
    "Git",
    "CI/CD Pipelines",
    "Cybersecurity Principles",
    "Data Visualization" ]
    # courses = '''
    
    # "Python for Everybody - University of Michigan (Coursera)",
    # "JavaScript: Understanding the Weird Parts (Udemy)",
    # "The Complete SQL Bootcamp 2024 (Udemy)",
    # "Docker for Beginners (Udemy)",
    # "Deep Learning Specialization - Andrew Ng (Coursera)",
    # "React - The Complete Guide (Udemy)",
    # "AWS Certified Solutions Architect - Associate (Udemy)",
    # "Introduction to Cyber Security (Udemy)",
    # "Git & GitHub for Beginners (Udemy)",
    # "Data Visualization with Python (Coursera)",
    # "Complete Python Developer in 2024 - Zero to Mastery (Udemy)",
    # "The Modern JavaScript Bootcamp (Udemy)",
    # "SQL for Data Science - University of California, Davis (Coursera)",
    # "Docker Mastery: With Kubernetes & Swarm (Udemy)",
    # "TensorFlow for Beginners (Udemy)",
    # "Full-Stack Web Development with React (Coursera)",
    # "AWS Fundamentals (Coursera)",
    # "Cybersecurity Specialization - University of Maryland (Coursera)",
    # "Version Control with Git (Coursera)",
    # "Data Visualization with Tableau (Udemy)",
    # "Python Data Structures - University of Michigan (Coursera)",
    # "JavaScript Basics for Beginners (Coursera)",
    # "Advanced SQL for Data Scientists (LinkedIn Learning)",
    # "Docker Deep Dive (Pluralsight)",
    # "Introduction to TensorFlow for AI, ML, and DL (Coursera)",
    # "React for Beginners (Scrimba)",
    # "AWS Cloud Practitioner Essentials (AWS Training)",
    # "The Complete Cyber Security Course (Udemy)",
    # "Mastering Git and GitHub (Udemy)",
    # "Storytelling with Data (LinkedIn Learning)",
    # "Python Programming Masterclass (Udemy)",
    # "Eloquent JavaScript (Self-paced Book & Online Exercises)",
    # "Databases and SQL for Data Science with Python (Coursera)",
    # "Kubernetes and Docker: The Complete Guide (Udemy)",
    # "TensorFlow Developer Certificate Training (Udemy)",
    # "Modern React with Redux (Udemy)",
    # "Architecting with AWS (Pluralsight)",
    # "Cybersecurity Essentials (Cisco Networking Academy)",
    # "The Git & GitHub Bootcamp (Udemy)",
    # "Data Visualization with D3.js (Udacity)",
    # "Automate the Boring Stuff with Python (Udemy)",
    # "JavaScript Algorithms and Data Structures (freeCodeCamp)",
    # "Introduction to SQL (Codecademy)",
    # "Docker Essentials: A Developer Introduction (IBM Skills Network)",
    # "TensorFlow in Practice (Coursera)",
    # "Frontend Development with React (Pluralsight)",
    # "AWS Certified DevOps Engineer (A Cloud Guru)",
    # "CompTIA Security+ Certification Training (Udemy)",
    # "Learn Git & GitHub (Codecademy)",
    # "Advanced Data Visualization with Python and Matplotlib (Pluralsight)"
    
    # '''
    courses = '''
    "Python for Data Science and Machine Learning",
    "JavaScript: The Advanced Concepts",
    "SQL for Data Analysis",
    "C++ Programming from Beginner to Expert",
    "Full-Stack Web Development with React and Node.js",
    "Django and Flask: Web Development with Python",
    "Deep Learning with TensorFlow and PyTorch",
    "Docker and Kubernetes: The Complete Guide",
    "AWS Certified Solutions Architect Course",
    "Google Cloud Platform (GCP) Fundamentals",
    "Building RESTful APIs with FastAPI",
    "GraphQL with Apollo and Node.js",
    "Version Control with Git and GitHub",
    "CI/CD Pipelines with Jenkins, GitHub Actions, and GitLab CI/CD",
    "Cybersecurity Fundamentals: Ethical Hacking and Penetration Testing",
    "Data Visualization with Matplotlib, Seaborn, and Tableau",
    "Machine Learning A-Z: Hands-On Python and R",
    "NLP with Python: Text Mining and Sentiment Analysis",
    "Computer Vision: OpenCV and Deep Learning",
    "Big Data and Hadoop Ecosystem",
    "Apache Spark for Big Data Processing",
    "Kafka Fundamentals and Real-Time Streaming",
    "PostgreSQL and MySQL Database Mastery",
    "MongoDB for NoSQL Databases",
    "Redis and Elasticsearch for High-Performance Databases",
    "Terraform and Ansible: Infrastructure as Code",
    "Linux Administration and Shell Scripting",
    "PowerShell for Windows Automation",
    "TypeScript and Modern JavaScript Development",
    "Vue.js and Angular: Frontend Web Development",
    "Svelte for Lightweight Web Apps",
    "Ruby on Rails: Full-Stack Web Development",
    "Spring Boot and Hibernate for Java Development",
    "Laravel for PHP Web Development",
    "ASP.NET Core for Modern Web Apps",
    "Swift and Kotlin for Mobile App Development",
    "Flutter and Dart: Cross-Platform Mobile Development",
    "Rust and GoLang for System Programming",
    "R Programming for Data Science",
    "Advanced Pandas and NumPy for Data Analysis",
    "XGBoost, LightGBM, and CatBoost for Advanced Machine Learning",
    "Jupyter Notebooks for Data Science",
    "FastAPI and Streamlit for Data Applications",
    "Microsoft Azure Data Engineering and AI",
    "Snowflake and Databricks for Data Warehousing",
    "Apache Airflow for Workflow Automation",
    "ETL Pipelines with Python and SQL",
    "Feature Engineering and Data Preprocessing",
    "A/B Testing and Hypothesis Testing for Data Science",
    "Time Series Analysis and Forecasting",
    "Reinforcement Learning with Python",
    "Generative Adversarial Networks (GANs) Masterclass",
    "Transformers and LLMs: BERT, GPT, and More",
    "LangChain and Llama Index for LLM Applications",
    "FAISS, Pinecone, and ChromaDB for Vector Databases",
    "LLM Fine-Tuning and Deployment",
    "Hugging Face Transformers and OpenAI API",
    "Groq API for High-Speed AI Applications",
    "Whisper Model for Speech Recognition",
    "Text-to-Speech and ASR Technologies",
    "Jenkins, Travis CI, and CircleCI for DevOps",
    "GitHub Actions and GitLab CI/CD for Automation",
    "Penetration Testing and Ethical Hacking",
    "Blockchain Development with Solidity and Ethereum",
    "Smart Contracts and DeFi Applications",
    "Web3.js and Ethers.js for Blockchain Development",
    "Metaverse and AR/VR Development",
    "Unity and Unreal Engine for Game Development",
    "3D Modeling with Blender and Maya",
    "UX/UI Design with Figma, Adobe XD, and Sketch",
    "Digital Marketing and SEO Optimization",
    "Google Analytics and Google Tag Manager",
    "Social Media Marketing and Content Strategy",
    "Scrum, Agile, and Kanban Project Management",
    "JIRA, Trello, and Confluence for Team Collaboration",
    "No-Code and Low-Code Development with Bubble.io and Webflow",
    "E-commerce Development with Shopify and WooCommerce",
    "Customer Relationship Management (CRM) with Salesforce and HubSpot",
    "Business Intelligence with Power BI, Tableau, and Google Data Studio",
    "SQL Server, Oracle Database, and PL/SQL for Enterprise Applications",
    "Graph Databases with Neo4j and D3.js Visualization",
    "Edge Computing and IoT Development",
    "Embedded Systems and Firmware Development",
    "Robotics and Autonomous Systems Programming",
    "Cybersecurity and Cloud Security Fundamentals",
    "DevSecOps and Zero Trust Security Implementation",
    "AI Ethics, Explainable AI (XAI), and AI Governance",
    "Data Privacy Regulations: GDPR, CCPA, and HIPAA Compliance",
    "Fintech Development and Algorithmic Trading",
    "Quantitative Analysis and Risk Management",
    "Market Analysis and Predictive Analytics"
    '''
    # goals='Want to improve my tech team','Enhancing Cybersecurity & Compliance', 'Driving Innovation & Technology Adoption'
    # challenges='My tech team is not able to work efficiently ','Cybersecurity Threats','Talent Acquisition and Retention', 'Software Development and Deployment Challenges','Integration of Emerging Technologies'
    goals = [
    "Want to improve my tech team",
    "Enhancing Cybersecurity & Compliance",
    "Driving Innovation & Technology Adoption"
    ]

    challenges = [
        "My tech team is not able to work efficiently",
        "Cybersecurity Threats",
        "Talent Acquisition and Retention",
        "Software Development and Deployment Challenges",
        "Integration of Emerging Technologies"
    ]
    client_id = 809
    #skill_tree,token_consumed  = tna(skills_db = skillset, Goals=Goals,Challenges=Challenge, groq_api_key=GROQ_API_KEY)
    #skill_tree,token_consumed  = tna2(Goals=Goals,Challenges=Challenge, groq_api_key=GROQ_API_KEY)
    #skill_tree,token_consumed  = description_skill(industry=industry,groq_api_key=GROQ_API_KEY)
    #skill_tree = run_tna_pipeline(challenges, goals, client_id, GROQ_API_KEY)
    create_and_save_embeddings(skills, courses, client_id)
    matched_skills, matched_courses = retrieve_skills_and_courses(challenges, goals, client_id)
    # Generate structured recommendations using LLM
    final_recommendations = generate_tna_report(challenges, goals, matched_skills, matched_courses, GROQ_API_KEY)
    ##print(token_consumed)
    print(final_recommendations)
    #print(type(skill_tree))