import os, requests, json, logging, re, uuid
from typing import Dict, List, Any, Optional

# Root logging configuration + module-level logger used by every helper below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("skill_assessment_processor")

# gemma3:12b — highest-capability model in your local Ollama registry
# Both the endpoint and the model are overridable via environment variables.
OLLAMA_API_URL = os.getenv("OLLAMA_API_URL", "http://localhost:11434/api/generate")
OLLAMA_MODEL   = os.getenv("OLLAMA_MODEL",   "gemma3:12b")

# Difficulty level mapping
# Numeric level accepted by generate_skill_assessment() → human-readable label
# embedded in the prompt and in the output JSON.
DIFFICULTY_LEVEL_MAP: Dict[int, str] = {
    1: "Beginner",
    2: "Intermediate",
    3: "Advanced",
}

# Stable skill identifier per difficulty label; written into each question's
# "skills" entry by _validate_and_repair_3q().
SKILL_ID_MAP: Dict[str, str] = {
    "Beginner":     "S1",
    "Intermediate": "S2",
    "Advanced":     "S3",
}


# ── Helpers ────────────────────────────────────────────────────────────────

def _extract_json(text: str) -> str:
    """Strip markdown fences and pull the first complete JSON object."""
    text = re.sub(r"```(?:json)?", "", text).replace("```", "").strip()
    start = text.find("{")
    if start == -1:
        return text
    depth = 0
    for i, ch in enumerate(text[start:], start):
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return text[start : i + 1]
    return text[start:]


def _call_ollama(prompt: str, system: str, retries: int = 2) -> str:
    """POST a non-streaming generation request to the Ollama server.

    Retries on any requests-level failure (connection errors, timeouts,
    non-2xx responses) and re-raises the last exception once all attempts
    (initial try + *retries*) are exhausted. Returns the model's "response"
    field, or an empty string when it is absent.
    """
    request_body = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "system": system,
        "stream": False,
        "format": "json",
        "options": {
            "temperature": 0.4,
            "num_ctx": 8192,
            "num_predict": 4000,
            "top_p": 0.9,
            "repeat_penalty": 1.1,
        },
    }
    total_attempts = retries + 1
    # Seed so the final `raise` is always well-defined even if the loop
    # somehow never runs (e.g. retries < 0).
    last_exc: Exception = RuntimeError("No attempts made")
    for attempt in range(1, total_attempts + 1):
        try:
            logger.info(f"Ollama call attempt {attempt}/{retries + 1} ...")
            response = requests.post(OLLAMA_API_URL, json=request_body, timeout=360)
            response.raise_for_status()
            return response.json().get("response", "")
        except requests.exceptions.RequestException as exc:
            last_exc = exc
            logger.warning(f"Ollama attempt {attempt} failed: {exc}")
    raise last_exc


def _build_previously_generated_summary(
    previously_generated_questions: List[Dict],
) -> str:
    """
    Build a human-readable summary of previously generated questions so the
    LLM knows exactly which questions to avoid.
    """
    if not previously_generated_questions:
        return "None — this is the first attempt for this difficulty level."

    lines = []
    for i, q in enumerate(previously_generated_questions, 1):
        q_text = q.get("question_name", "").strip()
        if q_text:
            lines.append(f"  {i}. {q_text}")

    if not lines:
        return "None — this is the first attempt for this difficulty level."

    return "\n".join(lines)


def _build_company_patterns(company_name: str) -> List[re.Pattern]:
    """
    Build a list of compiled regex patterns that match the company name and
    its common variations (with/without corporate suffixes, partial tokens,
    abbreviations, etc.).
    """
    name = company_name.strip()
    if not name:
        return []

    # Corporate suffixes to strip when building variations
    _CORP_SUFFIXES = (
        r"\s*(?:Pvt\.?\s*Ltd\.?|Private\s+Limited|Ltd\.?|Limited|"
        r"Inc\.?|Incorporated|LLC|LLP|Corp\.?|Corporation|"
        r"Co\.?\s*Ltd\.?|Co\.?|Pte\.?\s*Ltd\.?|GmbH|AG|S\.?A\.?|NV|PLC)"
    )

    patterns: List[re.Pattern] = []

    # 1. Full company name (with optional suffixes)
    escaped_full = re.escape(name)
    patterns.append(
        re.compile(escaped_full + r"\s*" + _CORP_SUFFIXES + r"?", re.IGNORECASE)
    )

    # 2. Company name without any trailing corporate suffix
    core_name = re.sub(
        r"\s+(?:Pvt|Private|Ltd|Limited|Inc|Incorporated|LLC|LLP|Corp|"
        r"Corporation|Co|Pte|GmbH|AG|SA|NV|PLC)[\s.]*$",
        "", name, flags=re.IGNORECASE,
    ).strip()
    # Remove another layer (e.g. "Ascent Cyber Solutions Pvt Ltd" -> "Ascent Cyber Solutions")
    core_name = re.sub(
        r"\s+(?:Pvt|Private|Ltd|Limited|Inc|Incorporated|LLC|LLP|Corp|"
        r"Corporation|Co|Pte|GmbH|AG|SA|NV|PLC)[\s.]*$",
        "", core_name, flags=re.IGNORECASE,
    ).strip()

    if core_name and core_name.lower() != name.lower():
        escaped_core = re.escape(core_name)
        patterns.append(
            re.compile(escaped_core + r"\s*" + _CORP_SUFFIXES + r"?", re.IGNORECASE)
        )

    return patterns


def _sanitize_company_references(text: str, patterns: List[re.Pattern]) -> str:
    """
    Remove any company-name reference from *text*.

    Strategy (in order):
    1. If company name appears as the start of a sentence followed by more
       content (e.g. "Ascent Cyber Solutions uses Python for parsing log files."),
       remove that entire leading clause/sentence.
    2. Otherwise, replace the company name inline, leaving the surrounding
       text intact.
    3. Clean up leftover whitespace / punctuation artefacts.
    """
    if not text or not patterns:
        return text

    cleaned = text
    for pat in patterns:
        # First pass: remove full leading sentences that begin with the company name
        # e.g. "CompanyX uses Python for X. Which of the following..." → "Which of the following..."
        cleaned = re.sub(
            r"(?:^|(?<=\. ))" + pat.pattern + r"[^.?!]*[.?!]\s*",
            "", cleaned, flags=re.IGNORECASE,
        )
        # Second pass: remove any remaining inline mentions
        cleaned = pat.sub("", cleaned)

    # Clean up artefacts: double spaces, leading/trailing whitespace, orphan punctuation
    cleaned = re.sub(r"\s{2,}", " ", cleaned).strip()
    cleaned = re.sub(r"^[,;:\s]+", "", cleaned).strip()
    # Ensure first character is uppercase if text remains
    if cleaned and cleaned[0].islower():
        cleaned = cleaned[0].upper() + cleaned[1:]

    return cleaned


def _validate_and_repair_3q(
    data: Dict,
    skill_gap: str,
    designation: str,
    company_name: str,
    difficulty_label: str,
    attempt: int,
    skill_id: str,
) -> Dict:
    """
    Enforce structure for exactly 3 questions at a single difficulty level.
    - Type 2 = single-correct MCQ  (exactly 1 isCorrect: true)
    - Type 3 = multi-correct MCQ   (at least 2 isCorrect: true)
    - Each question must have exactly 4 options.

    Mutates *data* (and its nested question/option dicts) in place and
    returns it: questionId/difficulty/skills are overwritten with canonical
    values, optionIds are renumbered A-D, invalid correct-answer counts are
    repaired with logged defaults, and company-name mentions are stripped
    from question and option text.

    Raises
    ------
    ValueError
        If the question count is not exactly 3, a question does not have
        exactly 4 options, or a question's questionText ends up empty.
    """
    questions: List[Dict] = data.get("questions", [])

    if len(questions) != 3:
        raise ValueError(
            f"Expected exactly 3 questions, got {len(questions)}. "
            "Ensure the LLM returns the correct count."
        )

    # Build company-name regex patterns once for the whole batch
    company_patterns = _build_company_patterns(company_name)

    # Recommended type pattern for a 3-question set: Q1=Type2, Q2=Type3, Q3=Type2
    # Used only as a fallback when the LLM returned an invalid questionType.
    default_type_pattern = [2, 3, 2]

    repaired = []
    for idx, q in enumerate(questions):
        # Canonical metadata always wins over whatever the LLM produced.
        q["questionId"] = f"Q{idx + 1}"
        q["difficulty"] = difficulty_label

        qt = q.get("questionType")
        if qt not in (2, 3):
            q["questionType"] = default_type_pattern[idx]

        opts = q.get("options", [])
        if len(opts) != 4:
            raise ValueError(
                f"Q{idx + 1} must have exactly 4 options, got {len(opts)}."
            )

        # Normalise optionIds to A, B, C, D
        for oi, opt in enumerate(opts):
            opt["optionId"] = chr(65 + oi)  # 65 == ord("A")
            opt.setdefault("isCorrect", False)

        # Type 2 — enforce exactly one correct answer
        if q["questionType"] == 2:
            correct = [o for o in opts if o.get("isCorrect")]
            if len(correct) != 1:
                logger.warning(
                    f"Q{idx + 1} (Type 2) had {len(correct)} correct options; "
                    "defaulting correct answer to option B."
                )
                for oi, opt in enumerate(opts):
                    opt["isCorrect"] = oi == 1  # default → B

        # Type 3 — enforce at least two correct answers
        # NOTE(review): this forces A and B to True but does not clear any
        # other option already marked correct — confirm that is intended.
        if q["questionType"] == 3:
            correct = [o for o in opts if o.get("isCorrect")]
            if len(correct) < 2:
                logger.warning(
                    f"Q{idx + 1} (Type 3) had only {len(correct)} correct option(s); "
                    "defaulting correct answers to options A and B."
                )
                opts[0]["isCorrect"] = True
                opts[1]["isCorrect"] = True

        # ── Sanitize company name from question and option text ────────
        original_q_text = q.get("questionText", "")
        q["questionText"] = _sanitize_company_references(original_q_text, company_patterns)
        for opt in opts:
            original_opt = opt.get("optionText", "")
            opt["optionText"] = _sanitize_company_references(original_opt, company_patterns)

        if original_q_text != q["questionText"]:
            logger.info(
                f"Q{idx + 1}: Stripped company-name reference from questionText. "
                f"Before: {original_q_text!r}  After: {q['questionText']!r}"
            )

        q["options"] = opts
        q["skills"] = [
            {
                "skillId":    skill_id,
                "skillName":  skill_gap,
                "skillLevel": difficulty_label,
                "attempt":    attempt,
            }
        ]

        # Checked after sanitization: stripping the company name may have
        # emptied the text entirely.
        if not q.get("questionText", "").strip():
            raise ValueError(f"Q{idx + 1} has an empty questionText.")

        repaired.append(q)

    # Slug for the fallback assessmentId: lowercase company name with
    # non-alphanumerics replaced by "-", capped at 25 characters.
    safe_id = re.sub(r"[^a-z0-9]", "-", company_name.lower())[:25]
    data["questions"]      = repaired
    data["totalQuestions"] = 3
    data.setdefault(
        "assessmentId",
        f"assessment-{safe_id}-{difficulty_label.lower()}-a{attempt}-{uuid.uuid4().hex[:6]}",
    )
    data.setdefault(
        "assessmentTitle",
        f"{skill_gap} {difficulty_label} Assessment for {designation}",
    )
    return data


# ── Public API ─────────────────────────────────────────────────────────────

def generate_skill_assessment(
    user_details: Dict[str, Any],
    company_name: str,
    questions_difficulty_level: int = 1,
    attempt: int = 1,
    previously_generated_questions: Optional[List[Dict]] = None,
) -> Dict[str, Any]:
    """
    Generate exactly 3 skill-assessment questions for a single difficulty level
    and attempt combination, ensuring no previously generated questions are repeated.

    Parameters
    ----------
    user_details : dict
        Keys: skill_gap_name, designation_name, job_profile_name
    company_name : str
        Target organisation name.
    questions_difficulty_level : int
        1 = Beginner | 2 = Intermediate | 3 = Advanced
    attempt : int
        Which attempt for this difficulty level (1-based).
        A higher attempt number guarantees fresh questions distinct from
        all prior attempts for the same level.
    previously_generated_questions : list[dict] | None
        All previously generated questions at the *same* difficulty level.
        Pass an empty list (or None) for attempt 1.

    Returns
    -------
    dict
        The validated assessment payload. On failure (Ollama unreachable,
        malformed JSON, or validation error) an error payload is returned
        instead: it carries an "error" key, totalQuestions = 0, and an
        empty "questions" list — this function never raises to the caller
        for those cases.
    """
    if previously_generated_questions is None:
        previously_generated_questions = []

    # Fall back to generic values for any missing/empty user detail.
    skill_gap   = (user_details.get("skill_gap_name")   or "General Technical Skills").strip()
    designation = (user_details.get("designation_name") or "Software Engineer").strip()
    job_profile = (user_details.get("job_profile_name") or "Technical Professional").strip()
    company     = company_name.strip() or "the organisation"

    # Resolve difficulty label; default to Beginner for unknown values
    difficulty_label = DIFFICULTY_LEVEL_MAP.get(questions_difficulty_level, "Beginner")
    skill_id         = SKILL_ID_MAP[difficulty_label]

    logger.info(
        f"Generating [{skill_gap}] | Difficulty: {difficulty_label} "
        f"| Attempt: {attempt} | Company: {company} "
        f"| Previously generated Q count: {len(previously_generated_questions)}"
    )

    # Build the exclusion block so the LLM does not repeat earlier questions
    exclusion_block = _build_previously_generated_summary(previously_generated_questions)

    system_prompt = (
        "You are a senior technical recruiter and subject-matter expert. "
        "You create high-quality, unambiguous multiple-choice skill-assessment questions. "
        "Your output MUST be a single, complete, valid JSON object only. "
        "No markdown fences, no commentary, no truncation. "
        "Every opening brace and bracket must be properly closed. "
        "IMPORTANT: Never include any company name, organisation name, or brand name "
        "in questionText or optionText. All questions must be purely technical "
        "and universally applicable."
    )

    # Use a generic slug for assessmentId so the LLM never sees the company name
    safe_id = uuid.uuid4().hex[:12]

    prompt = f"""Generate a technical skill-assessment JSON for the following candidate:

Designation          : {designation}
Job Profile          : {job_profile}
Skill Focus          : {skill_gap}
Difficulty Level     : {difficulty_label}
Current Attempt      : {attempt}

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
PREVIOUSLY GENERATED QUESTIONS — DO NOT REPEAT THESE
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
{exclusion_block}

STRICT RULES
1.  Generate EXACTLY 3 questions — no more, no less.
2.  ALL 3 questions MUST be at "{difficulty_label}" difficulty.
3.  Question types:
      Type 2 = single-correct MCQ  → exactly 1 option has isCorrect: true
      Type 3 = multi-correct MCQ   → at least 2 options have isCorrect: true
4.  Recommended type pattern: Q1=Type 2, Q2=Type 3, Q3=Type 2
5.  Each question MUST have EXACTLY 4 options with optionIds A, B, C, D.
6.  Questions must be technically rigorous, clearly worded, and DIFFERENT
    from every question listed in the "PREVIOUSLY GENERATED QUESTIONS" section above.
7.  Do NOT include any placeholder text or ellipsis — return real content only.
8.  NEVER mention any company name, organisation name, or brand name in the
    questionText or optionText fields. Questions must be purely technical and
    universally applicable — for example, write "Which function is used to
    open a file in Python?" instead of "CompanyX often needs to read data
    from files. Which function is used to open a file in Python?".
    The company name is strictly internal metadata and must NEVER appear in
    any question or option content.

Return ONLY this JSON structure (all 3 questions fully populated):

{{
  "assessmentId": "assessment-{safe_id}-{difficulty_label.lower()}-a{attempt}-01",
  "assessmentTitle": "{skill_gap} {difficulty_label} Assessment for {designation}",
  "totalQuestions": 3,
  "questions": [
    {{
      "questionId": "Q1",
      "questionText": "<your {difficulty_label} question here>",
      "difficulty": "{difficulty_label}",
      "questionType": 2,
      "skills": [{{"skillId": "{skill_id}", "skillName": "{skill_gap}", "skillLevel": "{difficulty_label}", "attempt": {attempt}}}],
      "options": [
        {{"optionId": "A", "optionText": "<option text>", "isCorrect": false}},
        {{"optionId": "B", "optionText": "<correct answer>", "isCorrect": true}},
        {{"optionId": "C", "optionText": "<option text>", "isCorrect": false}},
        {{"optionId": "D", "optionText": "<option text>", "isCorrect": false}}
      ]
    }},
    {{
      "questionId": "Q2",
      "questionText": "<your {difficulty_label} question here>",
      "difficulty": "{difficulty_label}",
      "questionType": 3,
      "skills": [{{"skillId": "{skill_id}", "skillName": "{skill_gap}", "skillLevel": "{difficulty_label}", "attempt": {attempt}}}],
      "options": [
        {{"optionId": "A", "optionText": "<correct answer>", "isCorrect": true}},
        {{"optionId": "B", "optionText": "<correct answer>", "isCorrect": true}},
        {{"optionId": "C", "optionText": "<option text>", "isCorrect": false}},
        {{"optionId": "D", "optionText": "<option text>", "isCorrect": false}}
      ]
    }},
    {{
      "questionId": "Q3",
      "questionText": "<your {difficulty_label} question here>",
      "difficulty": "{difficulty_label}",
      "questionType": 2,
      "skills": [{{"skillId": "{skill_id}", "skillName": "{skill_gap}", "skillLevel": "{difficulty_label}", "attempt": {attempt}}}],
      "options": [
        {{"optionId": "A", "optionText": "<option text>", "isCorrect": false}},
        {{"optionId": "B", "optionText": "<option text>", "isCorrect": false}},
        {{"optionId": "C", "optionText": "<correct answer>", "isCorrect": true}},
        {{"optionId": "D", "optionText": "<option text>", "isCorrect": false}}
      ]
    }}
  ]
}}
"""

    # ── Ollama call ────────────────────────────────────────────────────────
    try:
        raw_text = _call_ollama(prompt, system_prompt)
    except Exception as exc:
        logger.error(f"Ollama call failed: {exc}")
        return {
            "error": f"Failed to contact Ollama service: {exc}",
            "assessmentId": "error",
            "assessmentTitle": "Error generating assessment",
            "totalQuestions": 0,
            "questions": [],
        }

    # ── JSON extraction ────────────────────────────────────────────────────
    json_str = _extract_json(raw_text)

    try:
        parsed = json.loads(json_str)
    except json.JSONDecodeError as exc:
        logger.error(f"JSON parse error: {exc}\nSnippet: {json_str[:500]}")
        return {
            "error": f"LLM returned malformed JSON: {exc}",
            "assessmentId": "error",
            "assessmentTitle": "Error generating assessment",
            "totalQuestions": 0,
            "questions": [],
        }

    # ── Validation & repair ────────────────────────────────────────────────
    try:
        final = _validate_and_repair_3q(
            data=parsed,
            skill_gap=skill_gap,
            designation=designation,
            company_name=company,
            difficulty_label=difficulty_label,
            attempt=attempt,
            skill_id=skill_id,
        )
    except ValueError as exc:
        logger.error(f"Validation failed: {exc}")
        return {
            "error": f"Assessment validation failed: {exc}",
            "assessmentId": "error",
            "assessmentTitle": "Error generating assessment",
            "totalQuestions": 0,
            "questions": [],
        }

    logger.info(
        f"Assessment ready: {final['assessmentId']} "
        f"| {final['totalQuestions']} questions "
        f"| Difficulty: {difficulty_label} | Attempt: {attempt}"
    )
    return final


# ── CLI smoke-test ─────────────────────────────────────────────────────────
if __name__ == "__main__":
    demo_user = {
        "skill_gap_name":    "Python",
        "designation_name":  "Senior Software Engineer",
        "job_profile_name":  "Software Engineer",
    }
    demo_company = "Ascent Cyber Solutions Pvt Ltd"

    # Pretend these questions were produced earlier, so attempt 2 exercises
    # the exclusion logic.
    earlier_questions = [
        {"question_name": "Which of the following data types is immutable in Python?"},
        {"question_name": "What is the primary purpose of the `try...except` block in Python? Select all that apply."},
        {"question_name": "Which function is used to read input from the user in Python?"},
    ]

    # Attempt 1 — no prior questions to avoid.
    print("=== Attempt 1 (Beginner) ===")
    first_run = generate_skill_assessment(
        user_details=demo_user,
        company_name=demo_company,
        questions_difficulty_level=1,
        attempt=1,
        previously_generated_questions=[],
    )
    print(json.dumps(first_run, indent=4))

    # Attempt 2 — must avoid everything in earlier_questions.
    print("\n=== Attempt 2 (Beginner — avoid previously generated) ===")
    second_run = generate_skill_assessment(
        user_details=demo_user,
        company_name=demo_company,
        questions_difficulty_level=1,
        attempt=2,
        previously_generated_questions=earlier_questions,
    )
    print(json.dumps(second_run, indent=4))