o
    ;iS                     @   s4  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
mZ ejejd edZe ddZe ddZd	d
ddZeeef ed< ddddZeeef ed< dedefddZdBdedededefddZde	e defddZd ede	ej fd!d"Zded#e	ej defd$d%Zd&ed'ed(ed ed)ed*ed+edefd,d-Z	.	.		dCd/eee
f d ed0ed*edee	e  d)ee deee
f fd1d2Ze d3krd4d5d6d7Z!d8d9id8d:id8d;igZ"e#d< ee!d=d.d.g d>Z$e#ej%e$d?d@ e#dA ee!d=d.de"d>Z&e#ej%e&d?d@ dS dS )D    N)DictListAnyOptional)levelskill_assessment_processorOLLAMA_API_URLz#http://localhost:11434/api/generateOLLAMA_MODELz
gemma3:12bBeginnerIntermediateAdvanced)         DIFFICULTY_LEVEL_MAPS1S2S3)r
   r   r   SKILL_ID_MAPtextreturnc                 C   s   t dd| dd } | d}|dkr| S d}t| |d |D ]#\}}|dkr0|d7 }q#|d	krF|d8 }|dkrF| ||d    S q#| |d S )
z>Strip markdown fences and pull the first complete JSON object.z```(?:json)? z```{r   Nr   })resubreplacestripfind	enumerate)r   startdepthich r%   Q/var/www/eduai.edurigo.com/skill_assessment/testing/skill_assessment_processor.py_extract_json   s   

r'   r   promptsystemretriesc                 C   s   t | |ddddddddd	}td
}td|d D ]H}z%td| d|d  d tjt|dd}|  |	 
ddW   S  tjjyb } z|}td| d|  W Y d }~qd }~ww |)NFjsong?i    i  g?g?)temperaturenum_ctxnum_predicttop_prepeat_penalty)modelr(   r)   streamformatoptionszNo attempts mader   r   zOllama call attempt /z ...ih  )r+   timeoutresponser   zOllama attempt z	 failed: )r	   RuntimeErrorrangeloggerinforequestspostr   raise_for_statusr+   get
exceptionsRequestExceptionwarning)r(   r)   r*   payloadlast_excattemptrespexcr%   r%   r&   _call_ollama,   s2   "rH   previously_generated_questionsc                 C   s\   | sdS g }t | dD ]\}}|dd }|r$|d| d|  q|s)dS d|S )z
    Build a human-readable summary of previously generated questions so the
    LLM knows exactly which questions to avoid.
    u=   None — this is the first attempt for this difficulty level.r   question_namer   z  z. 
)r    r?   r   appendjoin)rI   linesr#   qq_textr%   r%   r&   #_build_previously_generated_summaryH   s   
rQ   company_namec                 C   s   |   }|sg S d}g }t|}|t|d | d tj tjdd|tjd  }tjdd|tjd  }|rX| | krXt|}|t|d | d tj |S )z
    Build a list of compiled regex patterns that match the company name and
    its common variations (with/without corporate suffixes, partial tokens,
    abbreviations, etc.).
    z\s*(?:Pvt\.?\s*Ltd\.?|Private\s+Limited|Ltd\.?|Limited|Inc\.?|Incorporated|LLC|LLP|Corp\.?|Corporation|Co\.?\s*Ltd\.?|Co\.?|Pte\.?\s*Ltd\.?|GmbH|AG|S\.?A\.?|NV|PLC)z\s*?zh\s+(?:Pvt|Private|Ltd|Limited|Inc|Incorporated|LLC|LLP|Corp|Corporation|Co|Pte|GmbH|AG|SA|NV|PLC)[\s.]*$r   flags)r   r   escaperL   compile
IGNORECASEr   lower)rR   name_CORP_SUFFIXESpatternsescaped_full	core_nameescaped_corer%   r%   r&   _build_company_patterns^   s8   

r`   r\   c                 C   s   | r|s| S | }|D ]}t jd|j d d|t jd}|d|}q
t dd| }t dd| }|rH|d  rH|d  |d	d
  }|S )a  
    Remove any company-name reference from *text*.

    Strategy (in order):
    1. If company name appears as the start of a sentence followed by more
       content (e.g. "Ascent Cyber Solutions uses Python for parsing log files."),
       remove that entire leading clause/sentence.
    2. Otherwise, replace the company name inline, leaving the surrounding
       text intact.
    3. Clean up leftover whitespace / punctuation artefacts.
    z(?:^|(?<=\. ))z[^.?!]*[.?!]\s*r   rT   z\s{2,} z	^[,;:\s]+r   r   N)r   r   patternrX   r   islowerupper)r   r\   cleanedpatr%   r%   r&   _sanitize_company_references   s   rg   data	skill_gapdesignationdifficulty_labelrE   skill_idc                 C   s  |  dg }t|dkrtdt| dt|}g d}	g }
t|D ]\}}d|d  |d< ||d	< | d
}|dvrE|	| |d
< | dg }t|dkr`td|d  dt| d|d
 dkrdd |D }t|dkrtd|d  dt| d t|D ]
\}}|dk|d< q|d
 dkrdd |D }t|dk rtd|d  dt| d d|d d< d|d d< t| t|D ]\}}t	d| |d< |
dd q| dd}t|||d< |D ]}| d d}t|||d < q||d krtd|d  d!|d"|d  ||d< ||||d#g|d$< | dd s6td|d  d%|
| q$td&d'| d(d) }|
| d< d| d*< | 
d+d,| d'|  d-| d't jd(d.   | 
d/| d0| d1|  | S )2a  
    Enforce structure for exactly 3 questions at a single difficulty level.
    - Type 2 = single-correct MCQ  (exactly 1 isCorrect: true)
    - Type 3 = multi-correct MCQ   (at least 2 isCorrect: true)
    - Each question must have exactly 4 options.
    	questionsr   z"Expected exactly 3 questions, got z+. Ensure the LLM returns the correct count.)r   r   r   Qr   
questionId
difficultyquestionType)r   r   r4      z" must have exactly 4 options, got .r   c                 S      g | ]	}| d r|qS 	isCorrectr?   .0or%   r%   r&   
<listcomp>       z+_validate_and_repair_3q.<locals>.<listcomp>z (Type 2) had zA correct options; defaulting correct answer to the second option.rv   c                 S   rt   ru   rw   rx   r%   r%   r&   r{      r|   z (Type 3) had only zO correct option(s); defaulting correct answers to the first and second options.Tr   A   optionIdFquestionTextr   
optionTextz=: Stripped company-name reference from questionText. Before: z	  After: )skillId	skillName
skillLevelrE   skillsz has an empty questionText.z	[^a-z0-9]-N   totalQuestionsassessmentIdzassessment--a   assessmentTitlera    Assessment for )r?   len
ValueErrorr`   r    r:   rB   randomshufflechr
setdefaultrg   r;   r   rL   r   r   rY   uuiduuid4hex)rh   ri   rj   rR   rk   rE   rl   rm   company_patternsdefault_type_patternrepairedidxrO   qtoptscorrectoioptoriginal_q_textoriginal_optsafe_idr%   r%   r&   _validate_and_repair_3q   s   

	,r   r   user_detailsquestions_difficulty_levelc                 C   sl  |du rg }|  dpd }|  dpd }|  dpd }| p&d}	|du r1t |d	}t |d
| }
td| d| d| d|	 dt| 
 t|}d}t	 j
dd }dg d| d| d| d| d| d| d| d| d|  d| d| d| d| d | d!| d"|
 d#| d$| d%| d&| d!| d'|
 d#| d$| d%| d(| d!| d"|
 d#| d$| d%| d)}zt||}W n& ty0 } ztd*|  d+| d,d-d.g d/W  Y d}~S d}~ww t|}zt|}W n. tjyj } z td0| d1|dd2   d3| d,d-d.g d/W  Y d}~S d}~ww zt||||	|||
d4}W n& ty } ztd5|  d6| d,d-d.g d/W  Y d}~S d}~ww td7|d8  d9|d:  d;| d|  |S )<a  
    Generate exactly 3 skill-assessment questions for a single difficulty level
    and attempt combination, ensuring no previously generated questions are repeated.

    Parameters
    ----------
    user_details : dict
        Keys: skill_gap_name, designation_name, job_profile_name
    company_name : str
        Target organisation name.
    questions_difficulty_level : int
        1 = Beginner | 2 = Intermediate | 3 = Advanced (or dynamic if skills provided)
    attempt : int
        Which attempt for this difficulty level (1-based).
        A higher attempt number guarantees fresh questions distinct from
        all prior attempts for the same level.
    previously_generated_questions : list[dict] | None
        All previously generated questions at the *same* difficulty level.
        Pass an empty list (or None) for attempt 1.
    difficulty_label : str | None
        Dynamic string specifying the label for this difficulty level, overriding
        the static mapping based on questions_difficulty_level.
    Nskill_gap_namezGeneral Technical Skillsdesignation_nameSoftware Engineerjob_profile_namezTechnical Professionalzthe organisationr
   SzGenerating [z] | Difficulty: z | Attempt: z | Company: z! | Previously generated Q count: a  You are a senior technical recruiter and subject-matter expert. You create high-quality, unambiguous multiple-choice skill-assessment questions. Your output MUST be a single, complete, valid JSON object only. No markdown fences, no commentary, no truncation. Every opening brace and bracket must be properly closed. IMPORTANT: Never include any company name, organisation name, or brand name in questionText or optionText. All questions must be purely technical and universally applicable.   r   z`Generate a technical skill-assessment JSON for the following candidate:

Designation          : z
Job Profile          : z
Skill Focus          : z
Difficulty Level     : z
Current Attempt      : ug  

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
PREVIOUSLY GENERATED QUESTIONS — DO NOT REPEAT THESE
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
uf   

STRICT RULES
1.  Generate EXACTLY 3 questions — no more, no less.
2.  ALL 3 questions MUST be at "u  " difficulty.
3.  Question types:
      Type 2 = single-correct MCQ  → exactly 1 option has isCorrect: true
      Type 3 = multi-correct MCQ   → at least 2 options have isCorrect: true
4.  Recommended type pattern: Q1=Type 2, Q2=Type 3, Q3=Type 2
5.  Each question MUST have EXACTLY 4 options with optionIds A, B, C, D.
6.  Questions must be technically rigorous, clearly worded, and DIFFERENT
    from every question listed in the "PREVIOUSLY GENERATED QUESTIONS" section above.
7.  Do NOT include any placeholder text or ellipsis — return real content only.
8.  NEVER mention any company name, organisation name, or brand name in the
    questionText or optionText fields. Questions must be purely technical and
    universally applicable — for example, write "Which function is used to
    open a file in Python?" instead of "CompanyX often needs to read data
    from files. Which function is used to open a file in Python?".
    The company name is strictly internal metadata and must NEVER appear in
    any question or option content.

Return ONLY this JSON structure (all 3 questions fully populated):

{
  "assessmentId": "assessment-r   r   z-01",
  "assessmentTitle": "ra   r   zh",
  "totalQuestions": 3,
  "questions": [
    {
      "questionId": "Q1",
      "questionText": "<your z' question here>",
      "difficulty": "z:",
      "questionType": 2,
      "skills": [{"skillId": "z", "skillName": "z", "skillLevel": "z", "attempt": a  }],
      "options": [
        {"optionId": "A", "optionText": "<option text>", "isCorrect": false},
        {"optionId": "B", "optionText": "<correct answer>", "isCorrect": true},
        {"optionId": "C", "optionText": "<option text>", "isCorrect": false},
        {"optionId": "D", "optionText": "<option text>", "isCorrect": false}
      ]
    },
    {
      "questionId": "Q2",
      "questionText": "<your z:",
      "questionType": 3,
      "skills": [{"skillId": "a  }],
      "options": [
        {"optionId": "A", "optionText": "<correct answer>", "isCorrect": true},
        {"optionId": "B", "optionText": "<correct answer>", "isCorrect": true},
        {"optionId": "C", "optionText": "<option text>", "isCorrect": false},
        {"optionId": "D", "optionText": "<option text>", "isCorrect": false}
      ]
    },
    {
      "questionId": "Q3",
      "questionText": "<your ad  }],
      "options": [
        {"optionId": "A", "optionText": "<option text>", "isCorrect": false},
        {"optionId": "B", "optionText": "<option text>", "isCorrect": false},
        {"optionId": "C", "optionText": "<correct answer>", "isCorrect": true},
        {"optionId": "D", "optionText": "<option text>", "isCorrect": false}
      ]
    }
  ]
}
zOllama call failed: z"Failed to contact Ollama service: errorzError generating assessmentr   )r   r   r   r   rm   zJSON parse error: z

Snippet: i  zLLM returned malformed JSON: )rh   ri   rj   rR   rk   rE   rl   zValidation failed: zAssessment validation failed: zAssessment ready: r   z | r   z questions | Difficulty: )r?   r   r   r   r:   r;   r   rQ   r   r   r   rM   rY   rH   	Exceptionr   r'   r+   loadsJSONDecodeErrorr   r   )r   rR   r   rE   rI   rk   ri   rj   job_profilecompanyrl   exclusion_blocksystem_promptr   r(   raw_textrG   json_strparsedfinalr%   r%   r&   generate_skill_assessment'  s  
###$$$)*,,,,679999CDFFFFS
	
r   __main__PythonzSenior Software Engineerr   )r   r   r   rJ   z9Which of the following data types is immutable in Python?zYWhat is the primary purpose of the `try...except` block in Python? Select all that apply.z=Which function is used to read input from the user in Python?z=== Attempt 1 (Beginner) ===zAscent Cyber Solutions Pvt Ltd)r   rR   r   rE   rI   rr   )indentu<   
=== Attempt 2 (Beginner — avoid previously generated) ===)r   )r   r   NN)'osr<   r+   loggingr   r   r   typingr   r   r   r   basicConfigINFO	getLoggerr:   getenvr   r	   r   intstr__annotations__r   r'   rH   rQ   Patternr`   rg   r   r   __name__	test_userprev_questionsprintresultdumpsresult2r%   r%   r%   r&   <module>   s   : 
	
/$
y



 
P