o
    Å;ÖiÐS  ã                   @   s4  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
mZ ejejd e d¡Ze  dd¡Ze  dd¡Zd	d
ddœZeeef ed< ddddœZeeef ed< dedefdd„ZdBdedededefdd„Zde	e defdd„Zd ede	ej fd!d"„Zded#e	ej defd$d%„Zd&ed'ed(ed ed)ed*ed+edefd,d-„Z	.	.		dCd/eee
f d ed0ed*edee	e  d)ee deee
f fd1d2„Ze d3krd4d5d6d7œZ!d8d9id8d:id8d;igZ"e#d<ƒ ee!d=d.d.g d>Z$e#ej%e$d?d@ƒ e#dAƒ ee!d=d.de"d>Z&e#ej%e&d?d@ƒ dS dS )Dé    N)ÚDictÚListÚAnyÚOptional)ÚlevelÚskill_assessment_processorÚOLLAMA_API_URLz#http://localhost:11434/api/generateÚOLLAMA_MODELz
gemma3:12bÚBeginnerÚIntermediateÚAdvanced)é   é   é   ÚDIFFICULTY_LEVEL_MAPÚS1ÚS2ÚS3)r
   r   r   ÚSKILL_ID_MAPÚtextÚreturnc                 C   sš   t  dd| ¡ dd¡ ¡ } |  d¡}|dkr| S d}t| |d… |ƒD ]#\}}|dkr0|d7 }q#|d	krF|d8 }|dkrF| ||d …   S q#| |d… S )
z>Strip markdown fences and pull the first complete JSON object.z```(?:json)?Ú z```Ú{éÿÿÿÿr   Nr   Ú})ÚreÚsubÚreplaceÚstripÚfindÚ	enumerate)r   ÚstartÚdepthÚiÚch© r%   úQ/var/www/eduai.edurigo.com/skill_assessment/testing/skill_assessment_processor.pyÚ_extract_json   s   

€r'   r   ÚpromptÚsystemÚretriesc                 C   sÊ   t | |ddddddddœd	œ}td
ƒ}td|d ƒD ]H}z%t d|› d|d › d¡ tjt|dd}| ¡  | 	¡  
dd¡W   S  tjjyb } z|}t d|› d|› ¡ W Y d }~qd }~ww |‚)NFÚjsongš™™™™™Ù?i    i   gÍÌÌÌÌÌì?gš™™™™™ñ?)ÚtemperatureÚnum_ctxÚnum_predictÚtop_pÚrepeat_penalty)Úmodelr(   r)   ÚstreamÚformatÚoptionszNo attempts mader   r   zOllama call attempt ú/z ...ih  )r+   ÚtimeoutÚresponser   zOllama attempt z	 failed: )r	   ÚRuntimeErrorÚrangeÚloggerÚinfoÚrequestsÚpostr   Úraise_for_statusr+   ÚgetÚ
exceptionsÚRequestExceptionÚwarning)r(   r)   r*   ÚpayloadÚlast_excÚattemptÚrespÚexcr%   r%   r&   Ú_call_ollama,   s2   ûú"€þrH   Úpreviously_generated_questionsc                 C   s\   | sdS g }t | dƒD ]\}}| dd¡ ¡ }|r$| d|› d|› ¡ q|s)dS d |¡S )z
    Build a human-readable summary of previously generated questions so the
    LLM knows exactly which questions to avoid.
    u=   None â€” this is the first attempt for this difficulty level.r   Úquestion_namer   z  z. Ú
)r    r?   r   ÚappendÚjoin)rI   Úlinesr#   ÚqÚq_textr%   r%   r&   Ú#_build_previously_generated_summaryH   s   €
rQ   Úcompany_namec                 C   s´   |   ¡ }|sg S d}g }t |¡}| t |d | d tj¡¡ tjdd|tjd  ¡ }tjdd|tjd  ¡ }|rX| ¡ | ¡ krXt |¡}| t |d | d tj¡¡ |S )z·
    Build a list of compiled regex patterns that match the company name and
    its common variations (with/without corporate suffixes, partial tokens,
    abbreviations, etc.).
    z¤\s*(?:Pvt\.?\s*Ltd\.?|Private\s+Limited|Ltd\.?|Limited|Inc\.?|Incorporated|LLC|LLP|Corp\.?|Corporation|Co\.?\s*Ltd\.?|Co\.?|Pte\.?\s*Ltd\.?|GmbH|AG|S\.?A\.?|NV|PLC)z\s*Ú?zh\s+(?:Pvt|Private|Ltd|Limited|Inc|Incorporated|LLC|LLP|Corp|Corporation|Co|Pte|GmbH|AG|SA|NV|PLC)[\s.]*$r   ©Úflags)r   r   ÚescaperL   ÚcompileÚ
IGNORECASEr   Úlower)rR   ÚnameÚ_CORP_SUFFIXESÚpatternsÚescaped_fullÚ	core_nameÚescaped_corer%   r%   r&   Ú_build_company_patterns^   s8   ÿ
ÿýüýü
ÿr`   r\   c                 C   s”   | r|s| S | }|D ]}t jd|j d d|t jd}| d|¡}q
t  dd|¡ ¡ }t  dd|¡ ¡ }|rH|d  ¡ rH|d  ¡ |d	d
…  }|S )a¿  
    Remove any company-name reference from *text*.

    Strategy (in order):
    1. If company name appears as the start of a sentence followed by more
       content (e.g. "Ascent Cyber Solutions uses Python for parsing log files."),
       remove that entire leading clause/sentence.
    2. Otherwise, replace the company name inline, leaving the surrounding
       text intact.
    3. Clean up leftover whitespace / punctuation artefacts.
    z(?:^|(?<=\. ))z[^.?!]*[.?!]\s*r   rT   z\s{2,}ú z	^[,;:\s]+r   r   N)r   r   ÚpatternrX   r   ÚislowerÚupper)r   r\   ÚcleanedÚpatr%   r%   r&   Ú_sanitize_company_references   s   þrg   ÚdataÚ	skill_gapÚdesignationÚdifficulty_labelrE   Úskill_idc                 C   sø  |   dg ¡}t|ƒdkrtdt|ƒ› dƒ‚t|ƒ}g d¢}	g }
t|ƒD ]\}}d|d › |d< ||d	< |  d
¡}|dvrE|	| |d
< |  dg ¡}t|ƒdkr`td|d › dt|ƒ› dƒ‚|d
 dkr’dd„ |D ƒ}t|ƒdkr’t d|d › dt|ƒ› d¡ t|ƒD ]
\}}|dk|d< q‡|d
 dkrÁdd„ |D ƒ}t|ƒdk rÁt d|d › dt|ƒ› d¡ d|d d< d|d d< t |¡ t|ƒD ]\}}t	d| ƒ|d< | 
dd¡ qÊ|  dd¡}t||ƒ|d< |D ]}|  d d¡}t||ƒ|d < qì||d krt d|d › d!|›d"|d ›¡ ||d< ||||d#œg|d$< |  dd¡ ¡ s6td|d › d%ƒ‚|
 |¡ q$t d&d'| ¡ ¡d(d)… }|
| d< d| d*< |  
d+d,|› d'| ¡ › d-|› d't ¡ jd(d.… › ¡ |  
d/|› d0|› d1|› ¡ | S )2a  
    Enforce structure for exactly 3 questions at a single difficulty level.
    - Type 2 = single-correct MCQ  (exactly 1 isCorrect: true)
    - Type 3 = multi-correct MCQ   (at least 2 isCorrect: true)
    - Each question must have exactly 4 options.
    Ú	questionsr   z"Expected exactly 3 questions, got z+. Ensure the LLM returns the correct count.)r   r   r   ÚQr   Ú
questionIdÚ
difficultyÚquestionType)r   r   r4   é   z" must have exactly 4 options, got Ú.r   c                 S   ó   g | ]	}|  d ¡r|‘qS ©Ú	isCorrect©r?   ©Ú.0Úor%   r%   r&   Ú
<listcomp>ß   ó    z+_validate_and_repair_3q.<locals>.<listcomp>z (Type 2) had zA correct options; defaulting correct answer to the second option.rv   c                 S   rt   ru   rw   rx   r%   r%   r&   r{   ê   r|   z (Type 3) had only zO correct option(s); defaulting correct answers to the first and second options.Tr   éA   ÚoptionIdFÚquestionTextr   Ú
optionTextz=: Stripped company-name reference from questionText. Before: z	  After: )ÚskillIdÚ	skillNameÚ
skillLevelrE   Úskillsz has an empty questionText.z	[^a-z0-9]ú-Né   ÚtotalQuestionsÚassessmentIdzassessment-ú-aé   ÚassessmentTitlera   ú Assessment for )r?   ÚlenÚ
ValueErrorr`   r    r:   rB   ÚrandomÚshuffleÚchrÚ
setdefaultrg   r;   r   rL   r   r   rY   ÚuuidÚuuid4Úhex)rh   ri   rj   rR   rk   rE   rl   rm   Úcompany_patternsÚdefault_type_patternÚrepairedÚidxrO   ÚqtÚoptsÚcorrectÚoiÚoptÚoriginal_q_textÚoriginal_optÚsafe_idr%   r%   r&   Ú_validate_and_repair_3q±   s”   ÿ
ÿÿÿ
ÿÿÿüÿ	,þþr¢   r   Úuser_detailsÚquestions_difficulty_levelc                 C   sl  |du rg }|   d¡pd ¡ }|   d¡pd ¡ }|   d¡pd ¡ }| ¡ p&d}	|du r1t  |d	¡}t  |d
|› ¡}
t d|› d|› d|› d|	› dt|ƒ› 
¡ t|ƒ}d}t 	¡ j
dd… }d g d‘|› ‘d‘|› ‘d‘|› ‘d‘|› ‘d‘|› ‘d‘|› ‘d‘|› ‘d‘|› ‘d‘| ¡ › ‘d‘|› ‘d‘|› ‘d‘|› ‘d‘|› ‘d ‘|› ‘d!‘|› ‘d"‘|
› ‘d#‘|› ‘d$‘|› ‘d%‘|› ‘d&‘|› ‘d!‘|› ‘d'‘|
› ‘d#‘|› ‘d$‘|› ‘d%‘|› ‘d(‘|› ‘d!‘|› ‘d"‘|
› ‘d#‘|› ‘d$‘|› ‘d%‘|› ‘d)‘¡}zt||ƒ}W n& ty0 } zt d*|› ¡ d+|› d,d-d.g d/œW  Y d}~S d}~ww t|ƒ}zt |¡}W n. tjyj } z t d0|› d1|dd2… › ¡ d3|› d,d-d.g d/œW  Y d}~S d}~ww zt||||	|||
d4}W n& tyž } zt d5|› ¡ d6|› d,d-d.g d/œW  Y d}~S d}~ww t d7|d8 › d9|d: › d;|› d|› ¡ |S )<a  
    Generate exactly 3 skill-assessment questions for a single difficulty level
    and attempt combination, ensuring no previously generated questions are repeated.

    Parameters
    ----------
    user_details : dict
        Keys: skill_gap_name, designation_name, job_profile_name
    company_name : str
        Target organisation name.
    questions_difficulty_level : int
        1 = Beginner | 2 = Intermediate | 3 = Advanced (or dynamic if skills provided)
    attempt : int
        Which attempt for this difficulty level (1-based).
        A higher attempt number guarantees fresh questions distinct from
        all prior attempts for the same level.
    previously_generated_questions : list[dict] | None
        All previously generated questions at the *same* difficulty level.
        Pass an empty list (or None) for attempt 1.
    difficulty_label : str | None
        Dynamic string specifying the label for this difficulty level, overriding
        the static mapping based on questions_difficulty_level.
    NÚskill_gap_namezGeneral Technical SkillsÚdesignation_nameúSoftware EngineerÚjob_profile_namezTechnical Professionalzthe organisationr
   ÚSzGenerating [z] | Difficulty: z | Attempt: z | Company: z! | Previously generated Q count: aé  You are a senior technical recruiter and subject-matter expert. You create high-quality, unambiguous multiple-choice skill-assessment questions. Your output MUST be a single, complete, valid JSON object only. No markdown fences, no commentary, no truncation. Every opening brace and bracket must be properly closed. IMPORTANT: Never include any company name, organisation name, or brand name in questionText or optionText. All questions must be purely technical and universally applicable.é   r   z`Generate a technical skill-assessment JSON for the following candidate:

Designation          : z
Job Profile          : z
Skill Focus          : z
Difficulty Level     : z
Current Attempt      : ug  

â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”
PREVIOUSLY GENERATED QUESTIONS â€” DO NOT REPEAT THESE
â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”
uf   

STRICT RULES
1.  Generate EXACTLY 3 questions â€” no more, no less.
2.  ALL 3 questions MUST be at "u  " difficulty.
3.  Question types:
      Type 2 = single-correct MCQ  â†’ exactly 1 option has isCorrect: true
      Type 3 = multi-correct MCQ   â†’ at least 2 options have isCorrect: true
4.  Recommended type pattern: Q1=Type 2, Q2=Type 3, Q3=Type 2
5.  Each question MUST have EXACTLY 4 options with optionIds A, B, C, D.
6.  Questions must be technically rigorous, clearly worded, and DIFFERENT
    from every question listed in the "PREVIOUSLY GENERATED QUESTIONS" section above.
7.  Do NOT include any placeholder text or ellipsis â€” return real content only.
8.  NEVER mention any company name, organisation name, or brand name in the
    questionText or optionText fields. Questions must be purely technical and
    universally applicable â€” for example, write "Which function is used to
    open a file in Python?" instead of "CompanyX often needs to read data
    from files. Which function is used to open a file in Python?".
    The company name is strictly internal metadata and must NEVER appear in
    any question or option content.

Return ONLY this JSON structure (all 3 questions fully populated):

{
  "assessmentId": "assessment-r…   r‰   z-01",
  "assessmentTitle": "ra   rŒ   zh",
  "totalQuestions": 3,
  "questions": [
    {
      "questionId": "Q1",
      "questionText": "<your z' question here>",
      "difficulty": "z:",
      "questionType": 2,
      "skills": [{"skillId": "z", "skillName": "z", "skillLevel": "z", "attempt": aœ  }],
      "options": [
        {"optionId": "A", "optionText": "<option text>", "isCorrect": false},
        {"optionId": "B", "optionText": "<correct answer>", "isCorrect": true},
        {"optionId": "C", "optionText": "<option text>", "isCorrect": false},
        {"optionId": "D", "optionText": "<option text>", "isCorrect": false}
      ]
    },
    {
      "questionId": "Q2",
      "questionText": "<your z:",
      "questionType": 3,
      "skills": [{"skillId": "až  }],
      "options": [
        {"optionId": "A", "optionText": "<correct answer>", "isCorrect": true},
        {"optionId": "B", "optionText": "<correct answer>", "isCorrect": true},
        {"optionId": "C", "optionText": "<option text>", "isCorrect": false},
        {"optionId": "D", "optionText": "<option text>", "isCorrect": false}
      ]
    },
    {
      "questionId": "Q3",
      "questionText": "<your ad  }],
      "options": [
        {"optionId": "A", "optionText": "<option text>", "isCorrect": false},
        {"optionId": "B", "optionText": "<option text>", "isCorrect": false},
        {"optionId": "C", "optionText": "<correct answer>", "isCorrect": true},
        {"optionId": "D", "optionText": "<option text>", "isCorrect": false}
      ]
    }
  ]
}
zOllama call failed: z"Failed to contact Ollama service: ÚerrorzError generating assessmentr   )r«   rˆ   r‹   r‡   rm   zJSON parse error: z

Snippet: iô  zLLM returned malformed JSON: )rh   ri   rj   rR   rk   rE   rl   zValidation failed: zAssessment validation failed: zAssessment ready: rˆ   z | r‡   z questions | Difficulty: )r?   r   r   r   r:   r;   r   rQ   r“   r”   r•   rM   rY   rH   Ú	Exceptionr«   r'   r+   ÚloadsÚJSONDecodeErrorr¢   rŽ   )r£   rR   r¤   rE   rI   rk   ri   rj   Újob_profileÚcompanyrl   Úexclusion_blockÚsystem_promptr¡   r(   Úraw_textrG   Újson_strÚparsedÚfinalr%   r%   r&   Úgenerate_skill_assessment'  s  ÿÿþÿÿ
þýüûúõñ#Ý#Ý#Ý$Ü$Ü$Ü)×*Ö,Ô,Ô,Ô,Ô6Ê7É9Ç9Ç9Ç9ÇC½D¼FºFºFºFºSû€þû€þ
ù	û€þ
ÿþþÿr·   Ú__main__ÚPythonzSenior Software Engineerr§   )r¥   r¦   r¨   rJ   z9Which of the following data types is immutable in Python?zYWhat is the primary purpose of the `try...except` block in Python? Select all that apply.z=Which function is used to read input from the user in Python?z=== Attempt 1 (Beginner) ===zAscent Cyber Solutions Pvt Ltd)r£   rR   r¤   rE   rI   rr   )Úindentu<   
=== Attempt 2 (Beginner â€” avoid previously generated) ===)r   )r   r   NN)'Úosr<   r+   Úloggingr   r“   r   Útypingr   r   r   r   ÚbasicConfigÚINFOÚ	getLoggerr:   Úgetenvr   r	   r   ÚintÚstrÚ__annotations__r   r'   rH   rQ   ÚPatternr`   rg   r¢   r·   Ú__name__Ú	test_userÚprev_questionsÚprintÚresultÚdumpsÚresult2r%   r%   r%   r&   Ú<module>   s°   : 
ýý	ÿ
þ/$ÿþýüûúù
øyú
ÿþýü
ûú

ù 
Pýýûûß