import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

# Function to recommend courses using content-based filtering
def content_based_recommendations(user_data, courses_data):
    # Combine course descriptions
    course_descriptions = [course['name'] + ' ' + course['short_description'] + ' ' + course['description'] for course in courses_data]

    # Initialize TF-IDF vectorizer
    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf_vectorizer.fit_transform(course_descriptions)

    # Compute similarity scores
    cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

    # Get index of user's assigned and completed courses
    assigned_courses = [int(course_id) for course_id in user_data.get('assignedCourses', [])]
    completed_courses = [int(course_id) for course_id in user_data.get('completedCourses', [])]

    # Compute similarity scores for user's assigned and completed courses
    similarity_scores = cosine_sim[assigned_courses] + cosine_sim[completed_courses]

    # Sort courses by similarity score
    recommended_courses = []
    for course_id, score in enumerate(similarity_scores):
        recommended_courses.append((course_id, score))
    recommended_courses.sort(key=lambda x: x[1], reverse=True)

    # Extract course IDs of top recommendations
    top_recommendations = [str(course_id) for course_id, score in recommended_courses]

    return top_recommendations

# Function to recommend courses using collaborative filtering
def collaborative_filtering_recommendations(user_data, courses_data):
    # Convert user data to DataFrame
    user_df = pd.DataFrame(user_data)

    # Convert courses data to DataFrame
    courses_df = pd.DataFrame(courses_data)

    # Merge user and courses data on skills
    merged_df = pd.merge(user_df, courses_df, how='cross')

    # Compute similarity scores based on skills
    similarity_scores = cosine_similarity(merged_df['skills_x'].apply(lambda x: ' '.join(x)), merged_df['skills_y'].apply(lambda x: ' '.join(x)))

    # Scale similarity scores
    scaler = MinMaxScaler()
    similarity_scores_scaled = scaler.fit_transform(similarity_scores)

    # Add similarity scores to DataFrame
    merged_df['similarity'] = similarity_scores_scaled

    # Filter out assigned and completed courses
    assigned_courses = [int(course_id) for course_id in user_data.get('assignedCourses', [])]
    completed_courses = [int(course_id) for course_id in user_data.get('completedCourses', [])]
    merged_df = merged_df[~merged_df['courseId_y'].isin(assigned_courses + completed_courses)]

    # Group by course ID and compute average similarity score
    grouped_df = merged_df.groupby('courseId_y')['similarity'].mean().reset_index()

    # Sort courses by average similarity score
    recommended_courses = grouped_df.sort_values(by='similarity', ascending=False)['courseId_y'].tolist()

    return recommended_courses

# Main function to process recommendations
def process_recommendations(data):
    try:
        courses_data = data.get('courses_data', [])
        user_data = data.get('user_data', {})
        competency_data = data.get('competency_data', [])

        # Content-based recommendations
        content_based_rec = content_based_recommendations(user_data, courses_data)

        # Collaborative filtering recommendations
        collaborative_filtering_rec = collaborative_filtering_recommendations(user_data, courses_data)

        # Combine recommendations if needed
        recommendations = content_based_rec + collaborative_filtering_rec

        # Return top 10 recommendations
        return recommendations[:10]
    except Exception as e:
        return {"error": str(e)}

if __name__ == "__main__":
    # Script entry point: one JSON payload on stdin, one JSON line on stdout.
    try:
        # Parse the single-line JSON request from standard input.
        raw_payload = input()
        payload = json.loads(raw_payload)

        # Generate the combined recommendation list.
        result = process_recommendations(payload)

        # Emit the result as JSON for the calling process.
        print(json.dumps(result))
    except Exception as exc:
        # Any failure (bad JSON, unexpected data) is reported in-band so the
        # caller always receives a JSON response.
        print(json.dumps({"error": str(exc)}))
