Skip to content

Commit

Permalink
Fix bugs and warnings using pylint. See
Browse files Browse the repository at this point in the history
  • Loading branch information
masciotta02 committed Oct 3, 2024
1 parent e383f0d commit 4374e1c
Showing 1 changed file with 25 additions and 5 deletions.
30 changes: 25 additions & 5 deletions esco/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
"""
ESCO skills and occupations database management and search functionality.
This module provides tools for loading, querying, and searching ESCO (European Skills,
Competences, Qualifications and Occupations) data, including support for vector-based
neural search capabilities.
"""

import logging
from pathlib import Path
from typing import List

import pandas as pd
from esco.vector import VectorDB

log = logging.getLogger(__name__)

Expand All @@ -14,13 +23,15 @@


def to_curie(uri: str):
    """Convert a full URI to a CURIE (Compact URI).

    The first entry of ``NS_MAP`` whose value is a prefix of *uri* is used:
    that leading namespace is swapped for the entry's key.

    Raises:
        ValueError: if no value in ``NS_MAP`` is a prefix of *uri*.
    """
    for prefix, namespace in NS_MAP.items():
        if uri.startswith(namespace):
            # Swap only the leading namespace; ``str.replace`` would also
            # rewrite any later occurrence of the same text in the identifier.
            return prefix + uri[len(namespace):]
    raise ValueError(f"Unknown prefix for {uri}")


def from_curie(curie: str):
"""Convert a CURIE (Compact URI) to a full URI format."""
if curie.startswith(("http://", "https://")):
return curie
for k, v in NS_MAP.items():
Expand Down Expand Up @@ -74,16 +85,15 @@ def __init__(

if vector_idx_config:
try:
from esco.vector import VectorDB

self.vector_idx = VectorDB(
skills=self.skills,
force_recreate=False,
config=vector_idx_config,
)
except ImportError:
log.warning(
"Could not load Qdrant and langchain database. Maybe you need to `pip install .[langchain]`?"
"Could not load Qdrant and langchain database. "
"Maybe you need to `pip install .[langchain]`?"
)
self.vector_idx = None

Expand All @@ -97,13 +107,14 @@ def validate(self):
skills_count = self.skills.shape[0]
if vector_idx_count != skills_count:
raise ValueError(
f"Skills and vector index have different number of entries: {skills_count} vs {vector_idx_count}"
f"Skills and vector index have different "
f"number of entries: {skills_count} vs {vector_idx_count}"
)

return True

def create_vector_idx(self, vector_idx_config: dict = None):
from esco.vector import VectorDB
"""Create or recreate the vector index for skills search."""

if vector_idx_config:
self.vector_idx_config = vector_idx_config
Expand All @@ -118,20 +129,28 @@ def create_vector_idx(self, vector_idx_config: dict = None):

@staticmethod
def load_skills():
    """Return the ESCO skills table as produced by ``load_table("skills")``."""
    skills_table = load_table("skills")
    return skills_table

@staticmethod
def load_occupations():
    """Return the ESCO occupations table as produced by ``load_table("occupations")``."""
    occupations_table = load_table("occupations")
    return occupations_table

def get_label(self, uri_or_curie: str):
    """Return the label of the ESCO skill identified by a URI or CURIE.

    Args:
        uri_or_curie: Skill identifier; it is normalized with ``from_curie``
            before the lookup.

    Returns:
        The ``label`` value of the first row of ``self.skills`` whose index
        equals the resolved URI.

    Raises:
        IndexError: if no row of ``self.skills`` has that URI as its index.
    """
    uri = from_curie(uri_or_curie)
    labels = self.skills[self.skills.index == uri]["label"]
    try:
        return labels.iloc[0]
    except IndexError as err:
        # Raise rather than return the exception object: a returned
        # IndexError instance would be mistaken for a label by callers.
        raise IndexError(f"URI not found in ESCO {uri_or_curie}") from err

def get(self, uri_or_curie: str):
"""
Retrieve the full skill data for a given ESCO skill URI or CURIE.
Returns a dictionary with skill details or None if not found.
"""
uri = from_curie(uri_or_curie)
try:
return self.skills[self.skills.index == uri].iloc[0].to_dict()
Expand Down Expand Up @@ -162,5 +181,6 @@ def search_neural(self, text: str, **params) -> List[dict]:
return self.vector_idx.search(text, **params)

def close(self):
    """Release the vector index, if one is attached to this instance."""
    if not self.vector_idx:
        # Nothing to release when no (truthy) index is attached.
        return
    self.vector_idx.close()

0 comments on commit 4374e1c

Please sign in to comment.