Fix: #12. Add SkillType (#13)
* Fix: #12. Add SkillType

* Refactor shell scripts.

* Fix CI.
ioggstream authored Mar 18, 2024
1 parent 10e74df commit caa15ad
Showing 6 changed files with 48 additions and 38 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/lint.yml
@@ -28,7 +28,8 @@ jobs:
id
git config --global --add safe.directory $PWD
pre-commit install
-pre-commit run -a
+# Skip checks requiring docker.
+SKIP=shellcheck pre-commit run -a
# Store (expiring) logs on failure.
# Retrieve artifacts via `gh run download`.
4 changes: 2 additions & 2 deletions .github/workflows/publish.yml
@@ -48,9 +48,9 @@ jobs:
publish:
# Publish to PyPI. See https://docs.pypi.org/trusted-publishers/using-a-publisher/
timeout-minutes: 5
-needs: test-job
+needs: test
runs-on: ubuntu-latest
-container: python:3.10-slim
+container: python:3.11-slim
# Publish to the release environment.
# See https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment
environment: release
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
@@ -64,3 +64,7 @@ repos:
- id: python-safety-dependencies-check
files: requirements.txt
args: ["--short-report"]
+- repo: https://github.com/koalaman/shellcheck-precommit
+  rev: v0.10.0
+  hooks:
+    - id: shellcheck
32 changes: 16 additions & 16 deletions README.md
@@ -22,18 +22,24 @@ pip install esco[dev]

## Usage

-The simplest way to use this module is via the `LocalDB` class:
+The simplest way to use this module is via the `LocalDB` class,
+which wraps the ESCO dataset embedded in the package via the [JSON files](esco/esco.json.gz):

```python
from esco import LocalDB

esco_data = LocalDB()

-# Get a skill by its URI.
+# Get a skill by its CURIE.
skill = esco_data.get("esco:b0096dc5-2e2d-4bc1-8172-05bf486c3968")

# Search a list of skills using labels.
skills = esco_data.search_products({"python", "java"})

+# Further queries can be done using the embedded dataframe.
+esco_data.skills.__class__ == pandas.core.frame.DataFrame
+
+esco_data.skills[esco_data.skills.label == "SQL Server"]
```
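
This commit adds a `SkillType` to the API schema, so a skill record may carry its ESCO type as well. A minimal sketch, assuming `LocalDB.get` returns a dict-like record exposing a `skillType` field with the enum values `"knowledge"` and `"skill"` (the Python-side field name is an assumption, not confirmed by this diff):

```python
from esco import LocalDB

esco_data = LocalDB()
skill = esco_data.get("esco:b0096dc5-2e2d-4bc1-8172-05bf486c3968")

# Assumed field: the OAS3 schema in this commit names it `skillType`
# and restricts it to "knowledge" or "skill".
print(skill.get("skillType"))  # e.g. "knowledge"
```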

To use extra features such as text to skill extraction
@@ -77,8 +83,6 @@ cv = cv_recognizer(text)
cv_skills = cv.skills()
```

If you have a sparql server with the ESCO dataset, you can use the `SparqlClient`:

```python
@@ -100,7 +104,6 @@ WHERE {
skills = client.query(query)
```
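
The snippet above is truncated by the diff view. For context, here is a fuller sketch of the same flow; the constructor signature and the query shape are assumptions, not the package's confirmed API:

```python
from esco import SparqlClient  # assumed import path

# Assumed endpoint: the local Virtuoso instance started below via
# `docker-compose up -d virtuoso` serves SPARQL on port 8890 by default.
client = SparqlClient(url="http://localhost:8890/sparql")

# Hypothetical query: list skills with their label and ESCO skill type.
query = """
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX esco: <http://data.europa.eu/esco/model#>

SELECT ?uri ?label ?skillType
WHERE {
  ?uri a esco:Skill ;
       skos:prefLabel ?label ;
       esco:skillType ?skillType .
  FILTER (lang(?label) = "en")
}
LIMIT 10
"""
skills = client.query(query)
```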

## Development

The jupyter notebook should work without the ESCO dataset,
@@ -110,11 +113,15 @@ To regenerate the NER model, you need the ESCO dataset in turtle format.

:warning: before using this repository, you need to:

-1. download the ESCO 1.1.1 database in text/turtle format `ESCO dataset - v1.1.1 - classification - - ttl.zip` from the [ESCO portal](https://ec.europa.eu/esco/portal) and unzip the `.ttl` file under the`vocabularies` folder.
+1. download the ESCO 1.1.1 database in text/turtle format
+   `ESCO dataset - v1.1.1 - classification - - ttl.zip`
+   from the [ESCO portal](https://ec.europa.eu/esco/portal)
+   and unzip the `.ttl` file under the [`vocabularies`](vocabularies/) folder.

1. execute the sparql server that will be used to serve the ESCO dataset,
and wait for the server to spin up and load the ~700MB dataset.
-   :warning: It will take a couple of minutes, so you need to wait for the server to be ready.
+   :warning: It will take a couple of minutes,
+   so you need to wait for the server to be ready.

```bash
docker-compose up -d virtuoso
@@ -126,13 +133,6 @@ To regenerate the NER model, you need the ESCO dataset in turtle format.
tox -e py3
```

-1. run the API
-
-   ```bash
-   connexion run api/openapi.yaml &
-   xdg-open http://localhost:5000/esco/v0.0.1/ui/
-   ```

## Regenerate the model

To regenerate the model, you need to set up the ESCO dataset as explained above
@@ -162,14 +162,14 @@ Please, see [CONTRIBUTING.md](CONTRIBUTING.md) for more details on:
You can create new projects starting from this repository,
so different projects can share a consistent CI and the same checks.

-Besides all the explanations in the [CONTRIBUTING.md](CONTRIBUTING.md) file, you can use the docker-compose file
+Besides all the explanations in the [CONTRIBUTING.md](CONTRIBUTING.md) file,
+you can use the docker-compose file
(e.g. if you prefer to use docker instead of installing the tools locally)

```bash
docker-compose run pre-commit
```

## Using on GCP

If you need a GPU server, you can
23 changes: 13 additions & 10 deletions schemas/skills.oas3.yaml
@@ -34,6 +34,18 @@ components:
type: "string"
maxLength: 100
example: "Programming"
+    SkillType:
+      description: |-
+        The type of skill, according to the ESCO model.
+        The skill type is a URI, and it is expected to be
+        one of the following:
+        - http://data.europa.eu/esco/skill-type/knowledge
+        - http://data.europa.eu/esco/skill-type/skills
+      type: "string"
+      enum:
+        - "knowledge"
+        - "skill"
+      example: "knowledge"
Skill:
description: |-
A skill schema according to the ESCO model.
@@ -80,16 +92,7 @@ components:
items:
$ref: "#/components/schemas/SkillLabel"
        skillType:
-          description: |-
-            The type of skill, according to the ESCO model.
-            The skill type is an URI, and it is expected to be
-            one of the following:
-            - http://data.europa.eu/esco/skill-type/knowledge
-            - http://data.europa.eu/esco/skill-type/skills
-          type: "string"
-          enum:
-            - "knowledge"
-            - "skill"
+          $ref: "#/components/schemas/SkillType"
example:
uri: "esco:ccd0a1d9-afda-43d9-b901-96344886e14d"
label: "Programming"
20 changes: 11 additions & 9 deletions sparql/virtuoso.sh
@@ -30,12 +30,12 @@ then
isql-v -U dba -P dba < $sql_query_sql

pkill -f virtuoso-t
echo "$(date +%Y-%m-%dT%H:%M:%S%:z)" > $dba_pwd_lock
date +%Y-%m-%dT%H:%M:%S%:z > $dba_pwd_lock
fi

load_data_lock=$SETTINGS_DIR/.data_loaded
load_data_sql=$SETTINGS_DIR/load_data.sql
if [ ! -f "$load_data_lock" -a -d "toLoad" ] ;
if [ ! -f "$load_data_lock" ] && [ -d "toLoad" ] ;
then
echo "Start data loading from toLoad folder"
pwd="dba"
@@ -46,11 +46,13 @@ then
if [ "$DBA_PASSWORD" ]; then pwd="$DBA_PASSWORD" ; fi
if [ "$DEFAULT_GRAPH" ]; then graph="$DEFAULT_GRAPH" ; fi

echo "ld_dir('toLoad', '*', '$graph');" >> $load_data_sql
echo "rdf_loader_run();" >> $load_data_sql
echo "exec('checkpoint');" >> $load_data_sql
echo "WAIT_FOR_CHILDREN; " >> $load_data_sql
echo "$(cat $load_data_sql)"
cat >> "${load_data_sql}" << EOF
ld_dir('toLoad', '*', '${graph}');
rdf_loader_run();
exec('checkpoint');
WAIT_FOR_CHILDREN;
EOF
cat $load_data_sql

virtuoso-t +wait && isql-v -U dba -P "$pwd" < $load_data_sql

@@ -65,7 +67,7 @@ load_on_virtuoso(){
local sql_file="$SETTINGS_DIR/load_${label}.sql"
local lock_file="$SETTINGS_DIR/.${label}_loaded"

if [ ! -f ".${label}_loaded" -a -d "${path}" ];
if [ ! -f ".${label}_loaded" ] && [ -d "${path}" ];
then
pwd="dba" ;
echo "Loading OntoPiA ${label}"
@@ -82,7 +84,7 @@ EOF
virtuoso-t +wait && isql-v -U dba -P "$pwd" < "/${sql_file}"

pkill -f virtuoso-t
echo "$(date +%Y-%m-%dT%H:%M:%S%:z)" > "${lock_file}"
date +%Y-%m-%dT%H:%M:%S%:z > "${lock_file}"

fi

