From 25445ff056d331d475f2550162a0af0dcc1ee17e Mon Sep 17 00:00:00 2001 From: Bart van Beusekom Date: Mon, 22 Sep 2025 16:22:43 +0200 Subject: [PATCH 01/15] First step towards v5 compatibility - cleaned VPN as it is no longer supported in v5 - renamed partial to federated to be more in line with decorators in v5 - Made question section cleaner by assuming central function always has a client and federated func never - and similar but reverse for data - Stub of support for preprocessing and data extraction --- Dockerfile.jinja | 17 ---- README.md.jinja | 3 - algorithm_store.json.jinja | 18 ++-- cleanup.py | 10 +- copier.yml | 96 ++++--------------- macros/function_definition.jinja | 31 +++--- test/test.py.jinja | 8 +- .../implementation.rst.jinja | 14 +-- {{algorithm_name}}/__init__.py.jinja | 12 ++- {{algorithm_name}}/central.py.jinja | 43 +-------- {{algorithm_name}}/extract.py.jinja | 0 .../{partial.py.jinja => federated.py.jinja} | 4 +- {{algorithm_name}}/preprocess.py.jinja | 0 13 files changed, 80 insertions(+), 176 deletions(-) create mode 100644 {{algorithm_name}}/extract.py.jinja rename {{algorithm_name}}/{partial.py.jinja => federated.py.jinja} (92%) create mode 100644 {{algorithm_name}}/preprocess.py.jinja diff --git a/Dockerfile.jinja b/Dockerfile.jinja index 49a1842..b506484 100644 --- a/Dockerfile.jinja +++ b/Dockerfile.jinja @@ -9,23 +9,6 @@ ARG PKG_NAME="{{algorithm_name}}" COPY . /app RUN pip install /app -{% if use_vpn %} -# Specify the ports that are used for VPN communication, along with a label -# that helps you identify them. As an example, port 8888 is used here. The label -# must be specified as the port number with a 'p' prefix, e.g. 'p8888'. - {% if vpn_expose %} - {%- for port_dict in vpn_expose %} -EXPOSE {{port_dict.port}} -LABEL p{{port_dict.port}} = '{{port_dict.label}}' - {% endfor %} - {% else %} -# TODO provide a sensible label below. Feel free to add more ports if needed by -# adding additional EXPOSE and LABEL commands. 
-EXPOSE 8888 -LABEL p8888='some_label' - {% endif %} -{% endif %} - # Set environment variable to make name of the package available within the # docker image. ENV PKG_NAME=${PKG_NAME} diff --git a/README.md.jinja b/README.md.jinja index e36c6bd..ac52d0f 100644 --- a/README.md.jinja +++ b/README.md.jinja @@ -43,9 +43,6 @@ code for TODO instead of following the checklist below. settings. This will be used to push the Docker image to the registry in the github pipeline. {% endif %} -{% if use_vpn %} -- [ ] Review the EXPOSE and LABEL commands in the Dockerfile for VPN -{% endif %} - [ ] Finally, remove this checklist section to keep the README clean. ### Dockerizing your algorithm diff --git a/algorithm_store.json.jinja b/algorithm_store.json.jinja index f116aeb..28f91cc 100644 --- a/algorithm_store.json.jinja +++ b/algorithm_store.json.jinja @@ -12,12 +12,12 @@ "description": "{{algorithm_description}}", "type": "central", "databases": [ - {%- for idx in range(partial_function_number_databases) -%} + {%- for idx in range(federated_function_number_databases) -%} { - "name": "Partial database {{idx + 1}}" - }{%- if not loop.last or central_function_number_databases > 0 -%},{%- endif -%} + "name": "Central database {{idx + 1}}" + }{%- if not loop.last or federated_function_number_databases > 0 -%},{%- endif -%} {% endfor %} - {% for idx in range(central_function_number_databases) %} + {% for idx in range(federated_function_number_databases) %} { "name": "Central database {{idx + 1}}" }{%- if not loop.last -%},{%- endif -%} @@ -33,21 +33,21 @@ {% endfor %} ] }{%- endif -%} - {%- if has_central_function and has_partial_function -%},{%- endif -%} - {% if has_partial_function -%} + {%- if has_central_function and has_federated_function -%},{%- endif -%} + {% if has_federated_function -%} { - "name": "{{partial_function_name}}", + "name": "{{federated_function_name}}", "description": "", "type": "federated", "databases": [ - {%- for idx in 
range(partial_function_number_databases) -%} + {%- for idx in range(federated_function_number_databases) -%} { "name": "Database {{idx + 1}}" }{%- if not loop.last -%},{%- endif -%} {% endfor %} ], "arguments": [ - {%- for arg in partial_args -%} + {%- for arg in federated_args -%} { "name": "{{arg}}", "type": "", diff --git a/cleanup.py b/cleanup.py index a63f87d..6a886a9 100644 --- a/cleanup.py +++ b/cleanup.py @@ -24,10 +24,12 @@ def cleanup() -> None: print("Removing LICENSE file as no license was chosen...") Path("LICENSE").unlink() - # Remove partial function files if partial function is not defined - if not copier_config.get("has_partial_function"): - print("Removing partial function file as partial function is not defined...") - Path(algorithm_name, "partial.py").unlink() + # Remove federated function files if federated function is not defined + if not copier_config.get("has_federated_function"): + print( + "Removing federated function file as federated function is not defined..." + ) + Path(algorithm_name, "federated.py").unlink() # Remove central function files if central function is not defined if not copier_config.get("has_central_function"): diff --git a/copier.yml b/copier.yml index bcb1a28..71f2321 100644 --- a/copier.yml +++ b/copier.yml @@ -32,33 +32,6 @@ central_function_name: help: "What is the name of your central function?" default: central -central_function_client: - type: bool - when: "{{ has_central_function }}" - help: "Do you want to use a client in your central function?" - default: true - -central_function_data: - type: bool - when: "{{ has_central_function }}" - help: "Do you want to use data in your central function?" - default: false - -central_function_number_databases: - type: int - when: "{{ has_central_function and central_function_data }}" - help: "How many databases do you want to use in your central function?" 
- default: |- - {%- if has_central_function and central_function_data -%} - 1 - {%- else -%} - 0 - {%- endif -%} - validator: |- - {% if central_function_data and central_function_number_databases < 1 %} - Must be at least 1 - {% endif %} - central_args: type: json when: "{{ has_central_function }}" @@ -66,50 +39,44 @@ central_args: default: ['arg1', ] multiline: true -# ------------- Define partial algorithm function ---------- # +# ------------- Define federated algorithm function ---------- # -has_partial_function: +has_federated_function: type: bool - help: "Do you want to use a partial function in your algorithm?" + help: "Do you want to use a federated function in your algorithm?" default: true -partial_function_name: +federated_function_name: type: str - when: "{{ has_partial_function }}" - help: "What is the name of your partial function?" - default: partial - -partial_function_client: - type: bool - when: "{{ has_partial_function }}" - help: "Do you want to use a client in your partial function?" - default: false + when: "{{ has_federated_function }}" + help: "What is the name of your federated function?" + default: federated -partial_function_data: +federated_function_data: type: bool - when: "{{ has_partial_function }}" - help: "Do you want to use data in your partial function?" + when: "{{ has_federated_function }}" + help: "Do you want to use data in your federated function?" default: true -partial_function_number_databases: +federated_function_number_databases: type: int - when: "{{ has_partial_function and partial_function_data }}" - help: "How many databases do you want to use in your partial function?" + when: "{{ has_federated_function and federated_function_data }}" + help: "How many databases do you want to use in your federated function?" 
default: |- - {%- if has_partial_function and partial_function_data -%} + {%- if has_federated_function and federated_function_data -%} 1 {%- else -%} 0 {%- endif -%} validator: |- - {% if has_partial_function and partial_function_number_databases < 1 %} + {% if has_federated_function and federated_function_number_databases < 1 %} Must be at least 1 {% endif %} -partial_args: +federated_args: type: json - when: "{{ has_partial_function }}" - help: "Add a list of arguments to the partial function '{{ partial_function_name }}'" + when: "{{ has_federated_function }}" + help: "Add a list of arguments to the federated function '{{ federated_function_name }}'" default: ['arg1', ] multiline: true @@ -144,31 +111,6 @@ advanced: help: "Do you want to see the advanced options?" default: true -# ----------------------- Set up VPN ----------------------- # -use_vpn: - type: bool - when: "{{ advanced }}" - help: "Do you want to use the VPN network in your algorithm?" - default: false - -vpn_expose: - type: json - when: "{{ advanced and use_vpn }}" - help: | - Configure the ports to expose in the VPN. You can add multiple ports - by adding more dictionaries with keys 'label' and 'port'. - default: [{'port': 8888, 'label': 'my_label'}] - multiline: true - validator: | - {% for port_dict in vpn_expose %} - {% if 'port' not in port_dict %} - Each dictionary should have a 'port' key. - {% endif %} - {% if 'label' not in port_dict %} - Each dictionary should have a 'label' key. 
- {% endif %} - {% endfor %} - has_gh_pipeline: type: bool when: "{{ advanced }}" @@ -219,7 +161,7 @@ _tasks: # {% elif _copier_conf.os == 'windows' %} # Remove-Item LICENSE # {% endif %} - # TODO: similarly, delete if empty: central.py, partial.py + # TODO: similarly, delete if empty: central.py, federated.py # Call python script that cleans up - ["{{ _copier_python }}", cleanup.py] _message_after_copy: | diff --git a/macros/function_definition.jinja b/macros/function_definition.jinja index 1428fa1..bb4f24b 100644 --- a/macros/function_definition.jinja +++ b/macros/function_definition.jinja @@ -1,25 +1,32 @@ -{% macro func_def(func_name, data_decorator, client_decorator, num_dbs, - func_args) %} -{# Data decorator #} -{% if data_decorator -%} -@data({{num_dbs}}) -{% endif -%} +{% macro func_def(func_name, func_type, num_dbs, func_args) %} -{# Algorithm client decorator #} -{%- if client_decorator -%} +{# Function type decorator #} +{% if func_type == "central_compute" -%} +@central @algorithm_client +{% elif func_type == "federated_compute" -%} +@federated +{% elif func_type == "preprocessing" -%} +@preprocessing +{% elif func_type == "data_extraction" -%} +@data_extraction +{% endif -%} + +{# Data decorator #} +{%- if num_dbs > 0 -%} +@data({{num_dbs}}) {% endif -%} {# Function definition #} def {{func_name}}( {{" "}}{# <- Add 4 whitespaces before function args start #} {# Add client argument #} -{%- if client_decorator %} +{%- if func_type == "central_compute" %} client: AlgorithmClient {%- endif -%} {# Add data arguments #} -{%- if data_decorator -%} +{%- if num_dbs > 0 -%} {# Add comma if previous arguments exist #} - {%- if client_decorator -%} + {%- if func_type == "central_compute" -%} ,{{" "}} {%- endif -%} {%- for db_num in range(num_dbs)-%} @@ -32,7 +39,7 @@ client: AlgorithmClient {# Add additional arguments to function #} {%- if func_args %} {# Add comma if previous arguments exist #} - {%- if client_decorator or data_decorator -%} + {%- if func_type 
== "central_compute" or num_dbs > 0 -%} ,{{" "}} {%- endif -%} {# Add arguments #} diff --git a/test/test.py.jinja b/test/test.py.jinja index 10ce466..13784f1 100644 --- a/test/test.py.jinja +++ b/test/test.py.jinja @@ -57,14 +57,14 @@ results = client.wait_for_results(central_task.get("id")) print(results) {% endif %} -{% if has_partial_function %} -# Run the partial method for all organizations +{% if has_federated_function %} +# Run the federated method for all organizations task = client.task.create( input_={ - "method":"{{partial_function_name}}", + "method":"{{federated_function_name}}", "kwargs": { {% from 'macros/kwargs_definition.jinja' import kwargs_define %} - {{ kwargs_define(partial_function_name, partial_args, 3) }} + {{ kwargs_define(federated_function_name, federated_args, 3) }} } }, organizations=org_ids diff --git a/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/implementation.rst.jinja b/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/implementation.rst.jinja index 1677f8e..4b0690f 100644 --- a/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/implementation.rst.jinja +++ b/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/implementation.rst.jinja @@ -12,16 +12,16 @@ The central part is responsible for the orchestration and aggregation of the alg .. Describe the central function here. {% endif %} -{% if has_partial_function %} -Partials +{% if has_federated_function %} +Federated functions -------- -Partials are the computations that are executed on each node. The partials have access -to the data that is stored on the node. The partials are executed in parallel on each -node. +Federated functions are the computations that are executed on each node. The federated +functions have access to the data that is stored on the node. These functions are +executed in parallel on each node. -``{{partial_function_name}}`` +``{{federated_function_name}}`` ~~~~~~~~~~~~~~~~ -.. Describe the partial function. +.. Describe the function. 
{% endif %} diff --git a/{{algorithm_name}}/__init__.py.jinja b/{{algorithm_name}}/__init__.py.jinja index c84baf2..a49344c 100644 --- a/{{algorithm_name}}/__init__.py.jinja +++ b/{{algorithm_name}}/__init__.py.jinja @@ -1,7 +1,15 @@ +{% if import_infra_extraction -%} +from vantage6.algorithm.data_extraction import * +{% endif %} + +{% if import_infra_preprocessing -%} +from vantage6.algorithm.preprocessing import * +{% endif %} + {% if has_central_function -%} from .central import * {% endif %} -{%- if has_partial_function %} -from .partial import * +{%- if has_federated_function %} +from .federated import * {% endif %} diff --git a/{{algorithm_name}}/central.py.jinja b/{{algorithm_name}}/central.py.jinja index 9577a0f..108b288 100644 --- a/{{algorithm_name}}/central.py.jinja +++ b/{{algorithm_name}}/central.py.jinja @@ -6,34 +6,22 @@ The results in a return statement are sent to the vantage6 server (after encryption if that is enabled). """ {% if has_central_function -%} - {% if central_function_data -%} - import pandas as pd - {% endif %} from typing import Any from vantage6.algorithm.tools.util import info, warn, error - {% if central_function_client %} from vantage6.algorithm.tools.decorators import algorithm_client - {% endif %} - {% if central_function_data %} -from vantage6.algorithm.tools.decorators import data - {% endif %} - {% if central_function_client %} from vantage6.algorithm.client import AlgorithmClient - {% endif %} {# Include the function definition from a macro #} {% from 'macros/function_definition.jinja' import func_def %} {{ func_def( - central_function_name, central_function_data, central_function_client, - central_function_number_databases, central_args + central_function_name, "central_compute", 0, central_args, ) }} {# Implementation of the algorithm #} """ Central part of the algorithm """ - {% if central_function_client %} # TODO implement this function. Below is an example of a simple but typical # central function. 
@@ -45,8 +33,8 @@ from vantage6.algorithm.client import AlgorithmClient # Define input parameters for a subtask info("Defining input parameters") input_ = { - {% if has_partial_function %} - "method": "{{partial_function_name}}", + {% if has_federated_function %} + "method": "{{federated_function_name}}", {% else %} # TODO you should define a federated method here (which should also be # implemented in this repository) @@ -54,7 +42,7 @@ from vantage6.algorithm.client import AlgorithmClient {% endif %} "kwargs": { {% from 'macros/kwargs_definition.jinja' import kwargs_define %} - {{ kwargs_define(partial_function_name, partial_args, 3) }} + {{ kwargs_define(federated_function_name, federated_args, 3) }} } } @@ -67,26 +55,6 @@ from vantage6.algorithm.client import AlgorithmClient description="This is a very important subtask" ) - {% if use_vpn %} - # To communicate over the VPN network, get VPN addresses of the subtasks - vpn_addresses = client.vpn.get_child_addresses() - # Note that you can also use client.vpn.get_addresses() to not only get the - # subtasks. - # The variable `vpn_addresses` will be something like: - # { - # [ - # 'ip': '1.2.3.4', - # 'port': 5678, - # 'label': 'label_defined_in_dockerfile', - # 'organization_id': 1, - # 'task_id': 1, (task id of the subtask) - # 'parent_id': 1, (task id of this central task) - # ], ... (one for each algorithm container) - # } - # Use the `ip` and `port` to communicate with the subtasks. - # TODO write your own code here to communicate with the subtasks. - {% endif %} - # wait for node to return results of the subtask. 
info("Waiting for results") results = client.wait_for_results(task_id=task.get("id")) @@ -98,9 +66,6 @@ from vantage6.algorithm.client import AlgorithmClient # return the final results of the algorithm return results - {% else %} - # TODO write your own code here - return {'my_results': 'some_value'} {% endif %} {%- endif %} diff --git a/{{algorithm_name}}/extract.py.jinja b/{{algorithm_name}}/extract.py.jinja new file mode 100644 index 0000000..e69de29 diff --git a/{{algorithm_name}}/partial.py.jinja b/{{algorithm_name}}/federated.py.jinja similarity index 92% rename from {{algorithm_name}}/partial.py.jinja rename to {{algorithm_name}}/federated.py.jinja index 981593d..a23c952 100644 --- a/{{algorithm_name}}/partial.py.jinja +++ b/{{algorithm_name}}/federated.py.jinja @@ -28,8 +28,8 @@ from vantage6.algorithm.client import AlgorithmClient {% from 'macros/function_definition.jinja' import func_def %} {{ func_def( - partial_function_name, partial_function_data, partial_function_client, - partial_function_number_databases, partial_args + partial_function_name, "federated_compute", partial_function_number_databases, + partial_args, ) }} {# Implementation of partial algorithm #} diff --git a/{{algorithm_name}}/preprocess.py.jinja b/{{algorithm_name}}/preprocess.py.jinja new file mode 100644 index 0000000..e69de29 From bfb2d7964e34f0c1d93c821e123e859199619f0f Mon Sep 17 00:00:00 2001 From: Bart van Beusekom Date: Mon, 22 Sep 2025 16:55:39 +0200 Subject: [PATCH 02/15] Implement data extraction functions, fix imports, start of preprocessing code --- copier.yml | 50 +++++++++++++++++++++++++ macros/function_definition.jinja | 8 ++-- {{algorithm_name}}/__init__.py.jinja | 8 ++++ {{algorithm_name}}/central.py.jinja | 2 +- {{algorithm_name}}/extract.py.jinja | 54 +++++++++++++++++++++++++++ {{algorithm_name}}/federated.py.jinja | 30 ++++++--------- 6 files changed, 130 insertions(+), 22 deletions(-) diff --git a/copier.yml b/copier.yml index 71f2321..116ca35 100644 --- 
a/copier.yml +++ b/copier.yml @@ -80,6 +80,56 @@ federated_args: default: ['arg1', ] multiline: true +# ----------------------- Data extraction ------------------- # + +import_infra_extraction: + type: bool + help: "Do you want to make the default vantage6 data extraction functions available in your algorithm?" + default: true + +has_data_extraction: + type: bool + help: "Do you want to define your own data extraction function in your algorithm?" + default: true + +data_extraction_function_name: + type: str + when: "{{ has_data_extraction }}" + help: "What is the name of your data extraction function?" + default: data_extraction + +data_extraction_args: + type: json + when: "{{ has_data_extraction }}" + help: "Add a list of arguments to the data extraction function '{{ data_extraction_function_name }}'" + default: ['arg1', ] + multiline: true + +# ----------------------- Data preprocessing ------------------- # + +import_infra_preprocessing: + type: bool + help: "Do you want to make the default vantage6 data preprocessing functions available in your algorithm?" + default: true + +has_data_preprocessing: + type: bool + help: "Do you want to define your own data preprocessing function in your algorithm?" + default: true + +data_preprocessing_function_name: + type: str + when: "{{ has_data_preprocessing }}" + help: "What is the name of your data preprocessing function?" 
+ default: data_preprocessing + +data_preprocessing_args: + type: json + when: "{{ has_data_preprocessing }}" + help: "Add a list of arguments to the data preprocessing function '{{ data_preprocessing_function_name }}'" + default: ['arg1', ] + multiline: true + # ----------------------- Documentation --------------------- # has_docs: diff --git a/macros/function_definition.jinja b/macros/function_definition.jinja index bb4f24b..957d721 100644 --- a/macros/function_definition.jinja +++ b/macros/function_definition.jinja @@ -14,7 +14,7 @@ {# Data decorator #} {%- if num_dbs > 0 -%} -@data({{num_dbs}}) +@dataframe({{num_dbs}}) {% endif -%} {# Function definition #} def {{func_name}}( @@ -22,11 +22,13 @@ def {{func_name}}( {# Add client argument #} {%- if func_type == "central_compute" %} client: AlgorithmClient +{% elif func_type == "data_extraction" %} +connection_details: dict {%- endif -%} {# Add data arguments #} {%- if num_dbs > 0 -%} {# Add comma if previous arguments exist #} - {%- if func_type == "central_compute" -%} + {%- if func_type == "central_compute" or func_type == "data_extraction" -%} ,{{" "}} {%- endif -%} {%- for db_num in range(num_dbs)-%} @@ -39,7 +41,7 @@ client: AlgorithmClient {# Add additional arguments to function #} {%- if func_args %} {# Add comma if previous arguments exist #} - {%- if func_type == "central_compute" or num_dbs > 0 -%} + {%- if func_type == "central_compute" or func_type == "data_extraction" or num_dbs > 0 -%} ,{{" "}} {%- endif -%} {# Add arguments #} diff --git a/{{algorithm_name}}/__init__.py.jinja b/{{algorithm_name}}/__init__.py.jinja index a49344c..2d4fe5c 100644 --- a/{{algorithm_name}}/__init__.py.jinja +++ b/{{algorithm_name}}/__init__.py.jinja @@ -13,3 +13,11 @@ from .central import * {%- if has_federated_function %} from .federated import * {% endif %} + +{% if has_data_extraction -%} +from .extract import * +{% endif %} + +{% if has_data_preprocessing -%} +from .preprocess import * +{% endif %} \ No newline at 
end of file diff --git a/{{algorithm_name}}/central.py.jinja b/{{algorithm_name}}/central.py.jinja index 108b288..9b5daad 100644 --- a/{{algorithm_name}}/central.py.jinja +++ b/{{algorithm_name}}/central.py.jinja @@ -9,7 +9,7 @@ encryption if that is enabled). from typing import Any from vantage6.algorithm.tools.util import info, warn, error -from vantage6.algorithm.tools.decorators import algorithm_client +from vantage6.algorithm.decorator.algorithm_client import algorithm_client from vantage6.algorithm.client import AlgorithmClient diff --git a/{{algorithm_name}}/extract.py.jinja b/{{algorithm_name}}/extract.py.jinja index e69de29..25faa4c 100644 --- a/{{algorithm_name}}/extract.py.jinja +++ b/{{algorithm_name}}/extract.py.jinja @@ -0,0 +1,54 @@ +""" +This file contains all data extraction algorithm functions. + +Data extraction functions can be used to extract data from the databases present at a +vantage6 node. The extracted data are stored at the node and can then be used in +subsequent analyses. +""" +{% if has_data_extraction -%} +from typing import Any + +from vantage6.algorithm.tools.util import info, warn, error +from vantage6.algorithm.decorator.action import data_extraction +from vantage6.algorithm.client import AlgorithmClient + +{# Include the function definition from a macro #} +{% from 'macros/function_definition.jinja' import func_def %} +{{ + func_def( + data_extraction_function_name, "data_extraction", 0, data_extraction_args, + ) +}} +{# Implementation of data extraction algorithm #} + """ This function extracts data from ... to the vantage6 node """ + # TODO this is a simple example to show you how to write a data extraction function. + # Replace it by your own code. 
+ database_uri = connection_details["uri"] + + # Example for using environment variables: + # if the node configuration looks like this: + # databases: + # serviceBased: + # - name: my_postgres_db + # uri: postgresql://postgres:password@localhost:5432/my_postgres_db + # type: other + # env: + # USER: postgres + # PASSWORD: password + # then you can retrieve the environment variables: + user = os.getenv("user") + password = os.getenv("password") + + # Example for using the database URI: + df = pd.read_csv(database_uri) + + # or, if you e.g. have a SQL database, maybe something like this: + # df = pd.read_sql_query( + # "SELECT * FROM my_table", database_uri, user=user, password=password + # ) + + # Return results to the vantage6 server. + return df +{%- endif %} + +# TODO Feel free to add more data extraction functions here. \ No newline at end of file diff --git a/{{algorithm_name}}/federated.py.jinja b/{{algorithm_name}}/federated.py.jinja index a23c952..39377e7 100644 --- a/{{algorithm_name}}/federated.py.jinja +++ b/{{algorithm_name}}/federated.py.jinja @@ -1,26 +1,20 @@ """ -This file contains all partial algorithm functions, that are normally executed +This file contains all federated algorithm functions, that are normally executed on all nodes for which the algorithm is executed. The results in a return statement are sent to the vantage6 server (after -encryption if that is enabled). From there, they are sent to the partial task -or directly to the user (if they requested partial results). +encryption if that is enabled). From there, they are sent to the federated task +or directly to the user (if they requested federated results). 
""" -{% if has_partial_function -%} - {% if partial_function_data -%} +{% if has_federated_function -%} + {% if federated_function_data -%} import pandas as pd {% endif %} from typing import Any from vantage6.algorithm.tools.util import info, warn, error - {% if partial_function_client %} -from vantage6.algorithm.tools.decorators import algorithm_client - {% endif %} - {% if partial_function_data %} -from vantage6.algorithm.tools.decorators import data - {% endif %} - {% if partial_function_client %} -from vantage6.algorithm.client import AlgorithmClient + {% if federated_function_data %} +from vantage6.algorithm.decorator.data import dataframe {% endif %} @@ -28,13 +22,13 @@ from vantage6.algorithm.client import AlgorithmClient {% from 'macros/function_definition.jinja' import func_def %} {{ func_def( - partial_function_name, "federated_compute", partial_function_number_databases, - partial_args, + federated_function_name, "federated_compute", federated_function_number_databases, + federated_args, ) }} -{# Implementation of partial algorithm #} +{# Implementation of federated algorithm #} """ Decentral part of the algorithm """ - {% if partial_function_data %} + {% if federated_function_data %} # TODO this is a simple example to show you how to return something simple. # Replace it by your own code info("Computing mean age by gender") @@ -49,4 +43,4 @@ from vantage6.algorithm.client import AlgorithmClient {% endif %} {%- endif %} -# TODO Feel free to add more partial functions here. +# TODO Feel free to add more federated functions here. 
From 3fd50d9dc4c69cfd033b1e7554dbaf1e90526714 Mon Sep 17 00:00:00 2001 From: Bart van Beusekom Date: Mon, 22 Sep 2025 17:08:51 +0200 Subject: [PATCH 03/15] Add preprocessing function, update docs, and rename kwargs to arguments --- ...ition.jinja => arguments_definition.jinja} | 2 +- test/test.py.jinja | 12 +++---- .../index.rst.jinja | 2 ++ .../implementation.rst.jinja | 19 ++++++++++++ .../{{algorithm_name}}/usage.rst.jinja | 30 +++++++----------- {{algorithm_name}}/central.py.jinja | 6 ++-- {{algorithm_name}}/preprocess.py.jinja | 31 +++++++++++++++++++ 7 files changed, 74 insertions(+), 28 deletions(-) rename macros/{kwargs_definition.jinja => arguments_definition.jinja} (82%) diff --git a/macros/kwargs_definition.jinja b/macros/arguments_definition.jinja similarity index 82% rename from macros/kwargs_definition.jinja rename to macros/arguments_definition.jinja index 04953a4..f1f2c95 100644 --- a/macros/kwargs_definition.jinja +++ b/macros/arguments_definition.jinja @@ -1,4 +1,4 @@ -{%- macro kwargs_define(func_name, func_args, num_indents) -%} +{%- macro arguments_define(func_name, func_args, num_indents) -%} {%- if func_args -%} # TODO add sensible values{{"\n"}} {%- for arg in func_args -%} diff --git a/test/test.py.jinja b/test/test.py.jinja index 13784f1..60da729 100644 --- a/test/test.py.jinja +++ b/test/test.py.jinja @@ -46,9 +46,9 @@ org_ids = [organization["id"] for organization in organizations] central_task = client.task.create( input_={ "method":"{{central_function_name}}", - "kwargs": { - {% from 'macros/kwargs_definition.jinja' import kwargs_define %} - {{ kwargs_define(central_function_name, central_args, 3) }} + "arguments": { + {% from 'macros/arguments_definition.jinja' import arguments_define %} + {{ arguments_define(central_function_name, central_args, 3) }} } }, organizations=[org_ids[0]], @@ -62,9 +62,9 @@ print(results) task = client.task.create( input_={ "method":"{{federated_function_name}}", - "kwargs": { - {% from 
'macros/kwargs_definition.jinja' import kwargs_define %} - {{ kwargs_define(federated_function_name, federated_args, 3) }} + "arguments": { + {% from 'macros/arguments_definition.jinja' import arguments_define %} + {{ arguments_define(federated_function_name, federated_args, 3) }} } }, organizations=org_ids diff --git a/{% if has_docs %}docs{% endif %}/index.rst.jinja b/{% if has_docs %}docs{% endif %}/index.rst.jinja index 7dcdbee..29fb4f6 100644 --- a/{% if has_docs %}docs{% endif %}/index.rst.jinja +++ b/{% if has_docs %}docs{% endif %}/index.rst.jinja @@ -11,6 +11,8 @@ Authors .. List authors. +{{author}} + Source code ----------- diff --git a/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/implementation.rst.jinja b/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/implementation.rst.jinja index 4b0690f..f743f0c 100644 --- a/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/implementation.rst.jinja +++ b/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/implementation.rst.jinja @@ -25,3 +25,22 @@ executed in parallel on each node. .. Describe the function. {% endif %} +{% if has_data_extraction %} +Data extraction (``{{data_extraction_function_name}}``) +-------------------------------- +The data extraction function is responsible for extracting data from the databases and +storing it locally on the node, so that the data can be easily used in subsequent +analyses. + +.. Describe the function. + +{% endif %} +{% if has_data_preprocessing %} +Data preprocessing (``{{data_preprocessing_function_name}}``) +-------------------------------- +The data preprocessing function is responsible for preprocessing the data extracted from +the databases. + +.. Describe the function. 
+ +{% endif %} \ No newline at end of file diff --git a/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/usage.rst.jinja b/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/usage.rst.jinja index a8e3d74..2870962 100644 --- a/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/usage.rst.jinja +++ b/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/usage.rst.jinja @@ -23,40 +23,34 @@ first, especially the part about the from vantage6.client import Client - server = 'http://localhost' - port = 7601 - api_path = '/api' - private_key = None - username = 'root' - password = 'password' + server_url = "http://localhost:7601/api" + auth_url = "http://localhost:8080" collaboration_id = 1 organization_ids = [2] # Create connection with the vantage6 server - client = Client(server, port, api_path) - client.setup_encryption(private_key) - client.authenticate(username, password) + client = Client(server_url, auth_url) + client.authenticate() input_ = { - 'method': '{{central_function_name}}', - 'args': [], - 'kwargs': { + "method": "{{central_function_name}}", + "arguments": { {% for arg in central_args %} - '{{arg}}': 'my_value', + "{{arg}}": "my_value", {% endfor %} }, - 'output_format': 'json' + "output_format": "json" } my_task = client.task.create( collaboration=collaboration_id, organizations=organization_ids, - name='{{algorithm_name}}', - description='{{algorithm_description}}', - image='{{docker_image}}', + name="{{algorithm_name}}", + description="{{algorithm_description}}", + image="{{docker_image}}", input_=input_, databases=[{"label": "default"}], ) - task_id = my_task.get('id') + task_id = my_task.get("id") results = client.wait_for_results(task_id) \ No newline at end of file diff --git a/{{algorithm_name}}/central.py.jinja b/{{algorithm_name}}/central.py.jinja index 9b5daad..a5eb7f4 100644 --- a/{{algorithm_name}}/central.py.jinja +++ b/{{algorithm_name}}/central.py.jinja @@ -40,9 +40,9 @@ from vantage6.algorithm.client import AlgorithmClient # implemented 
in this repository) "method": "some_example_method", {% endif %} - "kwargs": { - {% from 'macros/kwargs_definition.jinja' import kwargs_define %} - {{ kwargs_define(federated_function_name, federated_args, 3) }} + "arguments": { + {% from 'macros/arguments_definition.jinja' import arguments_define %} + {{ arguments_define(federated_function_name, federated_args, 3) }} } } diff --git a/{{algorithm_name}}/preprocess.py.jinja b/{{algorithm_name}}/preprocess.py.jinja index e69de29..41ea49e 100644 --- a/{{algorithm_name}}/preprocess.py.jinja +++ b/{{algorithm_name}}/preprocess.py.jinja @@ -0,0 +1,31 @@ +""" +This file contains all data preprocessing algorithm functions. + +Data preprocessing functions can be used to preprocess the data extracted from the +databases. For instance, you can bin data into categories, or remove outliers. +""" +{% if has_data_preprocessing -%} +from typing import Any + +from vantage6.algorithm.tools.util import info, warn, error +from vantage6.algorithm.decorator.action import preprocessing +from vantage6.algorithm.client import AlgorithmClient + +{# Include the function definition from a macro. Note that preprocessing functions + have one database. #} +{% from 'macros/function_definition.jinja' import func_def %} +{{ + func_def( + data_preprocessing_function_name, "preprocessing", 1, data_preprocessing_args, + ) +}} +{# Implementation of data preprocessing algorithm #} + """ This function preprocesses the data by ...""" + # TODO this is a simple example to show you how to write a data preprocessing function. + # Replace it by your own code. Example adds a new BMI column based on height and + # weight. + df["BMI"] = df["Weight"] / (df["Height"] ** 2) + return df +{%- endif %} + +# TODO Feel free to add more data preprocessing functions here. 
\ No newline at end of file From 234accfe15e4b3ccb742670172cd4b08a0b64ee8 Mon Sep 17 00:00:00 2001 From: Bart van Beusekom Date: Tue, 23 Sep 2025 13:41:47 +0200 Subject: [PATCH 04/15] Replaced setup.py by pyproject.toml --- README.md.jinja | 3 ++- pyproject.toml.jinja | 36 ++++++++++++++++++++++++++++++++++++ requirements.txt | 2 -- setup.py.jinja | 28 ---------------------------- 4 files changed, 38 insertions(+), 31 deletions(-) create mode 100644 pyproject.toml.jinja delete mode 100644 requirements.txt delete mode 100644 setup.py.jinja diff --git a/README.md.jinja b/README.md.jinja index ac52d0f..62c2221 100644 --- a/README.md.jinja +++ b/README.md.jinja @@ -26,7 +26,8 @@ code for TODO instead of following the checklist below. to include values for these arguments in the `client.task.create()` calls that are available there. - [ ] If you are using Python packages that are not in the standard library, add - them to the `requirements.txt` and `setup.py` file. + them to the `pyproject.toml` file. +- [ ] Fill out the fields in the `pyproject.toml` file. {% if has_docs %} - [ ] Fill in the documentation template. This will help others to understand your algorithm, be able to use it safely, and to contribute to it. 
diff --git a/pyproject.toml.jinja b/pyproject.toml.jinja new file mode 100644 index 0000000..fbb24c6 --- /dev/null +++ b/pyproject.toml.jinja @@ -0,0 +1,36 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "{{algorithm_name}}" +version = "1.0.0" +description = "{{algorithm_description}}" +readme = "README.md" +requires-python = ">=3.13" +dependencies = [ + "vantage6-algorithm-tools", + "pandas", +] +authors = [ + # TODO add authors +] +{% if open_source_license == 'mit' %} +license = { text = "MIT" } +{% elif open_source_license == 'apache' %} +license = { text = "Apache Software License 2.0" } +{% elif open_source_license == 'gpl' %} +license = { text = "GNU General Public License v3" } +{% endif %} +keywords = ["vantage6", "algorithm", "federated-learning"] +classifiers = [ + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.13", +] + +[project.urls] +# TODO add urls + +[tool.hatch.build.targets.wheel] +packages = ["{{algorithm_name}}"] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 1f33787..0000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -vantage6-algorithm-tools -pandas diff --git a/setup.py.jinja b/setup.py.jinja deleted file mode 100644 index 8b18bf3..0000000 --- a/setup.py.jinja +++ /dev/null @@ -1,28 +0,0 @@ -from os import path -from codecs import open -from setuptools import setup, find_packages - -# we're using a README.md, if you do not have this in your folder, simply -# replace this with a string. -here = path.abspath(path.dirname(__file__)) -with open(path.join(here, 'README.md'), encoding='utf-8') as f: - long_description = f.read() - -# Here you specify the meta-data of your package. The `name` argument is -# needed in some other steps. 
-setup( - name='{{algorithm_name}}', - version="1.0.0", - description='{{algorithm_description}}', - long_description=long_description, - long_description_content_type='text/markdown', - # TODO add a url to your github repository here (or remove this line if - # you do not want to make your source code public) - # url='https://github.com/....', - packages=find_packages(), - python_requires='>=3.10', - install_requires=[ - 'vantage6-algorithm-tools', - 'pandas' - ] -) From 8d667399e2dcffcb5fb485923a621c5b5ccfb810 Mon Sep 17 00:00:00 2001 From: bartvanb Date: Tue, 23 Sep 2025 15:58:01 +0200 Subject: [PATCH 05/15] Minor bugfixes and changes, also optimization of spacing --- README.md.jinja | 4 ++- copier.yml | 8 +++--- macros/function_definition.jinja | 8 ++---- test/test.py.jinja | 23 ++++++---------- test/test_data.csv | 38 +++++++++++++------------- {{algorithm_name}}/__init__.py.jinja | 6 ++-- {{algorithm_name}}/central.py.jinja | 8 ++---- {{algorithm_name}}/extract.py.jinja | 5 ++-- {{algorithm_name}}/federated.py.jinja | 1 + {{algorithm_name}}/preprocess.py.jinja | 9 +++--- 10 files changed, 53 insertions(+), 57 deletions(-) diff --git a/README.md.jinja b/README.md.jinja index 62c2221..ecf58df 100644 --- a/README.md.jinja +++ b/README.md.jinja @@ -38,7 +38,9 @@ code for TODO instead of following the checklist below. {% endif %} - [ ] If you want to submit your algorithm to a vantage6 algorithm store, be sure to fill in everything in ``algorithm_store.json`` (and be sure to update - it if you change function names, arguments, etc.). + it if you change function names, arguments, etc.). It is recommended to run + ``v6 algorithm generate-algorithm-json`` to automatically generate the file - this + should work especially well if you have added proper docstrings to your functions. {% if has_gh_pipeline %} - [ ] Create a ``DOCKER_USER`` and ``DOCKER_PASSWORD`` secret in the GitHub repository settings. 
This will be used to push the Docker image to the registry in the github diff --git a/copier.yml b/copier.yml index 116ca35..7f8e78b 100644 --- a/copier.yml +++ b/copier.yml @@ -30,7 +30,7 @@ central_function_name: type: str when: "{{ has_central_function }}" help: "What is the name of your central function?" - default: central + default: central_function central_args: type: json @@ -50,7 +50,7 @@ federated_function_name: type: str when: "{{ has_federated_function }}" help: "What is the name of your federated function?" - default: federated + default: federated_function federated_function_data: type: bool @@ -96,7 +96,7 @@ data_extraction_function_name: type: str when: "{{ has_data_extraction }}" help: "What is the name of your data extraction function?" - default: data_extraction + default: data_extraction_function data_extraction_args: type: json @@ -121,7 +121,7 @@ data_preprocessing_function_name: type: str when: "{{ has_data_preprocessing }}" help: "What is the name of your data preprocessing function?" 
- default: data_preprocessing + default: data_preprocessing_function data_preprocessing_args: type: json diff --git a/macros/function_definition.jinja b/macros/function_definition.jinja index 957d721..0a33b7e 100644 --- a/macros/function_definition.jinja +++ b/macros/function_definition.jinja @@ -1,5 +1,4 @@ {% macro func_def(func_name, func_type, num_dbs, func_args) %} - {# Function type decorator #} {% if func_type == "central_compute" -%} @central @@ -11,7 +10,6 @@ {% elif func_type == "data_extraction" -%} @data_extraction {% endif -%} - {# Data decorator #} {%- if num_dbs > 0 -%} @dataframe({{num_dbs}}) @@ -20,9 +18,9 @@ def {{func_name}}( {{" "}}{# <- Add 4 whitespaces before function args start #} {# Add client argument #} -{%- if func_type == "central_compute" %} +{%- if func_type == "central_compute" -%} client: AlgorithmClient -{% elif func_type == "data_extraction" %} +{%- elif func_type == "data_extraction" -%} connection_details: dict {%- endif -%} {# Add data arguments #} @@ -55,4 +53,4 @@ connection_details: dict {# Finalize function definition #} ) -> Any: -{% endmacro %} \ No newline at end of file +{%- endmacro -%} \ No newline at end of file diff --git a/test/test.py.jinja b/test/test.py.jinja index 60da729..980e03a 100644 --- a/test/test.py.jinja +++ b/test/test.py.jinja @@ -11,7 +11,7 @@ installed. 
This can be done by running: pip install vantage6-algorithm-tools """ -from vantage6.algorithm.tools.mock_client import MockAlgorithmClient +from vantage6.algorithm.client.mock_client import MockAlgorithmClient from pathlib import Path # get path of current directory @@ -44,12 +44,10 @@ org_ids = [organization["id"] for organization in organizations] {% if has_central_function %} # Run the central method on 1 node and get the results central_task = client.task.create( - input_={ - "method":"{{central_function_name}}", - "arguments": { - {% from 'macros/arguments_definition.jinja' import arguments_define %} - {{ arguments_define(central_function_name, central_args, 3) }} - } + method="{{central_function_name}}", + arguments={ + {% from 'macros/arguments_definition.jinja' import arguments_define %} + {{ arguments_define(central_function_name, central_args, 2) }} }, organizations=[org_ids[0]], ) @@ -60,13 +58,10 @@ print(results) {% if has_federated_function %} # Run the federated method for all organizations task = client.task.create( - input_={ - "method":"{{federated_function_name}}", - "arguments": { - {% from 'macros/arguments_definition.jinja' import arguments_define %} - {{ arguments_define(federated_function_name, federated_args, 3) }} - } - }, + method="{{federated_function_name}}", + arguments={ + {% from 'macros/arguments_definition.jinja' import arguments_define %} + {{ arguments_define(federated_function_name, federated_args, 2) }} }, organizations=org_ids ) print(task) diff --git a/test/test_data.csv b/test/test_data.csv index 34cbe41..278058b 100644 --- a/test/test_data.csv +++ b/test/test_data.csv @@ -1,19 +1,19 @@ -"Name","Gender","Age","Height(in)","Weight(lbs)" -"Alex","M",41,74,170 -"Bert","M",42,68,166 -"Carl","M",32,70,155 -"Dave","M",39,72,167 -"Elly","F",30,66,124 -"Fran","F",33,66,115 -"Gwen","F",26,64,121 -"Hank","M",30,71,158 -"Ivan","M",53,72,175 -"Jake","M",32,69,143 -"Kate","F",47,69,139 -"Luke","M",34,72,163 -"Myra","F",23,62,98 
-"Neil","M",36,75,160 -"Omar","M",38,70,145 -"Page","F",31,67,135 -"Quin","M",29,71,176 -"Ruth","F",28,65,131 \ No newline at end of file +"Name","Gender","Age","Height(cm)","Weight(kg)" +"Alex","M",41,182,77 +"Bert","M",42,172,75 +"Carl","M",32,177,70 +"Dave","M",39,182,77 +"Elly","F",30,167,65 +"Fran","F",33,167,56 +"Gwen","F",26,162,55 +"Hank","M",30,178,72 +"Ivan","M",53,182,85 +"Jake","M",32,175,65 +"Kate","F",47,175,58 +"Luke","M",34,182,85 +"Myra","F",23,160,54 +"Neil","M",36,187,86 +"Omar","M",38,175,65 +"Page","F",31,175,58 +"Quin","M",29,178,72 +"Ruth","F",28,165,55 \ No newline at end of file diff --git a/{{algorithm_name}}/__init__.py.jinja b/{{algorithm_name}}/__init__.py.jinja index 2d4fe5c..160a14c 100644 --- a/{{algorithm_name}}/__init__.py.jinja +++ b/{{algorithm_name}}/__init__.py.jinja @@ -2,7 +2,7 @@ from vantage6.algorithm.data_extraction import * {% endif %} -{% if import_infra_preprocessing -%} +{%- if import_infra_preprocessing -%} from vantage6.algorithm.preprocessing import * {% endif %} @@ -14,10 +14,10 @@ from .central import * from .federated import * {% endif %} -{% if has_data_extraction -%} +{%- if has_data_extraction -%} from .extract import * {% endif %} -{% if has_data_preprocessing -%} +{%- if has_data_preprocessing -%} from .preprocess import * {% endif %} \ No newline at end of file diff --git a/{{algorithm_name}}/central.py.jinja b/{{algorithm_name}}/central.py.jinja index a5eb7f4..370b939 100644 --- a/{{algorithm_name}}/central.py.jinja +++ b/{{algorithm_name}}/central.py.jinja @@ -10,6 +10,7 @@ from typing import Any from vantage6.algorithm.tools.util import info, warn, error from vantage6.algorithm.decorator.algorithm_client import algorithm_client +from vantage6.algorithm.decorator.action import central from vantage6.algorithm.client import AlgorithmClient @@ -42,8 +43,7 @@ from vantage6.algorithm.client import AlgorithmClient {% endif %} "arguments": { {% from 'macros/arguments_definition.jinja' import arguments_define %} 
- {{ arguments_define(federated_function_name, federated_args, 3) }} - } + {{ arguments_define(federated_function_name, federated_args, 3) }} } } # create a subtask for all organizations in the collaboration. @@ -66,8 +66,6 @@ from vantage6.algorithm.client import AlgorithmClient # return the final results of the algorithm return results - {% endif %} - -{%- endif %} +{% endif %} # TODO Feel free to add more central functions here. diff --git a/{{algorithm_name}}/extract.py.jinja b/{{algorithm_name}}/extract.py.jinja index 25faa4c..f8cb8df 100644 --- a/{{algorithm_name}}/extract.py.jinja +++ b/{{algorithm_name}}/extract.py.jinja @@ -6,11 +6,12 @@ vantage6 node. The extracted data are stored at the node and can then be used in subsequent analyses. """ {% if has_data_extraction -%} +import os from typing import Any +import pandas as pd from vantage6.algorithm.tools.util import info, warn, error from vantage6.algorithm.decorator.action import data_extraction -from vantage6.algorithm.client import AlgorithmClient {# Include the function definition from a macro #} {% from 'macros/function_definition.jinja' import func_def %} @@ -49,6 +50,6 @@ from vantage6.algorithm.client import AlgorithmClient # Return results to the vantage6 server. return df -{%- endif %} +{% endif %} # TODO Feel free to add more data extraction functions here. 
\ No newline at end of file diff --git a/{{algorithm_name}}/federated.py.jinja b/{{algorithm_name}}/federated.py.jinja index 39377e7..a60d6ba 100644 --- a/{{algorithm_name}}/federated.py.jinja +++ b/{{algorithm_name}}/federated.py.jinja @@ -13,6 +13,7 @@ import pandas as pd from typing import Any from vantage6.algorithm.tools.util import info, warn, error +from vantage6.algorithm.decorator.action import federated {% if federated_function_data %} from vantage6.algorithm.decorator.data import dataframe {% endif %} diff --git a/{{algorithm_name}}/preprocess.py.jinja b/{{algorithm_name}}/preprocess.py.jinja index 41ea49e..9df796d 100644 --- a/{{algorithm_name}}/preprocess.py.jinja +++ b/{{algorithm_name}}/preprocess.py.jinja @@ -6,10 +6,11 @@ databases. For instance, you can bin data into categories, or remove outliers. """ {% if has_data_preprocessing -%} from typing import Any +import pandas as pd from vantage6.algorithm.tools.util import info, warn, error from vantage6.algorithm.decorator.action import preprocessing -from vantage6.algorithm.client import AlgorithmClient +from vantage6.algorithm.decorator.data import dataframe {# Include the function definition from a macro. Note that preprocessing functions have one database. #} @@ -24,8 +25,8 @@ from vantage6.algorithm.client import AlgorithmClient # TODO this is a simple example to show you how to write a data preprocessing function. # Replace it by your own code. Example adds a new BMI column based on height and # weight. - df["BMI"] = df["Weight"] / (df["Height"] ** 2) - return df -{%- endif %} + df1["BMI"] = df1["Weight"] / (df1["Height"] ** 2) + return df1 +{% endif %} # TODO Feel free to add more data preprocessing functions here. 
\ No newline at end of file From 231b84201c6536b6c9be5c13d27fab6ee9a7e0c0 Mon Sep 17 00:00:00 2001 From: bartvanb Date: Tue, 23 Sep 2025 16:26:46 +0200 Subject: [PATCH 06/15] Remove dataframe decorator for preprocessing functions as it is now included in the infra --- {{algorithm_name}}/preprocess.py.jinja | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/{{algorithm_name}}/preprocess.py.jinja b/{{algorithm_name}}/preprocess.py.jinja index 9df796d..355ffb7 100644 --- a/{{algorithm_name}}/preprocess.py.jinja +++ b/{{algorithm_name}}/preprocess.py.jinja @@ -17,7 +17,7 @@ from vantage6.algorithm.decorator.data import dataframe {% from 'macros/function_definition.jinja' import func_def %} {{ func_def( - data_preprocessing_function_name, "preprocessing", 1, data_preprocessing_args, + data_preprocessing_function_name, "preprocessing", 0, data_preprocessing_args, ) }} {# Implementation of data preprocessing algorithm #} From 4e8eb6c6f33320a694beb5395c3ace7b36f7a60d Mon Sep 17 00:00:00 2001 From: Frank Date: Mon, 20 Oct 2025 15:29:33 +0200 Subject: [PATCH 07/15] Separated test files for the different actions --- ...tion %}test_extraction.py{% endif %}.jinja | 62 ++++++++++++++++++ ...g %}test_preprocessing.py{% endif %}.jinja | 65 +++++++++++++++++++ ...nction %}test_compute.py{% endif %}.jinja} | 43 ++++++------ 3 files changed, 150 insertions(+), 20 deletions(-) create mode 100644 test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja create mode 100644 test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja rename test/{test.py.jinja => {% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja} (61%) diff --git a/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja b/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja new file mode 100644 index 0000000..2583002 --- /dev/null +++ b/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja @@ -0,0 
+1,62 @@ +""" +Run this script to test your extraction function locally (without building a Docker +image) using the mock client. + +Run as: + + python test_extraction.py + +Make sure to do so in an environment where `vantage6-algorithm-tools` is +installed. This can be done by running: + + pip install vantage6-algorithm-tools +""" +from vantage6.mock.network import MockNetwork +from pathlib import Path + +# get path of current directory +current_path = Path(__file__).parent + +# The MockNetwork expects a list of datasets. In the case of an extraction job, this +# needs to be a URI. In this example, we use a CSV file that was included in this +# template. In case you want to connect to a database you need to make sure that the +# database is reachable. +DATABASE_LABEL = "default" +network = MockNetwork( + datasets=[ + { + DATABASE_LABEL: { + "database": current_path / "test_data.csv", + "db_type": "csv", + } + } + ], + module_name="{{algorithm_name}}" +) + +# Once the network is created, we can get the client to interact with the MockNetwork. 
+client = network.user_client + +# List mock organizations +organizations = client.organization.list() +print(organizations) +org_ids = [organization["id"] for organization in organizations] + +# Run the data extraction function +task = client.task.create( + method="{{data_extraction_function_name}}", + arguments={ + {% from 'macros/arguments_definition.jinja' import arguments_define %} + {{ arguments_define(data_extraction_function_name, data_extraction_args, 2) }} + }, + organizations=org_ids, + databases=[{"label": DATABASE_LABEL}], +) + +# Wait for the task to complete +results = client.wait_for_results(task.get("id")) +print(results) + +# TODO implement checking the dataframe that has been created at the node: +for node in network.nodes: + print(node.dataframes) diff --git a/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja b/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja new file mode 100644 index 0000000..023fd03 --- /dev/null +++ b/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja @@ -0,0 +1,65 @@ +""" +Run this script to test your preprocessing function locally (without building a Docker +image) using the mock client. + +Run as: + + python test_preprocessing.py + +Make sure to do so in an environment where `vantage6-algorithm-tools` is +installed. This can be done by running: + + pip install vantage6-algorithm-tools +""" +import pandas as pd + +from vantage6.mock.network import MockNetwork +from pathlib import Path + +# get path of current directory +current_path = Path(__file__).parent + +# The MockNetwork expects a list of datasets. In this instance we are not interested in +# extracting the data from its source. 
Therefore, we supply the data as a Pandas +# dataframe avoiding the need to extract the data first +data = pd.read_csv(current_path / "test_data.csv") +DATABASE_LABEL = "default" +network = MockNetwork( + datasets=[ + { + DATABASE_LABEL: { + "database": data, + "db_type": "csv", + } + } + ], + module_name="{{algorithm_name}}" +) + +# Once the network is created, we can get the client to interact with the MockNetwork. +client = network.user_client + +# List mock organizations +organizations = client.organization.list() +print(organizations) +org_ids = [organization["id"] for organization in organizations] + +# Run the data extraction function +# TODO the MockNetwork has not yet implemented the preprocessing interface +# task = client.task.create( +# method="{{preprocessing_function_name}}", +# arguments={ +# {% from 'macros/arguments_definition.jinja' import arguments_define %} +# {{ arguments_define(preprocessing_function_name, preprocessing_args, 2) }} +# }, +# organizations=org_ids, +# databases=[{"label": DATABASE_LABEL}], +# ) + +# Wait for the task to complete +# results = client.wait_for_results(task.get("id")) +# print(results) + +# TODO implement checking the dataframe that has been created at the node: +# for node in network.nodes: +# print(node.dataframes) diff --git a/test/test.py.jinja b/test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja similarity index 61% rename from test/test.py.jinja rename to test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja index 980e03a..caaeab2 100644 --- a/test/test.py.jinja +++ b/test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja @@ -1,42 +1,45 @@ """ -Run this script to test your algorithm locally (without building a Docker -image) using the mock client. +Run this script to test your compute function locally (without building a Docker image) +using the mock client. 
Run as: - python test.py + python test_compute.py Make sure to do so in an environment where `vantage6-algorithm-tools` is installed. This can be done by running: pip install vantage6-algorithm-tools """ -from vantage6.algorithm.client.mock_client import MockAlgorithmClient +import pandas as pd + +from vantage6.mock.network import MockNetwork from pathlib import Path # get path of current directory current_path = Path(__file__).parent -## Mock client -client = MockAlgorithmClient( +# The MockNetwork expects a list of datasets. In this instance we are not interested in +# extracting the data from its source. Therefore, we supply the data as a Pandas +# dataframe avoiding the need to extract the data first +data = pd.read_csv(current_path / "test_data.csv") +DATABASE_LABEL = "default" +network = MockNetwork( datasets=[ - # Data for first organization - [{ - "database": current_path / "test_data.csv", - "db_type": "csv", - "input_data": {} - }], - # Data for second organization - [{ - "database": current_path / "test_data.csv", - "db_type": "csv", - "input_data": {} - }] + { + DATABASE_LABEL: { + "database": data, + "db_type": "csv", + } + } ], - module="{{algorithm_name}}" + module_name="{{algorithm_name}}" ) -# list mock organizations +# Once the network is created, we can get the client to interact with the MockNetwork. 
+client = network.user_client + +# List mock organizations organizations = client.organization.list() print(organizations) org_ids = [organization["id"] for organization in organizations] From 14e945b1f501c3b27c04fd1b6e741765d8a8e019 Mon Sep 17 00:00:00 2001 From: bartvanb Date: Thu, 13 Nov 2025 14:30:13 +0100 Subject: [PATCH 08/15] Specify v5 in Dockerfile base --- Dockerfile.jinja | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.jinja b/Dockerfile.jinja index b506484..9bae565 100644 --- a/Dockerfile.jinja +++ b/Dockerfile.jinja @@ -1,5 +1,5 @@ # basic python3 image as base -FROM harbor2.vantage6.ai/infrastructure/algorithm-base +FROM harbor2.vantage6.ai/infrastructure/algorithm-base:5.0 # This is a placeholder that should be overloaded by invoking # docker build with '--build-arg PKG_NAME=...' From 2b2d9071b80e75ae7fbca02b4e44474bd407da6b Mon Sep 17 00:00:00 2001 From: bartvanb Date: Thu, 13 Nov 2025 14:30:51 +0100 Subject: [PATCH 09/15] Minor updates and bugfixes to work with v5 --- README.md.jinja | 10 ++++++---- algorithm_store.json.jinja | 11 +++-------- macros/function_definition.jinja | 2 +- pyproject.toml.jinja | 7 +++++-- ...a_extraction %}test_extraction.py{% endif %}.jinja | 2 +- ...rocessing %}test_preprocessing.py{% endif %}.jinja | 4 ++-- {{algorithm_name}}/preprocess.py.jinja | 3 +-- 7 files changed, 19 insertions(+), 20 deletions(-) diff --git a/README.md.jinja b/README.md.jinja index ecf58df..c677894 100644 --- a/README.md.jinja +++ b/README.md.jinja @@ -18,7 +18,8 @@ Please ensure to execute the following steps. The steps are also indicated with TODO statements in the generated code - so you can also simply search the code for TODO instead of following the checklist below. -- [ ] Include a URL to your code repository in setup.py. +- [ ] Fill out the fields in the `pyproject.toml` file, such as a URL to your code + repository. Alternatively, remove these fields. - [ ] Implement your algorithm functions. 
- [ ] You are free to add more arguments to the functions. Be sure to add them *after* the `client` and dataframe arguments. @@ -26,8 +27,7 @@ code for TODO instead of following the checklist below. to include values for these arguments in the `client.task.create()` calls that are available there. - [ ] If you are using Python packages that are not in the standard library, add - them to the `pyproject.toml` file. -- [ ] Fill out the fields in the `pyproject.toml` file. + them to the `pyproject.toml` file. Note that `pandas` is already included by default. {% if has_docs %} - [ ] Fill in the documentation template. This will help others to understand your algorithm, be able to use it safely, and to contribute to it. @@ -39,8 +39,10 @@ code for TODO instead of following the checklist below. - [ ] If you want to submit your algorithm to a vantage6 algorithm store, be sure to fill in everything in ``algorithm_store.json`` (and be sure to update it if you change function names, arguments, etc.). It is recommended to run - ``v6 algorithm generate-algorithm-json`` to automatically generate the file - this + ``v6 algorithm generate-store-json`` to automatically generate the file - this should work especially well if you have added proper docstrings to your functions. + Note that you do need the `vantage6` CLI to be able to use this command, which can be + installed by e.g. running `pip install vantage6` (or `uv pip install vantage6`). {% if has_gh_pipeline %} - [ ] Create a ``DOCKER_USER`` and ``DOCKER_PASSWORD`` secret in the GitHub repository settings. 
This will be used to push the Docker image to the registry in the github diff --git a/algorithm_store.json.jinja b/algorithm_store.json.jinja index 28f91cc..7e1ba30 100644 --- a/algorithm_store.json.jinja +++ b/algorithm_store.json.jinja @@ -1,7 +1,7 @@ { "name": "{{algorithm_name}}", "image": "{{docker_image}}", - "vantage6_version": "4.6", + "vantage6_version": "5.0", "code_url": "https://mygitrepo.org", "documentation_url": "", "partitioning": "horizontal", @@ -10,18 +10,13 @@ { "name": "{{central_function_name}}", "description": "{{algorithm_description}}", - "type": "central", + "step_type": "central_compute", "databases": [ {%- for idx in range(federated_function_number_databases) -%} { "name": "Central database {{idx + 1}}" }{%- if not loop.last or federated_function_number_databases > 0 -%},{%- endif -%} {% endfor %} - {% for idx in range(federated_function_number_databases) %} - { - "name": "Central database {{idx + 1}}" - }{%- if not loop.last -%},{%- endif -%} - {% endfor %} ], "arguments": [ {% for arg in central_args %} @@ -38,7 +33,7 @@ { "name": "{{federated_function_name}}", "description": "", - "type": "federated", + "step_type": "federated_compute", "databases": [ {%- for idx in range(federated_function_number_databases) -%} { diff --git a/macros/function_definition.jinja b/macros/function_definition.jinja index 0a33b7e..f6b9856 100644 --- a/macros/function_definition.jinja +++ b/macros/function_definition.jinja @@ -11,7 +11,7 @@ @data_extraction {% endif -%} {# Data decorator #} -{%- if num_dbs > 0 -%} +{%- if num_dbs > 0 and func_type != "preprocessing" -%} @dataframe({{num_dbs}}) {% endif -%} {# Function definition #} diff --git a/pyproject.toml.jinja b/pyproject.toml.jinja index fbb24c6..ad4fb57 100644 --- a/pyproject.toml.jinja +++ b/pyproject.toml.jinja @@ -9,7 +9,7 @@ description = "{{algorithm_description}}" readme = "README.md" requires-python = ">=3.13" dependencies = [ - "vantage6-algorithm-tools", + 
"vantage6-algorithm-tools>=5.0.0a43", "pandas", ] authors = [ @@ -33,4 +33,7 @@ classifiers = [ # TODO add urls [tool.hatch.build.targets.wheel] -packages = ["{{algorithm_name}}"] \ No newline at end of file +packages = ["{{algorithm_name}}"] + +[tool.uv] +prereleases = "allow" \ No newline at end of file diff --git a/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja b/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja index 2583002..41b8108 100644 --- a/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja +++ b/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja @@ -57,6 +57,6 @@ task = client.task.create( results = client.wait_for_results(task.get("id")) print(results) -# TODO implement checking the dataframe that has been created at the node: +{# TODO implement checking the dataframe that has been created at the node: #} for node in network.nodes: print(node.dataframes) diff --git a/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja b/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja index 023fd03..26200c1 100644 --- a/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja +++ b/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja @@ -43,7 +43,7 @@ client = network.user_client organizations = client.organization.list() print(organizations) org_ids = [organization["id"] for organization in organizations] - +{# # Run the data extraction function # TODO the MockNetwork has not yet implemented the preprocessing interface # task = client.task.create( @@ -62,4 +62,4 @@ org_ids = [organization["id"] for organization in organizations] # TODO implement checking the dataframe that has been created at the node: # for node in network.nodes: -# print(node.dataframes) +# print(node.dataframes) #} diff --git a/{{algorithm_name}}/preprocess.py.jinja b/{{algorithm_name}}/preprocess.py.jinja index 355ffb7..c5a7f13 100644 
--- a/{{algorithm_name}}/preprocess.py.jinja +++ b/{{algorithm_name}}/preprocess.py.jinja @@ -10,14 +10,13 @@ import pandas as pd from vantage6.algorithm.tools.util import info, warn, error from vantage6.algorithm.decorator.action import preprocessing -from vantage6.algorithm.decorator.data import dataframe {# Include the function definition from a macro. Note that preprocessing functions have one database. #} {% from 'macros/function_definition.jinja' import func_def %} {{ func_def( - data_preprocessing_function_name, "preprocessing", 0, data_preprocessing_args, + data_preprocessing_function_name, "preprocessing", 1, data_preprocessing_args, ) }} {# Implementation of data preprocessing algorithm #} From ae186cfb7dabdebf1a8607caf7c6a66241ebe03f Mon Sep 17 00:00:00 2001 From: bartvanb Date: Thu, 13 Nov 2025 15:51:31 +0100 Subject: [PATCH 10/15] Move definition method and arguments out of input dict, as is required in v5 --- {{algorithm_name}}/central.py.jinja | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/{{algorithm_name}}/central.py.jinja b/{{algorithm_name}}/central.py.jinja index 370b939..9a2020b 100644 --- a/{{algorithm_name}}/central.py.jinja +++ b/{{algorithm_name}}/central.py.jinja @@ -31,25 +31,19 @@ from vantage6.algorithm.client import AlgorithmClient organizations = client.organization.list() org_ids = [organization.get("id") for organization in organizations] - # Define input parameters for a subtask - info("Defining input parameters") - input_ = { + # create a subtask for all organizations in the collaboration. 
+ info("Creating subtask for all organizations in the collaboration") + task = client.task.create( {% if has_federated_function %} - "method": "{{federated_function_name}}", + method="{{federated_function_name}}", {% else %} # TODO you should define a federated method here (which should also be # implemented in this repository) - "method": "some_example_method", + method="some_example_method", {% endif %} - "arguments": { + arguments={ {% from 'macros/arguments_definition.jinja' import arguments_define %} - {{ arguments_define(federated_function_name, federated_args, 3) }} } - } - - # create a subtask for all organizations in the collaboration. - info("Creating subtask for all organizations in the collaboration") - task = client.task.create( - input_=input_, + {{ arguments_define(federated_function_name, federated_args, 3) }} }, organizations=org_ids, name="My subtask", description="This is a very important subtask" From 8c79591e0f623623365e121c5984a8d5de67671d Mon Sep 17 00:00:00 2001 From: bartvanb Date: Fri, 14 Nov 2025 13:25:19 +0100 Subject: [PATCH 11/15] Modify headers test data --- test/test_data.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_data.csv b/test/test_data.csv index 278058b..1606203 100644 --- a/test/test_data.csv +++ b/test/test_data.csv @@ -1,4 +1,4 @@ -"Name","Gender","Age","Height(cm)","Weight(kg)" +"Name","Gender","Age","Height","Weight" "Alex","M",41,182,77 "Bert","M",42,172,75 "Carl","M",32,177,70 From 5f8416195154f1fc28ede5f0872508651fee033e Mon Sep 17 00:00:00 2001 From: bartvanb Date: Fri, 14 Nov 2025 13:25:33 +0100 Subject: [PATCH 12/15] Default test scripts with 3 nodes instead of 1 --- ...a_extraction %}test_extraction.py{% endif %}.jinja | 9 +++------ ...rocessing %}test_preprocessing.py{% endif %}.jinja | 9 +++------ ...erated_function %}test_compute.py{% endif %}.jinja | 11 +++++------ 3 files changed, 11 insertions(+), 18 deletions(-) diff --git a/test/{% if has_data_extraction 
%}test_extraction.py{% endif %}.jinja b/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja index 41b8108..751f4f4 100644 --- a/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja +++ b/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja @@ -24,12 +24,9 @@ current_path = Path(__file__).parent DATABASE_LABEL = "default" network = MockNetwork( datasets=[ - { - DATABASE_LABEL: { - "database": current_path / "test_data.csv", - "db_type": "csv", - } - } + {DATABASE_LABEL: {"database": current_path / "test_data.csv"}}, + {DATABASE_LABEL: {"database": current_path / "test_data.csv"}}, + {DATABASE_LABEL: {"database": current_path / "test_data.csv"}}, ], module_name="{{algorithm_name}}" ) diff --git a/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja b/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja index 26200c1..bec97e5 100644 --- a/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja +++ b/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja @@ -26,12 +26,9 @@ data = pd.read_csv(current_path / "test_data.csv") DATABASE_LABEL = "default" network = MockNetwork( datasets=[ - { - DATABASE_LABEL: { - "database": data, - "db_type": "csv", - } - } + {DATABASE_LABEL: {"database": data}}, + {DATABASE_LABEL: {"database": data}}, + {DATABASE_LABEL: {"database": data}}, ], module_name="{{algorithm_name}}" ) diff --git a/test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja b/test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja index caaeab2..84a95c0 100644 --- a/test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja +++ b/test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja @@ -24,14 +24,13 @@ current_path = Path(__file__).parent # dataframe avoiding the need to 
extract the data first data = pd.read_csv(current_path / "test_data.csv") DATABASE_LABEL = "default" + +# Create a MockNetwork with identical datasets for three nodes network = MockNetwork( datasets=[ - { - DATABASE_LABEL: { - "database": data, - "db_type": "csv", - } - } + {DATABASE_LABEL: {"database": data}}, + {DATABASE_LABEL: {"database": data}}, + {DATABASE_LABEL: {"database": data}}, ], module_name="{{algorithm_name}}" ) From b3af4177b1905958b041046132bb2b09fedf470a Mon Sep 17 00:00:00 2001 From: bartvanb Date: Fri, 14 Nov 2025 13:59:04 +0100 Subject: [PATCH 13/15] Add test script for preprocessing tasks, and bugfixes in data extraction test --- ...tion %}test_extraction.py{% endif %}.jinja | 28 +++++++++++---- ...g %}test_preprocessing.py{% endif %}.jinja | 34 ++++++++++--------- 2 files changed, 40 insertions(+), 22 deletions(-) diff --git a/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja b/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja index 751f4f4..c0bd330 100644 --- a/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja +++ b/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja @@ -24,9 +24,24 @@ current_path = Path(__file__).parent DATABASE_LABEL = "default" network = MockNetwork( datasets=[ - {DATABASE_LABEL: {"database": current_path / "test_data.csv"}}, - {DATABASE_LABEL: {"database": current_path / "test_data.csv"}}, - {DATABASE_LABEL: {"database": current_path / "test_data.csv"}}, + { + DATABASE_LABEL: { + "database": current_path / "test_data.csv", + "db_type": "csv", + }, + }, + { + DATABASE_LABEL: { + "database": current_path / "test_data.csv", + "db_type": "csv", + }, + }, + { + DATABASE_LABEL: { + "database": current_path / "test_data.csv", + "db_type": "csv", + }, + }, ], module_name="{{algorithm_name}}" ) @@ -40,20 +55,21 @@ print(organizations) org_ids = [organization["id"] for organization in organizations] # Run the data extraction function -task = 
client.task.create( +task = client.dataframe.create( method="{{data_extraction_function_name}}", arguments={ {% from 'macros/arguments_definition.jinja' import arguments_define %} {{ arguments_define(data_extraction_function_name, data_extraction_args, 2) }} }, organizations=org_ids, - databases=[{"label": DATABASE_LABEL}], + label=DATABASE_LABEL, ) # Wait for the task to complete results = client.wait_for_results(task.get("id")) -print(results) +print("results:", results) {# TODO implement checking the dataframe that has been created at the node: #} +print("dataframes:") for node in network.nodes: print(node.dataframes) diff --git a/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja b/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja index bec97e5..ae24925 100644 --- a/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja +++ b/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja @@ -40,23 +40,25 @@ client = network.user_client organizations = client.organization.list() print(organizations) org_ids = [organization["id"] for organization in organizations] -{# + # Run the data extraction function -# TODO the MockNetwork has not yet implemented the preprocessing interface -# task = client.task.create( -# method="{{preprocessing_function_name}}", -# arguments={ -# {% from 'macros/arguments_definition.jinja' import arguments_define %} -# {{ arguments_define(preprocessing_function_name, preprocessing_args, 2) }} -# }, -# organizations=org_ids, -# databases=[{"label": DATABASE_LABEL}], -# ) +task = client.dataframe.preprocess( + id_=network.server.dataframes[0]["id"], + image="{{docker_image}}", + method="{{preprocessing_function_name}}", + arguments={ + {% from 'macros/arguments_definition.jinja' import arguments_define %} + {{ arguments_define(preprocessing_function_name, preprocessing_args, 2) }} + }, + organizations=org_ids, + databases=[{"label": DATABASE_LABEL}], +) # Wait 
for the task to complete -# results = client.wait_for_results(task.get("id")) -# print(results) +results = client.wait_for_results(task.get("id")) +print("results:", results) -# TODO implement checking the dataframe that has been created at the node: -# for node in network.nodes: -# print(node.dataframes) #} +{# TODO implement checking the dataframe that has been created at the node: #} +print("dataframes:") +for node in network.nodes: + print(node.dataframes) From cf0d0ae5d338e9b96959db9158e901f75c8c51d5 Mon Sep 17 00:00:00 2001 From: bartvanb Date: Fri, 21 Nov 2025 14:43:03 +0100 Subject: [PATCH 14/15] Use proper databases definition for v5 --- ..._federated_function %}test_compute.py{% endif %}.jinja | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja b/test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja index 84a95c0..ca6e305 100644 --- a/test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja +++ b/test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja @@ -52,6 +52,9 @@ central_task = client.task.create( {{ arguments_define(central_function_name, central_args, 2) }} }, organizations=[org_ids[0]], + databases=[ + {"type": "dataframe", "dataframe_id": network.server.dataframes[0]["id"]} + ], ) results = client.wait_for_results(central_task.get("id")) print(results) @@ -64,7 +67,10 @@ task = client.task.create( arguments={ {% from 'macros/arguments_definition.jinja' import arguments_define %} {{ arguments_define(federated_function_name, federated_args, 2) }} }, - organizations=org_ids + organizations=org_ids, + databases=[ + {"type": "dataframe", "dataframe_id": network.server.dataframes[0]["id"]} + ], ) print(task) From 808085a9d644c0ba6573ed6dc8f8dc855e13b692 Mon Sep 17 00:00:00 2001 From: bartvanb Date: Wed, 26 Nov 
2025 16:15:18 +0100 Subject: [PATCH 15/15] Change directory of mock tools --- ...if has_data_extraction %}test_extraction.py{% endif %}.jinja | 2 +- ..._data_preprocessing %}test_preprocessing.py{% endif %}.jinja | 2 +- ...or has_federated_function %}test_compute.py{% endif %}.jinja | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja b/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja index c0bd330..a3e0295 100644 --- a/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja +++ b/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja @@ -11,7 +11,7 @@ installed. This can be done by running: pip install vantage6-algorithm-tools """ -from vantage6.mock.network import MockNetwork +from vantage6.algorithm.mock.network import MockNetwork from pathlib import Path # get path of current directory diff --git a/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja b/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja index ae24925..b66d663 100644 --- a/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja +++ b/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja @@ -13,7 +13,7 @@ installed. 
This can be done by running: """ import pandas as pd -from vantage6.mock.network import MockNetwork +from vantage6.algorithm.mock.network import MockNetwork from pathlib import Path # get path of current directory diff --git a/test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja b/test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja index ca6e305..dc43806 100644 --- a/test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja +++ b/test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja @@ -13,7 +13,7 @@ installed. This can be done by running: """ import pandas as pd -from vantage6.mock.network import MockNetwork +from vantage6.algorithm.mock.network import MockNetwork from pathlib import Path # get path of current directory