Skip to content

Commit

Permalink
Import catalog-crawler from edc-ce
Browse files Browse the repository at this point in the history
  • Loading branch information
kamilczaja committed Oct 24, 2024
2 parents bcd9697 + 73d00c0 commit a3fb124
Show file tree
Hide file tree
Showing 91 changed files with 5,537 additions and 0 deletions.
44 changes: 44 additions & 0 deletions extensions/catalog-crawler/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<!-- PROJECT LOGO -->
<br />
<div align="center">
<a href="https://github.com/sovity/edc-ce">
<img src="https://raw.githubusercontent.com/sovity/edc-ui/main/src/assets/images/sovity_logo.svg" alt="Logo" width="300">
</a>

<h3 align="center">EDC-Connector Extension:<br />Catalog Crawler</h3>

<p align="center">
<a href="https://github.com/sovity/edc-ce/issues/new?template=bug_report.md">Report Bug</a>
·
<a href="https://github.com/sovity/edc-ce/issues/new?template=feature_request.md">Request Feature</a>
</p>
</div>

## About this Extension

The catalog crawler is a deployment unit depending on an existing Authority Portal's database:

- It is a modified EDC connector tasked with crawling the other connectors' public data offers.
- It periodically checks the Authority Portal's connector list for its environment.
- It crawls the given connectors in regular intervals.
- It writes the data offers and connector statuses back into the Authority Portal DB.
- Each Environment configured in the Authority Portal requires its own Catalog Crawler with credentials for that environment's DAPS.

## Why does this component exist?

The Authority Portal uses a non-EDC stack, and the EDC stack cannot handle multiple sources of authority at once.

With the `DB -> UI` part of the broker having been moved to the Authority Portal, only the `Catalog -> DB` part remains as the Catalog Crawler,
as it requires Connector-to-Connector IAM within the given Dataspace.

## Deployment

Please see the [Catalog Crawler Productive Deployment Guide](../../docs/deployment-guide/goals/catalog-crawler-production/README.md) for more information.

## License

Apache License 2.0 - see [LICENSE](../../LICENSE)

## Contact

sovity GmbH - contact@sovity.de
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Build script for the catalog-crawler launcher module: assembles a minimal
// EDC runtime capable of requesting catalogs plus the crawler extension itself.
plugins {
`java-library`
}

dependencies {
// A minimal EDC that can request catalogs
api(libs.edc.controlPlaneCore)
api(libs.edc.dataPlaneSelectorCore)
api(libs.edc.configurationFilesystem)
api(libs.edc.controlPlaneAggregateServices)
api(libs.edc.http)
api(libs.edc.dsp)
api(libs.edc.jsonLd)

// Data Catalog Crawler
api(project(":extensions:catalog-crawler:catalog-crawler"))
}

// Maven group id is shared across all sovity EDC modules via the version catalog.
group = libs.versions.sovityEdcGroup.get()
48 changes: 48 additions & 0 deletions extensions/catalog-crawler/catalog-crawler/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Build script for the catalog-crawler extension module: the crawling logic,
// its persistence layer, and the shared mappers it depends on.
plugins {
`java-library`
}

dependencies {
// Lombok is compile-time only (annotation processing), never a runtime dependency.
annotationProcessor(libs.lombok)
compileOnly(libs.lombok)

implementation(libs.edc.controlPlaneSpi)
implementation(libs.edc.managementApiConfiguration)

// Quartz drives the periodic crawl scheduling (cron settings in CrawlerExtension).
implementation(libs.quartz.quartz)
implementation(libs.apache.commonsLang)
implementation(project(":utils:versions"))

// Exposed transitively (api) because consumers work with these modules' types directly.
api(project(":utils:catalog-parser"))
api(project(":utils:json-and-jsonld-utils"))
api(project(":extensions:wrapper:wrapper-common-mappers"))
api(project(":extensions:catalog-crawler:catalog-crawler-db"))
api(project(":extensions:postgres-flyway-core"))

testAnnotationProcessor(libs.lombok)
testCompileOnly(libs.lombok)
testImplementation(project(":utils:test-utils"))
testImplementation(libs.assertj.core)
testImplementation(libs.mockito.core)
testImplementation(libs.restAssured.restAssured)
testImplementation(libs.testcontainers.testcontainers)
testImplementation(libs.flyway.core)
testImplementation(libs.testcontainers.junitJupiter)
testImplementation(libs.testcontainers.postgresql)
testImplementation(libs.junit.api)
testImplementation(libs.jsonAssert)
testRuntimeOnly(libs.junit.engine)
}

tasks.getByName<Test>("test") {
useJUnitPlatform()
// Tests share database state (Testcontainers/Postgres), so forks are serialized.
maxParallelForks = 1
}

// Publish this module as a Maven artifact named after the project.
publishing {
publications {
create<MavenPublication>(project.name) {
from(components["java"])
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/*
* Copyright (c) 2023 sovity GmbH
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0
*
* SPDX-License-Identifier: Apache-2.0
*
* Contributors:
* sovity GmbH - initial API and implementation
*
*/

package de.sovity.edc.ext.catalog.crawler;

import de.sovity.edc.ext.wrapper.api.common.mappers.PlaceholderEndpointService;
import org.eclipse.edc.connector.api.management.configuration.transform.ManagementApiTypeTransformerRegistry;
import org.eclipse.edc.connector.spi.catalog.CatalogService;
import org.eclipse.edc.jsonld.spi.JsonLd;
import org.eclipse.edc.runtime.metamodel.annotation.Inject;
import org.eclipse.edc.runtime.metamodel.annotation.Provides;
import org.eclipse.edc.runtime.metamodel.annotation.Setting;
import org.eclipse.edc.spi.system.ServiceExtension;
import org.eclipse.edc.spi.system.ServiceExtensionContext;
import org.eclipse.edc.spi.types.TypeManager;

import static de.sovity.edc.ext.catalog.crawler.orchestration.config.EdcConfigPropertyUtils.toEdcProp;

/**
 * EDC {@link ServiceExtension} entry point for the Authority Portal Data Catalog Crawler.
 * <p>
 * Reads its configuration from EDC settings (all keys are mapped from
 * {@code CRAWLER_*} environment-style names via {@code toEdcProp}), builds the
 * crawler's object graph through manual dependency injection in
 * {@code CrawlerExtensionContextBuilder}, and registers the resulting
 * {@link CrawlerExtensionContext} so tests can access it.
 */
@Provides({CrawlerExtensionContext.class})
public class CrawlerExtension implements ServiceExtension {

    public static final String EXTENSION_NAME = "Authority Portal Data Catalog Crawler";

    // Feature flag: unless explicitly set to true, initialize() is a no-op (see below).
    @Setting(required = true)
    public static final String EXTENSION_ENABLED = toEdcProp("CRAWLER_EXTENSION_ENABLED");

    // Authority Portal environment this crawler instance is responsible for.
    @Setting(required = true)
    public static final String ENVIRONMENT_ID = toEdcProp("CRAWLER_ENVIRONMENT_ID");

    // JDBC connection to the Authority Portal database (crawl results are written there).
    @Setting(required = true)
    public static final String JDBC_URL = toEdcProp("CRAWLER_DB_JDBC_URL");

    @Setting(required = true)
    public static final String JDBC_USER = toEdcProp("CRAWLER_DB_JDBC_USER");

    @Setting(required = true)
    public static final String JDBC_PASSWORD = toEdcProp("CRAWLER_DB_JDBC_PASSWORD");

    @Setting
    public static final String DB_CONNECTION_POOL_SIZE = toEdcProp("CRAWLER_DB_CONNECTION_POOL_SIZE");

    @Setting
    public static final String DB_CONNECTION_TIMEOUT_IN_MS = toEdcProp("CRAWLER_DB_CONNECTION_TIMEOUT_IN_MS");

    // Flyway migration controls. NOTE(review): exact semantics of MIGRATE/CLEAN flags
    // live in the postgres-flyway-core module — confirm there before changing defaults.
    @Setting
    public static final String DB_MIGRATE = toEdcProp("CRAWLER_DB_MIGRATE");

    @Setting
    public static final String DB_CLEAN = toEdcProp("CRAWLER_DB_CLEAN");

    @Setting
    public static final String DB_CLEAN_ENABLED = toEdcProp("CRAWLER_DB_CLEAN_ENABLED");

    @Setting
    public static final String DB_ADDITIONAL_FLYWAY_MIGRATION_LOCATIONS = toEdcProp("CRAWLER_DB_ADDITIONAL_FLYWAY_LOCATIONS");

    // Crawl tuning: thread pool size and caps on how much data is persisted per connector.
    @Setting
    public static final String NUM_THREADS = toEdcProp("CRAWLER_NUM_THREADS");

    @Setting
    public static final String MAX_DATA_OFFERS_PER_CONNECTOR = toEdcProp("CRAWLER_MAX_DATA_OFFERS_PER_CONNECTOR");

    @Setting
    public static final String MAX_CONTRACT_OFFERS_PER_DATA_OFFER = toEdcProp("CRAWLER_MAX_CONTRACT_OFFERS_PER_DATA_OFFER");

    // Cron expressions for re-crawling connectors by their last known status.
    @Setting
    public static final String CRON_ONLINE_CONNECTOR_REFRESH = toEdcProp("CRAWLER_CRON_ONLINE_CONNECTOR_REFRESH");

    @Setting
    public static final String CRON_OFFLINE_CONNECTOR_REFRESH = toEdcProp("CRAWLER_CRON_OFFLINE_CONNECTOR_REFRESH");

    @Setting
    public static final String CRON_DEAD_CONNECTOR_REFRESH = toEdcProp("CRAWLER_CRON_DEAD_CONNECTOR_REFRESH");

    // Scheduled cleanup: marking offline connectors dead after a configured duration.
    @Setting
    public static final String SCHEDULED_KILL_OFFLINE_CONNECTORS = toEdcProp("CRAWLER_SCHEDULED_KILL_OFFLINE_CONNECTORS");
    @Setting
    public static final String KILL_OFFLINE_CONNECTORS_AFTER = toEdcProp("CRAWLER_KILL_OFFLINE_CONNECTORS_AFTER");

    @Inject
    private TypeManager typeManager;

    @Inject
    private ManagementApiTypeTransformerRegistry typeTransformerRegistry;

    @Inject
    private JsonLd jsonLd;

    @Inject
    private CatalogService catalogService;

    /**
     * Manual Dependency Injection Result. Remains {@code null} when the extension
     * is disabled, which start()/shutdown() use as their guard.
     */
    private CrawlerExtensionContext services;

    @Override
    public String name() {
        return EXTENSION_NAME;
    }

    /**
     * Builds the crawler's object graph, unless the extension is disabled via
     * {@link #EXTENSION_ENABLED} (default: disabled).
     *
     * @param context EDC service extension context providing config, monitor and service registry
     */
    @Override
    public void initialize(ServiceExtensionContext context) {
        // Treat "unset" and "false" identically: only an explicit true enables the crawler.
        if (!Boolean.TRUE.equals(context.getConfig().getBoolean(EXTENSION_ENABLED, false))) {
            context.getMonitor().info("Crawler extension is disabled.");
            return;
        }

        services = CrawlerExtensionContextBuilder.buildContext(
            context.getConfig(),
            context.getMonitor(),
            typeManager,
            typeTransformerRegistry,
            jsonLd,
            catalogService,
            // NOTE(review): placeholder base URL — presumably substituted per data offer
            // by PlaceholderEndpointService; confirm in wrapper-common-mappers.
            new PlaceholderEndpointService("http://0.0.0.0/")
        );

        // Provide access for the tests
        context.registerService(CrawlerExtensionContext.class, services);
    }

    /**
     * Kicks off the crawler's startup logic. No-op when the extension is disabled.
     */
    @Override
    public void start() {
        if (services == null) {
            return;
        }
        services.crawlerInitializer().onStartup();
    }

    /**
     * Closes the Hikari connection pool. No-op when the extension is disabled.
     */
    @Override
    public void shutdown() {
        if (services == null) {
            return;
        }
        services.dataSource().close();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright (c) 2023 sovity GmbH
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0
*
* SPDX-License-Identifier: Apache-2.0
*
* Contributors:
* sovity GmbH - initial API and implementation
*
*/

package de.sovity.edc.ext.catalog.crawler;

import com.zaxxer.hikari.HikariDataSource;
import de.sovity.edc.ext.catalog.crawler.crawling.ConnectorCrawler;
import de.sovity.edc.ext.catalog.crawler.crawling.fetching.FetchedCatalogBuilder;
import de.sovity.edc.ext.catalog.crawler.dao.config.DslContextFactory;
import de.sovity.edc.ext.catalog.crawler.dao.data_offers.DataOfferRecordUpdater;
import de.sovity.edc.ext.wrapper.api.common.mappers.PolicyMapper;


/**
 * Manual Dependency Injection result.
 * <p>
 * Built by {@code CrawlerExtensionContextBuilder} and registered as an EDC service
 * by {@code CrawlerExtension} so integration tests can reach into the object graph.
 *
 * @param crawlerInitializer Startup Logic, invoked from {@code CrawlerExtension#start()}
 * @param dataSource Hikari connection pool; closed on extension shutdown
 * @param dslContextFactory factory for jOOQ DSL contexts backed by {@code dataSource}
 * @param connectorCrawler crawls a single connector's catalog (exposed for integration tests)
 * @param policyMapper maps between EDC policy representations (exposed for integration tests)
 * @param catalogPatchBuilder builds fetched-catalog updates (exposed for integration tests)
 * @param dataOfferRecordUpdater updates data offer DB records (exposed for integration tests)
 */
public record CrawlerExtensionContext(
CrawlerInitializer crawlerInitializer,
// Required for stopping connections on closing
HikariDataSource dataSource,
DslContextFactory dslContextFactory,

// Required for Integration Tests
ConnectorCrawler connectorCrawler,
PolicyMapper policyMapper,
FetchedCatalogBuilder catalogPatchBuilder,
DataOfferRecordUpdater dataOfferRecordUpdater
) {
}
Loading

0 comments on commit a3fb124

Please sign in to comment.