Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions docs/development/extensions-core/k8s-jobs.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ Other configurations required are:
Druid operators can dynamically tune certain features within this extension. You don't need to restart the Overlord
service for these changes to take effect.

Druid can dynamically tune [pod template selection](#pod-template-selection), which allows you to configure the pod
template based on the task to be run. To enable dynamic pod template selection, first configure the
[custom template pod adapter](#custom-template-pod-adapter).
Druid can dynamically tune [pod template selection](#pod-template-selection) and [capacity](#properties), where capacity refers to `druid.indexer.runner.capacity`.

Pod template selection allows you to configure the pod template based on the task to be run. To enable dynamic pod template selection, first configure the [custom template pod adapter](#custom-template-pod-adapter).

Use the following APIs to view and update the dynamic configuration for the Kubernetes task runner.

Expand Down Expand Up @@ -126,7 +126,8 @@ Host: http://ROUTER_IP:ROUTER_PORT
"type": ["index_kafka"]
}
]
}
},
"capacity": 12
}
```
</details>
Expand All @@ -135,6 +136,8 @@ Host: http://ROUTER_IP:ROUTER_PORT

Updates the dynamic configuration for the Kubernetes Task Runner

Note: Both `podTemplateSelectStrategy` and `capacity` are optional fields. A POST request may include either, both, or neither.

##### URL

`POST` `/druid/indexer/v1/k8s/taskrunner/executionconfig`
Expand Down Expand Up @@ -193,7 +196,8 @@ curl "http://ROUTER_IP:ROUTER_PORT/druid/indexer/v1/k8s/taskrunner/executionconf
"type": ["index_kafka"]
}
]
}
},
"capacity": 6
}'
```

Expand Down Expand Up @@ -225,7 +229,8 @@ Content-Type: application/json
"type": ["index_kafka"]
}
]
}
},
"capacity": 6
}
```

Expand Down Expand Up @@ -309,7 +314,7 @@ Host: http://ROUTER_IP:ROUTER_PORT
"comment": "",
"ip": "127.0.0.1"
},
"payload": "{\"type\": \"default\",\"podTemplateSelectStrategy\":{\"type\": \"taskType\"}",
"payload": "{\"type\": \"default\",\"podTemplateSelectStrategy\":{\"type\": \"taskType\"},\"capacity\":6}",
"auditTime": "2024-06-13T20:59:51.622Z"
}
]
Expand Down Expand Up @@ -790,7 +795,7 @@ Should you require the needed permissions for interacting across Kubernetes name
| `druid.indexer.runner.annotations` | `JsonObject` | Additional annotations you want to add to peon pod. | `{}` | No |
| `druid.indexer.runner.peonMonitors` | `JsonArray` | Overrides `druid.monitoring.monitors`. Use this property if you don't want to inherit monitors from the Overlord. | `[]` | No |
| `druid.indexer.runner.graceTerminationPeriodSeconds` | `Long` | Number of seconds you want to wait after a sigterm for container lifecycle hooks to complete. Keep at a smaller value if you want tasks to hold locks for shorter periods. | `PT30S` (K8s default) | No |
| `druid.indexer.runner.capacity` | `Integer` | Number of concurrent jobs that can be sent to Kubernetes. | `2147483647` | No |
| `druid.indexer.runner.capacity` | `Integer` | Number of concurrent jobs that can be sent to Kubernetes. If a capacity has been set in the dynamic configuration, it overrides this value. | `2147483647` | No |
| `druid.indexer.runner.cpuCoreInMicro` | `Integer` | Number of CPU micro core for the task. | `1000` | No |
| `druid.indexer.runner.logSaveTimeout` | `Duration` | The peon executing the ingestion task makes a best effort to persist the pod logs from `k8s` to persistent task log storage. The timeout ensures that `k8s` connection issues do not cause the pod to hang indefinitely thereby blocking Overlord operations. If the timeout occurs before the logs are saved, those logs will not be available in Druid. | `PT300S` | No |

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ public class KubernetesOverlordModule implements DruidModule
public void configure(Binder binder)
{
// druid.indexer.runner.type=k8s
JsonConfigProvider.bind(binder, IndexingServiceModuleHelper.INDEXER_RUNNER_PROPERTY_PREFIX, KubernetesTaskRunnerConfig.class);
JsonConfigProvider.bind(binder, IndexingServiceModuleHelper.INDEXER_RUNNER_PROPERTY_PREFIX, KubernetesTaskRunnerStaticConfig.class);
JsonConfigProvider.bind(binder, K8SANDWORKER_PROPERTIES_PREFIX, KubernetesAndWorkerTaskRunnerConfig.class);
JsonConfigProvider.bind(binder, "druid.indexer.queue", TaskQueueConfig.class);
JacksonConfigProvider.bind(binder, KubernetesTaskRunnerDynamicConfig.CONFIG_KEY, KubernetesTaskRunnerDynamicConfig.class, null);
Expand Down Expand Up @@ -150,10 +150,20 @@ public void configure(Binder binder)
JsonConfigProvider.bind(binder, JDK_HTTPCLIENT_PROPERITES_PREFIX, DruidKubernetesJdkHttpClientConfig.class);
}

/**
 * Guice provider for the effective Kubernetes task runner config, created lazily as a singleton.
 *
 * The returned object is constructed from the static runner config and the supplier of the
 * dynamic config. How the two are combined is decided by
 * {@code KubernetesTaskRunnerEffectiveConfig}, which is not shown in this module.
 *
 * @param staticConfig          the injected static runner config
 * @param dynamicConfigSupplier supplier of the dynamic runner config
 * @return a new {@code KubernetesTaskRunnerEffectiveConfig} built from both arguments
 */
@Provides
@LazySingleton
public KubernetesTaskRunnerEffectiveConfig provideEffectiveConfig(
KubernetesTaskRunnerStaticConfig staticConfig,
Supplier<KubernetesTaskRunnerDynamicConfig> dynamicConfigSupplier
)
{
return new KubernetesTaskRunnerEffectiveConfig(staticConfig, dynamicConfigSupplier);
}

@Provides
@LazySingleton
public DruidKubernetesClient makeKubernetesClient(
KubernetesTaskRunnerConfig kubernetesTaskRunnerConfig,
KubernetesTaskRunnerStaticConfig kubernetesTaskRunnerConfig,
DruidKubernetesHttpClientFactory httpClientFactory,
Lifecycle lifecycle
)
Expand Down Expand Up @@ -217,7 +227,7 @@ TaskRunnerFactory<? extends WorkerTaskRunner> provideWorkerTaskRunner(
TaskAdapter provideTaskAdapter(
DruidKubernetesClient client,
Properties properties,
KubernetesTaskRunnerConfig kubernetesTaskRunnerConfig,
KubernetesTaskRunnerEffectiveConfig kubernetesTaskRunnerConfig,
TaskConfig taskConfig,
StartupLoggingConfig startupLoggingConfig,
@Self DruidNode druidNode,
Expand Down Expand Up @@ -260,7 +270,7 @@ TaskAdapter provideTaskAdapter(
druidNode,
smileMapper,
taskLogs,
new DynamicConfigPodTemplateSelector(properties, dynamicConfigRef)
new DynamicConfigPodTemplateSelector(properties, kubernetesTaskRunnerConfig)
);
} else {
return new SingleContainerTaskAdapter(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import io.fabric8.kubernetes.api.model.batch.v1.Job;
import org.apache.druid.common.config.ConfigManager;
import org.apache.druid.common.guava.FutureUtils;
import org.apache.druid.error.DruidException;
import org.apache.druid.indexer.RunnerTaskState;
Expand All @@ -44,6 +45,7 @@
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.concurrent.Execs;
import org.apache.druid.java.util.common.lifecycle.LifecycleStart;
import org.apache.druid.java.util.common.lifecycle.LifecycleStop;
Expand All @@ -56,6 +58,7 @@
import org.apache.druid.k8s.overlord.common.K8sTaskId;
import org.apache.druid.k8s.overlord.common.KubernetesPeonClient;
import org.apache.druid.k8s.overlord.common.KubernetesResourceNotFoundException;
import org.apache.druid.k8s.overlord.execution.KubernetesTaskRunnerDynamicConfig;
import org.apache.druid.k8s.overlord.taskadapter.TaskAdapter;
import org.apache.druid.tasklogs.TaskLogStreamer;
import org.jboss.netty.handler.codec.http.HttpMethod;
Expand All @@ -76,8 +79,11 @@
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

/**
Expand All @@ -100,6 +106,7 @@
public class KubernetesTaskRunner implements TaskLogStreamer, TaskRunner
{
private static final EmittingLogger log = new EmittingLogger(KubernetesTaskRunner.class);
private static final String OBSERVER_KEY = "k8s-task-runner-capacity-%s";
private final CopyOnWriteArrayList<Pair<TaskRunnerListener, Executor>> listeners = new CopyOnWriteArrayList<>();

// to cleanup old jobs that might not have been deleted.
Expand All @@ -111,19 +118,23 @@ public class KubernetesTaskRunner implements TaskLogStreamer, TaskRunner
private final KubernetesPeonClient client;
private final KubernetesTaskRunnerConfig config;
private final ListeningExecutorService exec;
private final ThreadPoolExecutor tpe;
private final HttpClient httpClient;
private final PeonLifecycleFactory peonLifecycleFactory;
private final ServiceEmitter emitter;
// currently worker categories aren't supported, so it's hardcoded.
protected static final String WORKER_CATEGORY = "_k8s_worker_category";

private final AtomicInteger currentCapacity;

public KubernetesTaskRunner(
TaskAdapter adapter,
KubernetesTaskRunnerConfig config,
KubernetesPeonClient client,
HttpClient httpClient,
PeonLifecycleFactory peonLifecycleFactory,
ServiceEmitter emitter
ServiceEmitter emitter,
ConfigManager configManager
)
{
this.adapter = adapter;
Expand All @@ -132,10 +143,12 @@ public KubernetesTaskRunner(
this.httpClient = httpClient;
this.peonLifecycleFactory = peonLifecycleFactory;
this.cleanupExecutor = Executors.newScheduledThreadPool(1);
this.exec = MoreExecutors.listeningDecorator(
Execs.multiThreaded(config.getCapacity(), "k8s-task-runner-%d")
);
this.emitter = emitter;

this.currentCapacity = new AtomicInteger(config.getCapacity());
this.tpe = new ThreadPoolExecutor(currentCapacity.get(), currentCapacity.get(), 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>(), Execs.makeThreadFactory("k8s-task-runner-%d", null));
this.exec = MoreExecutors.listeningDecorator(this.tpe);
configManager.addListener(KubernetesTaskRunnerDynamicConfig.CONFIG_KEY, StringUtils.format(OBSERVER_KEY, Thread.currentThread().getId()), this::syncCapacityWithDynamicConfig);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this run in the jetty thread?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes it will run in the same thread as the one handling the API call. Would your concern be that if more listeners were to be added in the future, the updating of dynamic configs would be a long blocking call?

}

@Override
Expand Down Expand Up @@ -179,6 +192,24 @@ protected KubernetesWorkItem joinAsync(Task task)
}
}

/**
 * Applies the capacity carried by the given dynamic config to the task thread pool.
 *
 * Does nothing when the new capacity equals the currently tracked capacity. Otherwise it
 * logs the change, resizes {@code tpe} (both core and maximum pool size are set to the new
 * capacity) and stores the new value in {@code currentCapacity}.
 *
 * NOTE(review): {@code config} is dereferenced without a null check, and the dynamic config
 * is bound with a {@code null} default in KubernetesOverlordModule — confirm this callback
 * is never invoked with {@code null}.
 *
 * @param config dynamic config whose capacity should be applied
 */
private void syncCapacityWithDynamicConfig(KubernetesTaskRunnerDynamicConfig config)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks much cleaner. Thanks @FrankChen021 for the suggestion and @GabrielCWT for the impl.

{
int newCapacity = config.getCapacity();
// Nothing to do when the capacity is unchanged.
if (newCapacity == currentCapacity.get()) {
return;
}
log.info("Adjusting k8s task runner capacity from [%d] to [%d]", currentCapacity.get(), newCapacity);
// maximum pool size must always be greater than or equal to the core pool size
if (newCapacity < currentCapacity.get()) {
// Shrinking: lower the core size first so it never exceeds the new maximum.
tpe.setCorePoolSize(newCapacity);
tpe.setMaximumPoolSize(newCapacity);
} else {
// Growing: raise the maximum first so the new core size never exceeds it.
tpe.setMaximumPoolSize(newCapacity);
tpe.setCorePoolSize(newCapacity);
}
// Record the new capacity only after the pool has been resized.
currentCapacity.set(newCapacity);
}

private TaskStatus runTask(Task task)
{
return doTask(task, true);
Expand Down Expand Up @@ -294,7 +325,7 @@ public void shutdown(String taskid, String reason)
synchronized (tasks) {
tasks.remove(taskid);
}

}

@Override
Expand Down Expand Up @@ -420,7 +451,7 @@ public void stop()
@Override
public Map<String, Long> getTotalTaskSlotCount()
{
return ImmutableMap.of(WORKER_CATEGORY, (long) config.getCapacity());
return ImmutableMap.of(WORKER_CATEGORY, (long) currentCapacity.get());
}

@Override
Expand All @@ -438,13 +469,13 @@ public Optional<ScalingStats> getScalingStats()
@Override
public Map<String, Long> getIdleTaskSlotCount()
{
return ImmutableMap.of(WORKER_CATEGORY, (long) Math.max(0, config.getCapacity() - tasks.size()));
return ImmutableMap.of(WORKER_CATEGORY, (long) Math.max(0, currentCapacity.get() - tasks.size()));
}

@Override
public Map<String, Long> getUsedTaskSlotCount()
{
return ImmutableMap.of(WORKER_CATEGORY, (long) Math.min(config.getCapacity(), tasks.size()));
return ImmutableMap.of(WORKER_CATEGORY, (long) Math.min(currentCapacity.get(), tasks.size()));
}

@Override
Expand Down Expand Up @@ -535,7 +566,7 @@ public RunnerTaskState getRunnerTaskState(String taskId)
@Override
public int getTotalCapacity()
{
return config.getCapacity();
return currentCapacity.get();
}

@Override
Expand Down
Loading