Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/DSC-SPIDAL/twister2
Browse files Browse the repository at this point in the history
  • Loading branch information
pulasthi committed Aug 23, 2019
2 parents 3fa7c50 + 081ae35 commit e4b2d09
Show file tree
Hide file tree
Showing 21 changed files with 563 additions and 244 deletions.
61 changes: 34 additions & 27 deletions docker/standalone/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,15 @@ FROM ubuntu:18.04

# install dependencies
RUN apt-get update && apt-get install -y --no-install-recommends openjdk-8-jdk sudo \
python-dev python-pip g++ git build-essential \
automake cmake libtool-bin zip libunwind-setjmp0-dev \
zlib1g-dev unzip pkg-config python-setuptools \
maven wget ssh && \
rm -rf /var/lib/apt/lists/* && apt autoclean && apt-get clean && apt autoremove
python3-dev python3-pip g++ git build-essential \
automake cmake libtool-bin zip libunwind-setjmp0-dev \
zlib1g-dev unzip pkg-config python3-setuptools \
maven wget ssh \
&& cd /usr/local/bin \
&& ln -s /usr/bin/python3 python \
&& pip3 install --upgrade pip \
&& sudo pip3 install wheel \
&& rm -rf /var/lib/apt/lists/* && apt autoclean && apt-get clean && apt autoremove


# add user
Expand All @@ -20,32 +24,35 @@ WORKDIR /home/twister2

#install openmpi
RUN cd && \
wget https://www.open-mpi.org/software/ompi/v3.1/downloads/openmpi-3.1.2.tar.gz && \
tar -zxvf openmpi-3.1.2.tar.gz && \
OMPI_BUILD=~/openmpi && \
OMPI_312=~/openmpi-3.1.2 && \
PATH=${OMPI_BUILD}/bin:${PATH} && \
LD_LIBRARY_PATH=${OMPI_BUILD}/lib:${LD_LIBRARY_PATH} && \
cd $OMPI_312 && \
./configure --prefix=$OMPI_BUILD --enable-mpi-java --enable-mpirun-prefix-by-default --enable-orterun-prefix-by-default && make;make install && \
echo "export OMPI_BUILD=$OMPI_BUILD" >> ~/.bashrc && \
echo "export PATH=\${OMPI_BUILD}/bin:\${PATH}" >> ~/.bashrc && \
echo "export LD_LIBRARY_PATH=\${OMPI_BUILD}/lib:\${LD_LIBRARY_PATH}" >> ~/.bashrc && \
rm -rf ~/openmpi-3.1.2.tar.gz $OMPI_312
wget https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.1.tar.gz && \
tar -zxvf openmpi-4.0.1.tar.gz && \
OMPI_BUILD=~/openmpi && \
OMPI_401=~/openmpi-4.0.1 && \
PATH=${OMPI_BUILD}/bin:${PATH} && \
LD_LIBRARY_PATH=${OMPI_BUILD}/lib:${LD_LIBRARY_PATH} && \
cd $OMPI_401 && \
./configure --prefix=$OMPI_BUILD --enable-mpi-java --enable-mpirun-prefix-by-default --enable-orterun-prefix-by-default && make;make install && \
echo "export OMPI_BUILD=$OMPI_BUILD" >> ~/.bashrc && \
echo "export PATH=\${OMPI_BUILD}/bin:\${PATH}" >> ~/.bashrc && \
echo "export LD_LIBRARY_PATH=\${OMPI_BUILD}/lib:\${LD_LIBRARY_PATH}" >> ~/.bashrc && \
rm -rf ~/openmpi-4.0.1.tar.gz $OMPI_401


#install bazel
RUN wget https://github.com/bazelbuild/bazel/releases/download/0.18.1/bazel-0.18.1-installer-linux-x86_64.sh && \
chmod +x bazel-0.18.1-installer-linux-x86_64.sh && \
./bazel-0.18.1-installer-linux-x86_64.sh --user && \
echo "export PATH=~/.bazel/bin:\$PATH" >> ~/.bashrc && \
rm bazel-0.18.1-installer-linux-x86_64.sh
RUN wget https://github.com/bazelbuild/bazel/releases/download/0.28.1/bazel-0.28.1-installer-linux-x86_64.sh && \
chmod +x bazel-0.28.1-installer-linux-x86_64.sh && \
./bazel-0.28.1-installer-linux-x86_64.sh --user && \
echo "export PATH=~/.bazel/bin:\$PATH" >> ~/.bashrc && \
rm bazel-0.28.1-installer-linux-x86_64.sh



# clone & build twister2
RUN git clone https://github.com/DSC-SPIDAL/twister2.git && \
cd twister2 && ~/.bazel/bin/bazel build --config=ubuntu scripts/package:tarpkgs && cd && \
tar -xzvf twister2/bazel-bin/scripts/package/twister2-0.3.0.tar.gz && \
echo "export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64" >> ~/.bashrc && \
sed -i '/twister2.resource.scheduler.mpi.mpirun.file/c\twister2.resource.scheduler.mpi.mpirun.file: "mpirun"' /home/twister2/twister2-0.3.0/conf/standalone/resource.yaml && \
rm -rf bin ~/.cache/bazel
cd twister2 && ~/.bazel/bin/bazel build --config=ubuntu scripts/package:tarpkgs && cd && \
tar -xzvf twister2/bazel-bin/scripts/package/twister2-0.3.0.tar.gz && \
echo "export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64" >> ~/.bashrc && \
sed -i '/twister2.resource.scheduler.mpi.mpirun.file/c\twister2.resource.scheduler.mpi.mpirun.file: "mpirun"' /home/twister2/twister2-0.3.0/conf/standalone/resource.yaml && \
rm -rf bin ~/.cache/bazel


4 changes: 2 additions & 2 deletions docs/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# We have moved the documentation to [https://dsc-spidal.github.io/twister2](https://dsc-spidal.github.io/twister2)
# We have moved the documentation to [https://twister2.org](https://twister2.org)

This documentation is written using mark down files. These markdown files are converted into HTML and deployed on to github pages.

Expand Down Expand Up @@ -79,4 +79,4 @@ id: img_proc
title: Image Processing
```

Inside website folder there is a file called, sidebars.json. You can modify that file to include the documentation in the sidebar.
Inside website folder there is a file called, sidebars.json. You can modify that file to include the documentation in the sidebar.
8 changes: 7 additions & 1 deletion docs/docs/concepts/operator-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,14 @@ to a single value at the target. How to reduce multiple values to a single value
a user defined function.

```java
LogicalPlanBuilder logicalPlanBuilder = LogicalPlanBuilder.plan(
3, // 3 sources
3, // 3 targets
workerEnv
).withFairDistribution();

// setup the batch operation
BReduce reduce = new BReduce(communicator, logicalPlan, sources, target,
BReduce reduce = new BReduce(communicator, logicalPlanBuilder,
reduceFunction, reduceReceiver, datatype);
// send the data
reduce.reduce(task, data, flag);
Expand Down
18 changes: 18 additions & 0 deletions docs/docs/concepts/task-system/task-scheduler/batch.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,21 @@ Finally, the algorithm pack the task instance plan and the container plan into t
and return the same.

[Data Locality Batch Task Scheduler Source Code](https://github.com/DSC-SPIDAL/twister2/blob/master/twister2/taskscheduler/src/java/edu/iu/dsc/tws/tsched/batch/datalocalityaware/DataLocalityBatchTaskScheduler.java)


## Batch Task Scheduler

Batch Task Scheduler is capable of scheduling of single task graph as well as multiple graphs which
depends on the input from other task graphs. If the batch task scheduler receives only single task
graph, first it will check whether it has any receptor and collector tasks in the graph. If the receptor
and collector task doesn't match the parallelism it throws the runtime exception to the user to provide
the same parallelism for the dependent tasks in the graph. If the batch task scheduler receives multiple
task graph, first it will store the collectible name set and receivable name set in the appropriate
set values. If the collectible name set (input key) matches with the receivable name set (input key)
it validate the parallelism of the dependent tasks in the task graph, if it doesn't match it will
guide the user to specify the same parallelism. If it matches, it proceeds with the scheduling of the
task graphs to the same workers. For example, if the map task, 0th task has the data in worker 0, it
will schedule the reduce task 0th task to the worker 1. Batch Task Scheduler considers both the locality
of the data and scheduling the tasks in a round robin fashion.

[Batch Task Scheduler Source Code](https://github.com/DSC-SPIDAL/twister2/blob/master/twister2/taskscheduler/src/java/edu/iu/dsc/tws/tsched/batch/batchscheduler/BatchTaskScheduler.java)
157 changes: 105 additions & 52 deletions docs/docs/concepts/task-system/task-scheduler/task-scheduler.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ that consists of two main methods namely initialize and schedule methods to init
configuration and schedule the taskgraph based on the workers information available in the resource plan.

```text
edu.iu.dsc.tws.tsched.spi.taskschedule.ITaskScheduler
edu.iu.dsc.tws.api.compute.schedule.ITaskScheduler
```

has the following methods namely
Expand Down Expand Up @@ -71,9 +71,13 @@ container id, task instance plan, required and scheduled resource of the contain
plan holds the jobid or the taskgraph id and the container plan. The task schedule plan list is mainly
responsible for holding the taskschedule of the batch tasks.

\`\`bash message Resource { double availableCPU = 1; double availableMemory = 2; double availableDisk = 3; }

```text
message Resource {
double availableCPU = 1;
double availableMemory = 2;
double availableDisk = 3;
}
message TaskInstancePlan {
int32 taskid = 1;
string taskname = 2;
Expand All @@ -99,8 +103,6 @@ message TaskSchedulePlanList {
}
```

\`\`

## YAML file

The task scheduler has task.yaml in the config directory. The task scheduler mode represents either
Expand All @@ -109,49 +111,84 @@ memory, disk, and cpu values assigned to the task instances. The default contain
represents the percentage of values to be added to each container. The default container instance
values represents the default size of memory, disk, and cpu of the container. The task parallelism
represents the default parallelism value assigned to each task instance. The task type represents
the streaming or batch task.The task scheduler dynamically loads the respective streaming and batch
the streaming or batch task. The task scheduler dynamically loads the respective streaming and batch
task schedulers based on the configuration values specified in the task.yaml.

\`\`yaml

```text
#Streaming Task Scheduler Mode "roundrobin" or "firstfit" or "datalocalityaware"
twister2.streaming.taskscheduler: "roundrobin"
# Task scheduling mode for the streaming jobs "roundrobin" or "firstfit" or "datalocalityaware" or "userdefined"
twister2.taskscheduler.streaming: "roundrobin"
#Streaming Task Scheduler Class
twister2.streaming.taskscheduler.class: "edu.iu.dsc.tws.tsched.streaming.roundrobin.RoundRobinTaskScheduler"
# Task Scheduler class for the round robin streaming task scheduler
twister2.taskscheduler.streaming.class: "edu.iu.dsc.tws.tsched.streaming.roundrobin.RoundRobinTaskScheduler"
#Batch Task Scheduler Mode "roundrobin" or "datalocalityaware"
twister2.batch.taskscheduler: "roundrobin"
# Task scheduling mode for the batch jobs "roundrobin" or "datalocalityaware" or "userdefined" or "batchscheduler"
twister2.taskscheduler.batch: "roundrobin"
#Batch Task Scheduler Class
twister2.batch.taskscheduler.class: "edu.iu.dsc.tws.tsched.batch.roundrobin.RoundRobinBatchTaskScheduler"
# Task Scheduler class for the round robin batch task scheduler
twister2.taskscheduler.batch.class: "edu.iu.dsc.tws.tsched.batch.roundrobin.RoundRobinBatchTaskScheduler"
#Default Task Instance Values
twister2.task.instances: 2
twister2.task.instance.ram: 512.0
twister2.task.instance.disk: 500.0
twister2.task.instance.cpu: 2.0
# Number of task instances to be allocated to each worker/container
twister2.taskscheduler.task.instances: 2
#Default Container Padding Values
twister2.ram.padding.container: 2.0
twister2.disk.padding.container: 12.0
twister2.cpu.padding.container: 1.0
twister2.container.padding.percentage: 2
# Ram value to be allocated to each task instance
twister2.taskscheduler.task.instance.ram: 512.0
#Default Container Instance Values
twister2.container.instance.ram: 2048.0
twister2.container.instance.disk: 2000.0
twister2.container.instance.cpu: 4.0
# Disk value to be allocated to each task instance
twister2.taskscheduler.task.instance.disk: 500.0
#Default Task Parallelism Value
twister2.task.parallelism: 2
# CPU value to be allocated to each task instancetwister2.task.parallelism
twister2.taskscheduler.instance.cpu: 2.0
#Default Task Type "streaming" or "batch"
twister2.task.type: "streaming"
```
# Default Container Instance Values
# Ram value to be allocated to each container
twister2.taskscheduler.container.instance.ram: 4096.0
\`\`
# Disk value to be allocated to each container
twister2.taskscheduler.container.instance.disk: 8000.0
twister2.taskscheduler.container.instance.cpu: 16.0
# Default Container Padding Values
# Default padding value of the ram to be allocated to each container
twister2.taskscheduler.ram.padding.container: 2.0
# Default padding value of the disk to be allocated to each container
twister2.taskscheduler.disk.padding.container: 12.0
# CPU padding value to be allocated to each container
twister2.taskscheduler.cpu.padding.container: 1.0
# Percentage value to be allocated to each container
twister2.taskscheduler.container.padding.percentage: 2
# Static Default Network parameters
# Bandwidth value to be allocated to each container instance for datalocality scheduling
twister2.taskscheduler.container.instance.bandwidth: 100 #Mbps
# Latency value to be allocated to each container instance for datalocality scheduling
twister2.taskscheduler.container.instance.latency: 0.002 #Milliseconds
# Bandwidth to be allocated to each datanode instance for datalocality scheduling
twister2.taskscheduler.datanode.instance.bandwidth: 200 #Mbps
# Latency value to be allocated to each datanode instance for datalocality scheduling
twister2.taskscheduler.datanode.instance.latency: 0.01 #Milliseconds
# Prallelism value to each task instance
twister2.taskscheduler.task.parallelism: 2
# Task type to each submitted job by default it is "streaming" job.
twister2.taskscheduler.task.type: "streaming"
# number of threads per worker
twister2.exector.worker.threads: 1
# name of the batch executor
twister2.executor.batch.name: "edu.iu.dsc.tws.executor.threading.BatchSharingExecutor2"
# number of tuples executed at a single pass
twister2.exector.instance.queue.low.watermark: 10000
```

## User-Defined Task Scheduler

Expand All @@ -165,8 +202,31 @@ as "user-defined" with the corresponding "user-defined" task scheduler class nam
#User-defined Streaming Task Scheduler
twister2.streaming.taskscheduler: "user-defined"
#User-defined Streaming Task Scheduler Class
twister2.streaming.taskscheduler.class: "edu.iu.dsc.tws.tsched.userdefined.UserDefinedTaskScheduler"
# Task Scheduler for the userDefined Streaming Task Scheduler
#twister2.taskscheduler.streaming.class: "edu.iu.dsc.tws.tsched.userdefined.UserDefinedTaskScheduler"
# Task Scheduler for the userDefined Batch Task Scheduler
#twister2.taskscheduler.batch.class: "edu.iu.dsc.tws.tsched.userdefined.UserDefinedTaskScheduler"
```

\`\`

## Batch Task Scheduler

Batch Task Scheduler is able to handle and schedule both single task graph as well as multiple
dependent task graphs. The main constraint considered in the batch task scheduler is specify the same
parallelism value for the dependent tasks in the task graphs. It schedule the tasks in a round
robin fashion but, while scheduling the child or the dependent tasks it considers the data locality
of the input data from the parent tasks and schedule the tasks in a round robin fashion to the workers.

\`\`yaml

```text
#Batch Task Scheduler
twister2.taskscheduler.batch: "batchscheduler"
#Task Scheduler class for the batch task scheduler
twister2.taskscheduler.batch.class: "edu.iu.dsc.tws.tsched.batch.batchscheduler.BatchTaskScheduler"
```

\`\`
Expand All @@ -176,25 +236,18 @@ twister2.streaming.taskscheduler.class: "edu.iu.dsc.tws.tsched.userdefined.UserD
The other task schedulers and their respective class names are given below. The user have to specify
the respective scheduler mode and their corresponding class names.


\`\`yaml

```text
#Streaming Task Scheduler Mode "roundrobin" or "firstfit" or "datalocalityaware"
twister2.streaming.taskscheduler: "roundrobin"
#Streaming Task Scheduler Class
twister2.streaming.taskscheduler.class: "edu.iu.dsc.tws.tsched.streaming.roundrobin.RoundRobinTaskScheduler"
#twister2.streaming.taskscheduler.class: "edu.iu.dsc.tws.tsched.streaming.datalocalityaware.DataLocalityStreamingTaskScheduler"
#twister2.streaming.taskscheduler.class: "edu.iu.dsc.tws.tsched.streaming.firstfit.FirstFitStreamingTaskScheduler"
# Task Scheduler for the Data Locality Aware Streaming Task Scheduler
#twister2.taskscheduler.streaming.class: "edu.iu.dsc.tws.tsched.streaming.datalocalityaware.DataLocalityStreamingTaskScheduler"
#Batch Task Scheduler Mode "roundrobin" or "datalocalityaware"
#twister2.batch.taskscheduler: "roundrobin"
twister2.batch.taskscheduler: "datalocalityaware"
# Task Scheduler for the FirstFit Streaming Task Scheduler
#twister2.taskscheduler.streaming.class: "edu.iu.dsc.tws.tsched.streaming.firstfit.FirstFitStreamingTaskScheduler"
#Batch Task Scheduler Class
twister2.batch.taskscheduler.class: "edu.iu.dsc.tws.tsched.batch.datalocalityaware.DataLocalityBatchTaskScheduler"
#twister2.batch.taskscheduler.class: "edu.iu.dsc.tws.tsched.batch.roundrobin.RoundRobinBatchTaskScheduler"
# Task Scheduler for the Data Locality Aware Batch Task Scheduler
#twister2.taskscheduler.batch.class: "edu.iu.dsc.tws.tsched.batch.datalocalityaware.DataLocalityBatchTaskScheduler"
```

\`\`
Expand Down
Loading

0 comments on commit e4b2d09

Please sign in to comment.