Skip to content

Commit

Permalink
UDF benchmarks part 1 (#266)
Browse files Browse the repository at this point in the history
  • Loading branch information
stanbrub authored Feb 12, 2024
1 parent d98bc2c commit 7481937
Show file tree
Hide file tree
Showing 6 changed files with 183 additions and 2 deletions.
4 changes: 2 additions & 2 deletions .github/resources/adhoc-scale-benchmark.properties
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ timestamp.test.results=
docker.compose.file=/root/deephaven/docker-compose.yml

# The url used for posting messages to slack
slack.token=${slackToken}
slack.token=

# The channel to post notifications to
slack.channel=${slackChannel}
slack.channel=
52 changes: 52 additions & 0 deletions .github/scripts/dh-jetty-start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/bin/env bash

set -o errexit
set -o pipefail

# Build and start the python native Deephaven Community Server

MY_ID=DH_JETTY_BY_STAN
ID_PATTERN="-DMY_ID=${MY_ID}"
JAVA=temurin-17-jdk-amd64
ROOT=./tmp/dh-server-jetty
VENV=${ROOT}/py-venv
BIN=${ROOT}/bin
LOGS=${ROOT}/logs

rm -rf server/jetty-app/build/distributions
rm -rf ${ROOT}
mkdir -p ${BIN} ${LOGS}

OLD_JAVA_HOME="${JAVA_HOME}"
export JAVA_HOME=/usr/lib/jvm/${JAVA}
./gradlew server-jetty-app:build
tar xvf server/jetty-app/build/distributions/server-jetty-*.tar -C ${BIN}
export JAVA_HOME="${OLD_JAVA_HOME}"

rm -f $(find py/server/build/wheel -name "*py3-none-any.whl")
./gradlew py-server:assemble
rm -rf ${VENV}
python3 -m venv ${VENV}
source ${VENV}/bin/activate
pip install "$(find py/server/build/wheel -name '*py3-none-any.whl')[autocomplete]"

export DEBUG="${DEBUG:-"-agentlib:jdwp=transport=dt_socket,server=y,suspend=${SUSPEND:-"n"},address=*:5005"}"

export CONSOLE_TYPE="-Ddeephaven.console.type=${CONSOLE:-"python"}"
export STORAGE="${STORAGE:-"-Dstorage.path=./data/"}"

export CYCLE_TM="${CYCLE_TM:-1000}"
export BARRAGE="${BARRAGE:-"-Dbarrage.minUpdateInterval=${CYCLE_TM} -DPeriodicUpdateGraph.targetCycleDurationMillis=${CYCLE_TM}"}"

export GC_APP="-Dio.deephaven.app.GcApplication.enabled=true"
export GRPC_APP="-Dio.deephaven.server.grpc_api_app.GrpcApiApplication.enabled=true"
export APPS="${APPS:-"$GRPC_APP $GC_APP"}"

export AUTH="-DAuthHandlers=io.deephaven.auth.AnonymousAuthenticationHandler"
export EXTRA_OPS="-Xmx24g ${ID_PATTERN}"

cat redpanda-standalone/docker-compose.yml | sed 's/redpanda:29/localhost:29/g' | sed 's/redpanda:80/localhost:80/g' > ${ROOT}/redpanda-docker-compose.yml
docker compose -f ${ROOT}/redpanda-docker-compose.yml up -d

JAVA_OPTS="$DEBUG $STORAGE $APPS $CONSOLE_TYPE $BARRAGE $AUTH $EXTRA_OPS" ${BIN}/server-jetty-*/bin/start &> ${LOGS}/dh.log &

26 changes: 26 additions & 0 deletions .github/scripts/dh-jetty-stop.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash

# set -o errexit
# set -o pipefail
# set -o nounset

# Stop the native python Deephaven Community Server

MY_ID=DH_JETTY_BY_STAN
ID_PATTERN="-DMY_ID=${MY_ID}"
ROOT=./tmp/dh-server-jetty
KILL_FILE=${ROOT}/pkill.stop.count.txt

echo "Shutting Down: ${MY_ID}"
pkill -c -f -SIGTERM "^.*${ID_PATTERN}.*$" > ${KILL_FILE}

STOP_COUNT=$(cat ${KILL_FILE})
if [[ ${STOP_COUNT} -ge 1 ]]; then
echo "Shut Down: ${MY_ID}"
else
echo "Shut Down: Nothing to shut down"
fi

rm -f ${KILL_FILE}

docker compose -f ${ROOT}/redpanda-docker-compose.yml down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
* query to generate a result table and verify the result. The goal is to test some benchmark corner cases like obsolete
* benchmarks, new benchmarks, benchmarks a few days into the next release, etc.
*/
@Disabled
public class PublishTest {
final String[] csvFileNames = {"benchmark-metrics.csv", "benchmark-platform.csv", "benchmark-results.csv"};
final String[] stageRunIds = {"run-1bc89703ab", "run-1bd2e385a7", "run-1bd80a0738", "run-1bdd3080da",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,9 @@ void initialize(Object testInst) {
from deephaven import new_table, empty_table, garbage_collect, merge
from deephaven.column import long_col, double_col
from deephaven.parquet import read
from numpy import typing as npt
import numpy as np
import numba as nb
""";

this.api = Bench.create(testInst);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/* Copyright (c) 2022-2023 Deephaven Data Labs and Patent Pending */
package io.deephaven.benchmark.tests.standard.formula;

import org.junit.jupiter.api.*;
import io.deephaven.benchmark.tests.standard.StandardTestRunner;

/**
* Standard tests for running user-defined functions. These tests are meant to be compared, and so use the same data.
* <p/>
* Note: When scaling row count, vector size should not get bigger. That would cause more than one axis change and
* invalidate any expected comparisons.
* <p/>
* Note: The "NoHints" tests do have return hints to make them equivalent to the hints tests, otherwise the return value
* would always be a PyObject and not really the same test.
*/
public class UserFormulaTest {
final StandardTestRunner runner = new StandardTestRunner(this);

@BeforeEach
public void setup() {
runner.tables("source");
}

@Test
public void udfIntArrayToIntNoHints() {
runner.setScaleFactors(1, 1);
var setup = """
def f(arr):
return arr[0]
source = source.update(['int250 = repeat(int250,5)'])
""";
runner.addSetupQuery(setup);
var q = "source.select(['int250=(int)f(int250)'])";
runner.test("UDF- Int Array to Int No Hints", q, "int250");
}

@Test
public void udfIntToIntArrayNoHints() {
runner.setScaleFactors(1, 1);
var setup = """
def f(num):
return jpy.array('int', [num] * 5)
""";
runner.addSetupQuery(setup);
var q = "source.select(['int250=(int[])f(int250)'])";
runner.test("UDF- Int to Int Array No Hints", q, "int250");
}

@Test
public void udf2IntsToIntNoHints() {
runner.setScaleFactors(1, 1);
var setup = """
def f(num1, num2):
return num1 + num2
""";
runner.addSetupQuery(setup);
var q = "source.select(['int250=(int)f(int250, int640)'])";
runner.test("UDF- 2 Ints to Int No Hints", q, "int250", "int640");
}


@Test
public void udfIntArrayToIntNumpyHints() {
runner.setScaleFactors(1, 1);
var setup = """
def f(arr: npt.NDArray[np.int32]) -> np.int32:
return arr[0]
source = source.update(['int250=repeat(int250,5)'])
""";
runner.addSetupQuery(setup);
var q = "source.select(['int250=f(int250)'])";
runner.test("UDF- Int Array to Int Numpy Hints", q, "int250");
}

@Test
public void udfIntToIntArrayNumpyHints() {
runner.setScaleFactors(1, 1);
var setup = """
def f(num: np.int32) -> npt.NDArray[np.int32]:
return np.repeat(num,5)
""";
runner.addSetupQuery(setup);
var q = "source.select(['int250=f(int250)'])";
runner.test("UDF- Int to Int Array Numpy Hints", q, "int250");
}

@Test
public void udf2IntsToIntNumpyHints() {
runner.setScaleFactors(1, 1);
var setup = """
def f(num1: np.int32, num2: np.int32) -> np.int32:
return num1 + num2
""";
runner.addSetupQuery(setup);
var q = "source.select(['int250=f(int250, int640)'])";
runner.test("UDF- 2 Ints to Int Numpy Hints", q, "int250", "int640");
}

}

0 comments on commit 7481937

Please sign in to comment.