Add initial version of benchmarks (#50)

* benchmarks: initial * benchmarks: add ci rule * benchmarks: fix ci-config * benchmarks: add missing poetry * benchmarks: lower python requirements for ci ubuntu * benchmarks: check poetry install for old old ubuntu * benchmarks: remove poetry lock file * benchmarks: fix path poetry plot * benchmarks: add pip to cache * benchmarks: improve README * benchmarks: update deps * benchmarks: make evaluation benchmark configurable * benchmarks: patch ci steps * benchmarks: add target to cache * benchmarks: cooldown idle only on multiple runs * benchmarks: add benchmark zip to cache * ci: move copy to correct step * benchmarks: cache poetry lock file for ci * benchmarks: use common cache * benchmarks: remove clean in pre-built job * benchmarks: fix poetry path * ci: add partial clean again * benchmarks: close figures after saving * benchmarks: reduce default cases * benchmarks: fix minor issues * benchmarks: fix plot time * benchmarks: plot recorded procfs values * benchmarks: include temperature in process plot * benchmarks: latency plot use more markers * benchmarks: fix missing format * benchmarks: add legend to latency plot * benchmarks: add cache plot * benchmarks: clean up procfs plot * benchmarks: use wong as default colors
parcio · Feb 9, 2024 · 3f8d83c · 3f8d83c
1 parent f559bda
commit 3f8d83c
Show file tree

Hide file tree

Showing 35 changed files with 2,680 additions and 3 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -32,10 +32,14 @@ jobs:
         run: |
           cd betree/tests
           cargo build --tests
+          cargo build --tests --release
           cargo clean --package betree-tests
+          cargo clean --release --package betree-tests
           cd ..
           cargo build --tests
+          cargo build --tests --release
           cargo clean --package betree_storage_stack
+          cargo clean --release --package betree_storage_stack
   betree-integration:
     name: Integration Tests
     needs: dependencies
@@ -134,7 +138,8 @@ jobs:
         with:
           path: |
             ~/.cargo
-          key: ubuntu-22.04-rustc-${{ env.RUSTC_VERSION }}-msrv
+            target
+          key: ubuntu-22.04-rustc-${{ env.RUSTC_VERSION }}-${{ hashFiles('**/Cargo.toml') }}-msrv
       - name: Prepare JULEA
         run: |
           sudo apt update || true
@@ -174,14 +179,15 @@ jobs:
           curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
           . "${HOME}/.cargo/env"
           echo "RUSTC_VERSION=$(rustc --version | grep --only-matching '[0-9]\+\.[0-9]\+\.[0-9]\+' | head --lines=1)" >> $GITHUB_ENV
-      - name: Cache fio
+      - name: Cache
         id: cache
         uses: actions/cache@v3
         # Environment variables do not seem to work, use ~ instead.
         with:
           path: |
             ~/.cargo
-          key: ubuntu-22.04-rustc-${{ env.RUSTC_VERSION }}-fio-haura
+            target
+          key: ubuntu-22.04-rustc-${{ env.RUSTC_VERSION }}-${{ hashFiles('**/Cargo.toml') }}
       - name: Build betree
         run: |
           cd betree
@@ -201,3 +207,61 @@ jobs:
           make fio
           export BETREE_CONFIG=$(realpath ./.ci/haura.json)
           ./fio --direct=1 --rw=write --bs=4M --ioengine=external:src/fio-engine-haura.o --numjobs=1 --name=iops-test-job --size=128M
+  haura-benchmarks:
+    name: Benchmark Compatibility Tests
+    runs-on: ubuntu-22.04
+    timeout-minutes: 60
+    needs: dependencies
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+      - name: Install Rust
+        run: |
+          rm --recursive --force "${HOME}/.cargo" "${HOME}/.rustup"
+          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+          . "${HOME}/.cargo/env"
+          echo "RUSTC_VERSION=$(rustc --version | grep --only-matching '[0-9]\+\.[0-9]\+\.[0-9]\+' | head --lines=1)" >> $GITHUB_ENV
+      - name: Common Cache
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cargo
+            target
+          key: ubuntu-22.04-rustc-${{ env.RUSTC_VERSION }}-${{ hashFiles('**/Cargo.toml') }}
+      - name: Poetry and Data Cache
+        id: cache
+        uses: actions/cache@v3
+        # Environment variables do not seem to work, use ~ instead.
+        with:
+          path: |
+            ~/.cache/pip
+            ~/.cache/pypoetry
+            betree/haura-benchmarks/data
+            betree/haura-benchmarks/haura-plots/poetry.lock
+          key: ubuntu-22.04-rustc-${{ env.RUSTC_VERSION }}-${{ hashFiles('**/Cargo.toml') }}-${{ hashFiles('**/pyproject.toml')}}-benchmarks
+      - name: Build haura-benchmarks
+        run: |
+          cd betree/haura-benchmarks
+          cargo build --release
+      - name: Prepare scripts and configuration
+        run: |
+          cd betree/haura-benchmarks
+          cp example_config/.ci-config.json perf-config.json
+          echo "ci" >> run.sh
+      - name: Run benchmark smoke test
+        run: |
+          cd betree/haura-benchmarks
+          ./run.sh smoke-test
+      - name: Prepare poetry for plots
+        run: |
+          sudo apt update || true
+          sudo apt --yes --no-install-recommends install python3-poetry
+          cd betree/haura-benchmarks/haura-plots
+          poetry install
+      - name: Run plots
+        run: |
+          cd betree/haura-benchmarks/haura-plots
+          poetry run plots ../results/*/*
+          rm -rf ../results  # results sit beside haura-plots (see the glob above), not inside it
diff --git a/betree/haura-benchmarks/.gitignore b/betree/haura-benchmarks/.gitignore
@@ -0,0 +1,8 @@
+**/target
+**/*.rs.bk
+**/*.swp
+heaptrack*
+Cargo.lock
+
+results
+data
diff --git a/betree/haura-benchmarks/Cargo.toml b/betree/haura-benchmarks/Cargo.toml
@@ -0,0 +1,26 @@
+[package]
+name = "betree-perf"
+version = "0.1.0"
+authors = ["tilpner <betree@tilpner.com>"]
+edition = "2018"
+
+[workspace]
+members = ["."]
+
+[dependencies]
+betree_storage_stack = { path = "..", features = ["experimental-api"]}
+
+structopt = "0.3"
+figment = { version = "0.10", features = [ "json" ] }
+serde_json = "1"
+libmedium = "0.7"
+procfs = "0.16"
+rand = "0.8"
+rand_xoshiro = "0.6"
+crossbeam = "0.8"
+jemallocator = { version = "0.5", features = ["background_threads"] }
+log = "0.4"
+
+# Dependent on versions from haura
+parking_lot = "0.11"
+zip = "0.5"
diff --git a/betree/haura-benchmarks/README.md b/betree/haura-benchmarks/README.md
@@ -0,0 +1,44 @@
+# betree-perf
+
+This directory contains some additional tools and benchmarks which can be helpful when assessing the performance of Haura.
+
+- `src/bin/{json-flatten,json-merge,sysinfo-log}.rs`: Tooling to aggregate multiple newline-delimited JSON streams into one final file
+- `src/lib.rs`: Shared setup between benchmarks
+- `src/main.rs`: CLI to select and configure a benchmark, also spawns the sysinfo-log binary
+- `src/{ingest, rewrite, switchover, tiered1, zip, scientific_evaluation, filesystem, filesystem_zip, checkpoints}.rs`: Individual benchmarks
+- `run.sh`: Example usage, runs benchmarks with different configurations
+
+## Configuration
+
+All benchmark invocations can be seen in the `run.sh` script, which can be used
+to create a custom benchmark run. Each benchmark is represented by its own
+function, which you can uncomment at the bottom of the script.
+
+If you have followed the general scripts to set up `bectl` and `haura` in the
+[documentation](https://parcio.github.io/haura/) you are good to go. Otherwise,
+provide a configuration for the benchmarks either by pointing to a valid
+configuration in the `BETREE_CONFIG` environment variable or by creating a
+`perf-config.json` in the `haura-benchmarks` directory. A collection of example
+configurations can be found in the `example_config` directory.
+
+``` sh
+$ # EITHER
+$ export BETREE_CONFIG=<path_to_config>
+$ # OR
+$ cp example_config/example-config.json perf-config.json
+```
+
+Be sure to modify the example config, if chosen, to your desired specification.
+
+
+## Running the benchmark
+
+If you have configured your benchmarks *and* chosen a configuration for Haura,
+you can start the benchmark. To tell multiple runs apart, you can pass a name
+with each invocation; this name is used in the stored results:
+
+``` sh
+$ ./run.sh my-benchmark-run
+```
+
+By default, each individual benchmark is followed by an idle period of 1 minute.
diff --git a/betree/haura-benchmarks/example_config/.ci-config.json b/betree/haura-benchmarks/example_config/.ci-config.json
@@ -0,0 +1,38 @@
+{
+  "storage": {
+    "tiers": [
+      {
+        "top_level_vdevs": [
+          {
+            "mem": 2147483648
+          }
+        ],
+        "preferred_access_type": "Unknown"
+      }
+    ],
+    "queue_depth_factor": 20,
+    "thread_pool_size": null,
+    "thread_pool_pinned": false
+  },
+  "alloc_strategy": [
+    [
+      0
+    ],
+    [
+      0
+    ],
+    [
+      0
+    ],
+    [
+      0
+    ]
+  ],
+  "default_storage_class": 0,
+  "compression": "None",
+  "cache_size": 4294967296,
+  "access_mode": "AlwaysCreateNew",
+  "sync_interval_ms": 1000,
+  "migration_policy": null,
+  "metrics": null
+}
diff --git a/betree/haura-benchmarks/example_config/example-config.json b/betree/haura-benchmarks/example_config/example-config.json
@@ -0,0 +1,47 @@
+{
+  "storage": {
+    "tiers": [
+      {
+        "top_level_vdevs": [
+          {
+            "mem": 4294967296
+          }
+        ],
+        "preferred_access_type": "Unknown"
+      },
+      {
+        "top_level_vdevs": [
+          {
+            "path": "/tmp/example_disk",
+            "direct": true
+          }
+        ],
+        "preferred_access_type": "Unknown"
+      }
+    ],
+    "queue_depth_factor": 20,
+    "thread_pool_size": null,
+    "thread_pool_pinned": false
+  },
+  "alloc_strategy": [
+    [
+      0
+    ],
+    [
+      0
+    ],
+    [
+      0
+    ],
+    [
+      0
+    ]
+  ],
+  "default_storage_class": 0,
+  "compression": "None",
+  "cache_size": 268435456,
+  "access_mode": "AlwaysCreateNew",
+  "sync_interval_ms": null,
+  "migration_policy": null,
+  "metrics": null
+}
diff --git a/betree/haura-benchmarks/example_config/new-config-w-lfu.json b/betree/haura-benchmarks/example_config/new-config-w-lfu.json
@@ -0,0 +1,63 @@
+{
+  "storage": {
+    "tiers": [
+      {
+        "top_level_vdevs": [
+          {
+            "mem": 16106127360
+          }
+        ],
+        "preferred_access_type": "Unknown"
+      },
+      {
+        "top_level_vdevs": [
+          {
+            "path": "/tmp/disk_a",
+            "direct": true
+          }
+        ],
+        "preferred_access_type": "Unknown"
+      }
+    ],
+    "queue_depth_factor": 20,
+    "thread_pool_size": null,
+    "thread_pool_pinned": false
+  },
+  "alloc_strategy": [
+    [
+      0
+    ],
+    [
+      1
+    ],
+    [
+      2
+    ],
+    [
+      3
+    ]
+  ],
+  "default_storage_class": 0,
+  "compression": "None",
+  "cache_size": 4294967296,
+  "access_mode": "AlwaysCreateNew",
+  "sync_interval_ms": 1000,
+  "migration_policy": {
+    "Lfu": {
+      "grace_period": {
+        "secs": 0,
+        "nanos": 0
+      },
+      "migration_threshold": 0.9,
+      "update_period": {
+        "secs": 1,
+        "nanos": 0
+      },
+      "policy_config": {
+        "promote_num": 99999,
+        "promote_size": 128
+      }
+    }
+  },
+  "metrics": null
+}
diff --git a/betree/haura-benchmarks/example_config/new-config-w-rl.json b/betree/haura-benchmarks/example_config/new-config-w-rl.json
@@ -0,0 +1,60 @@
+{
+  "storage": {
+    "tiers": [
+      {
+        "top_level_vdevs": [
+          {
+            "mem": 16106127360
+          }
+        ],
+        "preferred_access_type": "Unknown"
+      },
+      {
+        "top_level_vdevs": [
+          {
+            "path": "/tmp/disk_a",
+            "direct": true
+          }
+        ],
+        "preferred_access_type": "Unknown"
+      }
+    ],
+    "queue_depth_factor": 20,
+    "thread_pool_size": null,
+    "thread_pool_pinned": false
+  },
+  "alloc_strategy": [
+    [
+      0
+    ],
+    [
+      1
+    ],
+    [
+      2
+    ],
+    [
+      3
+    ]
+  ],
+  "default_storage_class": 0,
+  "compression": "None",
+  "cache_size": 4294967296,
+  "access_mode": "AlwaysCreateNew",
+  "sync_interval_ms": 1000,
+  "migration_policy": {
+    "ReinforcementLearning": {
+      "grace_period": {
+        "secs": 0,
+        "nanos": 0
+      },
+      "migration_threshold": 0.8,
+      "update_period": {
+        "secs": 1,
+        "nanos": 0
+      },
+      "policy_config": null
+    }
+  },
+  "metrics": null
+}