Skip to content

Commit

Permalink
Improve test benchmarks and update validation logic (#107)
Browse files Browse the repository at this point in the history
The test benchmarks were overhauled and various CSV test files were
generated. The ValidationCsv logic was also updated to improve the handling
of aggregation rules and to optimize validation conditions. These changes
reduced the time and memory usage of the CSV Validator.
  • Loading branch information
SmetDenis authored Mar 28, 2024
1 parent f109d29 commit ca96491
Show file tree
Hide file tree
Showing 75 changed files with 534 additions and 485 deletions.
31 changes: 13 additions & 18 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,46 +10,41 @@
# @see https://github.com/JBZoo/Csv-Blueprint
#

name: Benchmark
name: Stress Test

on:
pull_request:
branches:
- '*'
push:
branches:
- 'master'
workflow_run:
workflows: [ "Publish Docker" ]
types:
- completed

jobs:
benchmark:
stress-test:
name: Benchmark
runs-on: ubuntu-latest
env:
DOCKER_IMAGE: jbzoo/csv-blueprint:master
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.ref }}

- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: 8.3
coverage: none
tools: composer
extensions: opcache

- name: Build project
run: make build --no-print-directory

- name: Create random CSV files with 5M rows
- name: Create random huge CSV files
run: make bench-create-csv --no-print-directory

- name: Pull latest Docker image
run: docker pull ${{ env.DOCKER_IMAGE }}
- name: Building Docker Image
uses: docker/build-push-action@v5
with:
context: .
push: false
tags: jbzoo/csv-blueprint:local

- name: 🔥 Check 5M rows with Docker 🔥
- name: 🔥 Benchmark with Docker 🔥
run: make bench-docker --no-print-directory
44 changes: 4 additions & 40 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ jobs:
with:
php-version: 8.3
coverage: xdebug
tools: composer
extensions: ast, opcache
extensions: ast

- name: Build project
run: make build --no-print-directory
Expand Down Expand Up @@ -101,8 +100,7 @@ jobs:
with:
php-version: 8.1
coverage: none
tools: composer
extensions: ast, opcache
extensions: ast

- name: Install project
run: make build --no-print-directory
Expand Down Expand Up @@ -142,10 +140,9 @@ jobs:
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: 8.3
php-version: highest
coverage: none
tools: composer
extensions: ast, opcache
extensions: ast

- name: Install project
run: make build --no-print-directory
Expand Down Expand Up @@ -185,7 +182,6 @@ jobs:
uses: shivammathur/setup-php@v2
with:
php-version: 8.3
tools: composer

- name: Build project in production mode
run: make build-prod --no-print-directory
Expand Down Expand Up @@ -218,8 +214,6 @@ jobs:
uses: shivammathur/setup-php@v2
with:
php-version: ${{ matrix.php-version }}
tools: composer
extensions: opcache

- name: Build project in production mode
run: make build-prod build-phar-file --no-print-directory
Expand Down Expand Up @@ -310,33 +304,3 @@ jobs:
csv: ./tests/fixtures/batch/*.csv
schema: ./tests/schemas/demo_*.yml
continue-on-error: true


benchmark:
name: Benchmark
runs-on: ubuntu-latest
env:
DOCKER_IMAGE: jbzoo/csv-blueprint:master
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.ref }}

- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: 8.3
coverage: none
tools: composer
extensions: opcache

- name: Build project
run: make build --no-print-directory

- name: Create random CSV files with 5M rows
run: make bench-create-csv --no-print-directory

- name: 🔥 Check 5M rows with PHP Binary 🔥
run: make bench-php --no-print-directory
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,42 @@
# @see https://github.com/JBZoo/Csv-Blueprint
#

name: Publish Docker
name: Publish

on:
release:
types: [ created ]

jobs:
phar:
name: Publish PHAR
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.ref_name }}

- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: 8.3
tools: composer

- name: Build project in production mode
run: make build-prod build-phar-file --no-print-directory

- name: 🎨 Test PHAR file
run: ./build/csv-blueprint.phar --ansi -vvv

- name: Upload PHAR to the release
uses: softprops/action-gh-release@v2
with:
token: ${{ secrets.GITHUB_TOKEN }}
files: |
./build/csv-blueprint.phar
docker:
name: Publish Docker
runs-on: ubuntu-latest
Expand Down
47 changes: 0 additions & 47 deletions .github/workflows/release-phar.yml

This file was deleted.

67 changes: 24 additions & 43 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@ ifneq (, $(wildcard ./vendor/jbzoo/codestyle/src/init.Makefile))
endif

DOCKER_IMAGE ?= jbzoo/csv-blueprint:local
CMD_VALIDATE := validate:csv --ansi -vvv
CMD_VALIDATE := validate:csv --ansi
BLUEPRINT := COLUMNS=300 time $(PHP_BIN) ./csv-blueprint $(CMD_VALIDATE)
BLUEPRINT_DOCKER := time docker run --rm --workdir=/parent-host -v .:/parent-host $(DOCKER_IMAGE) $(CMD_VALIDATE)
BENCH_BIN := time $(PHP_BIN) ./tests/Benchmarks/bench.php

VALID_CSV := --csv='./tests/fixtures/demo.csv'
VALID_SCHEMA := --schema='./tests/schemas/demo_valid.yml'
Expand Down Expand Up @@ -64,11 +63,11 @@ demo: ##@Demo Run demo via PHP binary
$(call title,"Demo - Valid CSV \(PHP binary\)")
@$(BLUEPRINT) $(VALID_CSV) $(VALID_SCHEMA)
$(call title,"Demo - Invalid CSV \(PHP binary\)")
@$(BLUEPRINT) $(INVALID_CSV) $(INVALID_SCHEMA)
@$(BLUEPRINT) $(INVALID_CSV) $(INVALID_SCHEMA) -vvv

REPORT ?= table
demo-github: ##@Demo Run demo invalid CSV for GitHub Actions
@$(BLUEPRINT) $(INVALID_CSV) $(INVALID_SCHEMA) --report=$(REPORT)
@$(BLUEPRINT) $(INVALID_CSV) $(INVALID_SCHEMA) -vvv --report=$(REPORT)


# Docker ###############################################################################################################
Expand All @@ -79,58 +78,40 @@ docker-build: ##@Docker (Re-)build Docker image

docker-demo: ##@Docker Run demo via Docker
$(call title,"Demo - Valid CSV \(via Docker\)")
@$(BLUEPRINT_DOCKER) $(VALID_CSV) $(VALID_SCHEMA)
@$(BLUEPRINT_DOCKER) $(VALID_CSV) $(VALID_SCHEMA) -vvv
$(call title,"Demo - Invalid CSV \(via Docker\)")
@$(BLUEPRINT_DOCKER) $(INVALID_CSV) $(INVALID_SCHEMA)
@$(BLUEPRINT_DOCKER) $(INVALID_CSV) $(INVALID_SCHEMA) -vvv

docker-in: ##@Docker Enter into Docker container
@docker run -it --entrypoint /bin/sh $(DOCKER_IMAGE)


# Benchmarks ###########################################################################################################
BENCH_ROWS ?= 5000000
BENCH_CSV := --csv=./build/bench/5_$(BENCH_ROWS)_header.csv
BENCH_SCHEMA_CELL := --schema=./tests/Benchmarks/benchmark-cell.yml
BENCH_SCHEMA_AGG := --schema=./tests/Benchmarks/benchmark-agg.yml
BENCH_COLS ?= 10
BENCH_ROWS_SRC ?= 1000
BENCH_CSV_PATH := ./build/bench/$(BENCH_COLS)_$(BENCH_ROWS_SRC)_000.csv
BENCH_CSV := --csv='$(BENCH_CSV_PATH)'
BENCH_SCHEMAS := --schema='./tests/Benchmarks/benchmark-*.yml'
BENCH_FLAGS := --debug --profile --report=text -vvv


# Run the complete benchmark pipeline in order: generate the CSV fixtures,
# (re-)build the Docker image, then benchmark that image against the fixtures.
# Sub-makes are invoked through $(MAKE) rather than a literal `make` so that
# the same make binary is used and command-line flags (-n, -j/jobserver,
# --no-print-directory, variable overrides) are propagated to every step.
bench-all: ##@Benchmarks Run all benchmarks
	@$(MAKE) bench-create-csv
	@$(MAKE) docker-build
	@$(MAKE) bench-docker

bench-create-csv: ##@Benchmarks Create CSV file
$(call title,"PHP Benchmarks - Create $(BENCH_ROWS) CSV file")
$(call title,"Benchmark - Create CSV file")
@mkdir -pv ./build/bench/
$(BENCH_BIN) --add-header --columns=5 --rows=$(BENCH_ROWS) --ansi
ls -lah ./build/bench/*.csv;
@rm -fv ./build/bench/*.csv
@time bash ./tests/Benchmarks/create-csv.sh


bench-docker: ##@Benchmarks Run CSV file with Docker
$(call title,"PHP Benchmarks - CSV file with Docker")
$(call title,"Only one cell rule")
-$(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMA_CELL) --profile
$(call title,"Only one aggregation rule")
-$(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMA_AGG) --profile
$(call title,"Benchmark - CSV file with Docker")
-$(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMAS) $(BENCH_FLAGS)


bench-php: ##@Benchmarks Run CSV file with PHP binary
$(call title,"PHP Benchmarks - CSV file with PHP binary")
$(call title,"Only one cell rule")
-$(BLUEPRINT) $(BENCH_CSV) $(BENCH_SCHEMA_CELL) --profile
$(call title,"Only one aggregation rule")
-$(BLUEPRINT) $(BENCH_CSV) $(BENCH_SCHEMA_AGG) --profile


BENCH_ROWS_LIST := 100000 1000000
bench-prepare: ##@Benchmarks Create CSV files
$(call title,"PHP Benchmarks - Prepare CSV files")
exit 1; # Disabled for now. Enable if you need to generate CSV files.
@echo "Remove old CSV files"
mkdir -pv ./build/bench/
rm -fv ./build/bench/*.csv
@$(foreach rows,$(BENCH_ROWS_LIST), \
echo "Generate CSV: rows=$(rows)"; \
$(BENCH_BIN) -H --columns=1 --rows=$(rows) -q & \
$(BENCH_BIN) -H --columns=3 --rows=$(rows) -q & \
$(BENCH_BIN) -H --columns=5 --rows=$(rows) -q & \
$(BENCH_BIN) -H --columns=10 --rows=$(rows) -q & \
$(BENCH_BIN) -H --columns=20 --rows=$(rows) -q & \
wait; \
echo "Generate CSV: rows=$(rows) - done"; \
)
ls -lh ./build/bench/*.csv;
$(call title,"Benchmark - CSV file with PHP binary")
-$(BLUEPRINT) $(BENCH_CSV) $(BENCH_SCHEMAS) $(BENCH_FLAGS)
Loading

0 comments on commit ca96491

Please sign in to comment.