Skip to content

Commit

Permalink
Support setting MariaDB HNSW index parameters
Browse files Browse the repository at this point in the history
Setting the parameters through session variables
  • Loading branch information
HugoWenTD committed Aug 27, 2024
1 parent 5a7f75f commit 461334d
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 16 deletions.
4 changes: 3 additions & 1 deletion ann_benchmarks/algorithms/mariadb/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ RUN mk-build-deps -r -i control -t 'apt-get -y -o Debug::pkgProblemResolver=yes
RUN pip3 install mariadb

# Current development branch for vector support:
RUN git clone --branch bb-11.4-vec-vicentiu https://github.com/MariaDB/server.git --depth 1
RUN curl -LO https://archive.mariadb.org//mariadb-11.6.0_vector/source/mariadb-11.6.0_vector.tar.gz
RUN tar -zxvf mariadb-11.6.0_vector.tar.gz
RUN mv mariadb-11.6.0_vector server

# Build flags from https://salsa.debian.org/mariadb-team/mariadb-server/-/blame/debian/latest/.gitlab-ci.yml?ref_type=heads#L43 ; SPIDER is also skipped because it failed to build in this branch
RUN cd server/ && cmake . -DWITH_SSL=system -DPLUGIN_COLUMNSTORE=NO -DPLUGIN_ROCKSDB=NO -DPLUGIN_S3=NO -DPLUGIN_MROONGA=NO -DPLUGIN_CONNECT=NO -DPLUGIN_MROONGA=NO -DPLUGIN_TOKUDB=NO -DPLUGIN_PERFSCHEMA=NO -DWITH_WSREP=OFF -DPLUGIN_SPIDER=NO
Expand Down
12 changes: 6 additions & 6 deletions ann_benchmarks/algorithms/mariadb/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ float:
module: ann_benchmarks.algorithms.mariadb
name: mariadb
run_groups:
M-16:
arg_groups: [{M: 16, efConstruction: 200}]
myisam:
arg_groups: [{M: [3, 4, 5, 6, 8, 16, 32, 48], engine: 'MyISAM'}]
args: {}
query_args: [[10, 20, 40, 80, 120, 200, 400, 800]]
M-24:
arg_groups: [{M: 24, efConstruction: 200}]
query_args: [[1, 5, 10, 20, 40, 60, 100]]
innodb:
arg_groups: [{M: [3, 4, 5, 6, 8, 16, 32, 48], engine: 'InnoDB'}]
args: {}
query_args: [[10, 20, 40, 80, 120, 200, 400, 800]]
query_args: [[1, 5, 10, 20, 40, 60, 100]]
14 changes: 5 additions & 9 deletions ann_benchmarks/algorithms/mariadb/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def __init__(self, metric, method_param):
self._test_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
self._metric = metric
self._m = method_param['M']
self._ef_construction = method_param['efConstruction']
self._engine = method_param['engine']
self._cur = None
self._perf_proc = None
self._perf_records = []
Expand Down Expand Up @@ -108,7 +108,7 @@ def prepare_options(self):
f"--socket={self._socket_file}",
"--skip_networking",
"--skip_grant_tables",
"--skip_grant_tables"
f"--mhnsw_max_edges_per_node={self._m}"
]
user_option = MariaDB.get_user_option()
if user_option is not None:
Expand Down Expand Up @@ -248,10 +248,7 @@ def fit(self, X):
self._cur.execute("DROP DATABASE IF EXISTS ann")
self._cur.execute("CREATE DATABASE ann")
self._cur.execute("USE ann")
# Innodb create table with index is not supported with the latest commit of the develop branch.
# Once all supported we could use:
#self._cur.execute("CREATE TABLE t1 (id INT PRIMARY KEY, v BLOB NOT NULL, vector INDEX (v)) ENGINE=InnoDB;")
self._cur.execute("CREATE TABLE t1 (id INT PRIMARY KEY, v BLOB NOT NULL, vector INDEX (v)) ENGINE=MyISAM;")
self._cur.execute(f"CREATE TABLE t1 (id INT PRIMARY KEY, v BLOB NOT NULL, vector INDEX (v)) ENGINE={self._engine};")

# Insert data
print("\nInserting data...")
Expand Down Expand Up @@ -284,8 +281,7 @@ def fit(self, X):
def set_query_arguments(self, ef_search):
# Set ef_search
self._ef_search = ef_search
# Not supported by MariaDB at the moment
#self._cur.execute("SET hnsw.ef_search = %d" % ef_search)
self._cur.execute("SET mhnsw_limit_multiplier = %d" % ef_search)

def query(self, v, n):
self._cur.execute("SELECT id FROM t1 ORDER by vec_distance(v, %s) LIMIT %d", (bytes(vector_to_hex(v)), n))
Expand All @@ -299,7 +295,7 @@ def query(self, v, n):
# return self._cur.fetchone()[0] / 1024

def __str__(self):
return f"MariaDB(m={self._m}, ef_construction={self._ef_construction}, ef_search={self._ef_search})"
return f"MariaDB(m={self._m}, ef_search={self._ef_search}, engine={self._engine})"

def done(self):
# Shutdown MariaDB server when benchmarking done
Expand Down

0 comments on commit 461334d

Please sign in to comment.