Skip to content

Commit

Permalink
perf: add column insert test
Browse files Browse the repository at this point in the history
The test creates an empty space with 1000 nullable columns storing uint64
values. Then it initializes a datasets that consists of 10 columns and
1 million rows (row count and both column counts are configurable), then
it inserts the dataset into the space.

By default the test uses serial C API but one may switch to the Arrow API
for batch insertion (the feature is exclusive to the Enterprise Edition).

It's also possible to specify the engine and wal_mode to use (default are
memtx, write).

Needed for tarantool/tarantool-ee#712

NO_DOC=perf test
NO_TEST=perf test
NO_CHANGELOG=perf test
  • Loading branch information
Gumix authored and locker committed Jul 16, 2024
1 parent 8cd677d commit e5c4bd6
Show file tree
Hide file tree
Showing 5 changed files with 428 additions and 1 deletion.
3 changes: 2 additions & 1 deletion changelogs/unreleased/expose-luaL_pushnull-and-luaL_isnull
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
## feature/lua

* Exposed `luaL_pushnull()` and `luaL_isnull()` functions via C module API.
* Exposed the `luaL_pushnull()` and `luaL_isnull()` functions via the C
module API.
8 changes: 8 additions & 0 deletions perf/lua/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
enable_tnt_compile_flags()

set(TARANTOOL_BIN $<TARGET_FILE:tarantool>)
set(RUN_PERF_LUA_TESTS_LIST "")

Expand Down Expand Up @@ -61,6 +63,12 @@ create_perf_lua_test(NAME column_scan
DEPENDS column_scan_module
)

build_module(column_insert_module column_insert_module.c)
target_link_libraries(column_insert_module msgpuck)
create_perf_lua_test(NAME column_insert
DEPENDS column_insert_module
)

add_custom_target(test-lua-perf
DEPENDS "${RUN_PERF_LUA_TESTS_LIST}"
COMMENT "Running Lua performance tests"
Expand Down
170 changes: 170 additions & 0 deletions perf/lua/column_insert.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
--
-- The test measures run time of batch insertion into the space columns.
--
-- Output format (console):
-- <test-case> <rows-per-second>
--
-- NOTE: The test requires a C module. Set the BUILDDIR environment variable to
-- the tarantool build directory if using out-of-source build.
--

local clock = require('clock')
local fiber = require('fiber')
local fio = require('fio')
local log = require('log')
local tarantool = require('tarantool')
local benchmark = require('benchmark')

local USAGE = [[
engine <string, 'memtx'> - space engine to use for the test
wal_mode <string, 'write'> - write-ahead log mode to use for the test
column_count_total <number, 1000> - number of columns in the test space
column_count_batch <number, 10> - number of columns in the record batch
row_count_total <number, 1000000> - number of inserted rows
row_count_batch <number, 1000> - number of rows per record batch
use_arrow_api <boolean, false> - use the Arrow API for batch insertion
]]

local params = benchmark.argparse(arg, {
{'engine', 'string'},
{'wal_mode', 'string'},
{'column_count_total', 'number'},
{'column_count_batch', 'number'},
{'row_count_total', 'number'},
{'row_count_batch', 'number'},
{'use_arrow_api', 'boolean'},
}, USAGE)

local DEFAULT_ENGINE = 'memtx'
local DEFAULT_WAL_MODE = 'write'
local DEFAULT_COLUMN_COUNT_TOTAL = 1000
local DEFAULT_COLUMN_COUNT_BATCH = 10
local DEFAULT_ROW_COUNT_TOTAL = 1000 * 1000
local DEFAULT_ROW_COUNT_BATCH = 1000

params.engine = params.engine or DEFAULT_ENGINE
params.wal_mode = params.wal_mode or DEFAULT_WAL_MODE
params.column_count_total = params.column_count_total or
DEFAULT_COLUMN_COUNT_TOTAL
params.column_count_batch = params.column_count_batch or
DEFAULT_COLUMN_COUNT_BATCH
params.row_count_total = params.row_count_total or DEFAULT_ROW_COUNT_TOTAL
params.row_count_batch = params.row_count_batch or DEFAULT_ROW_COUNT_BATCH
params.use_arrow_api = params.use_arrow_api or false

assert(params.column_count_batch <= params.column_count_total)
assert(params.column_count_batch < 1000 * 1000)
assert(params.row_count_batch <= params.row_count_total)
assert(params.row_count_total % params.row_count_batch == 0)

local bench = benchmark.new(params)

local BUILDDIR = fio.abspath(fio.pathjoin(os.getenv('BUILDDIR') or '.'))
local MODULEPATH = fio.pathjoin(BUILDDIR, 'perf', 'lua',
'?.' .. tarantool.build.mod_format)
package.cpath = MODULEPATH .. ';' .. package.cpath

local test_module_name = 'column_insert_module'
local has_test_module, test_module = pcall(require, test_module_name)
if not has_test_module then
local errmsg = ('Lua module "%s" is not found.\n'):format(test_module_name)
io.stderr:write(errmsg)
os.exit(1)
end

local test_funcs = {}
for _, func_name in ipairs({'insert'}) do
local full_func_name
if params.use_arrow_api then
full_func_name = func_name .. '_batch'
else
full_func_name = func_name .. '_serial'
end
local f = test_module[full_func_name]
if f == nil then
error('The specified test mode is not supported by this build')
end
test_funcs[func_name] = f
end

local test_dir = fio.tempdir()

local function rmtree(s)
if (fio.path.is_file(s) or fio.path.is_link(s)) then
fio.unlink(s)
return
end
if fio.path.is_dir(s) then
for _,i in pairs(fio.listdir(s)) do
rmtree(s..'/'..i)
end
fio.rmdir(s)
end
end

box.cfg({
log = 'tarantool.log',
work_dir = test_dir,
wal_mode = params.wal_mode,
memtx_memory = 4 * 1024 * 1024 * 1024,
checkpoint_count = 1,
})

box.once('init', function()
log.info('Creating the test space...')
local format = {}
for i = 1, params.column_count_total do
table.insert(format, {'field_' .. i, 'uint64', is_nullable = true})
end
format[1].is_nullable = false
local s = box.schema.space.create('test', {
engine = params.engine,
field_count = #format,
format = format,
})
s:create_index('pk')
end)

local function check_result(result, expected)
log.info('expected %s, got %s', expected, result)
assert(result == expected)
end

local TESTS = {
{
name = 'insert',
func = function()
test_funcs.insert(box.space.test.id, params)
check_result(box.space.test:count(), params.row_count_total)
end,
},
}

local function run_test(test)
local func = test.func
local real_time_start = clock.time()
local cpu_time_start = clock.proc()
func()
local delta_real = clock.time() - real_time_start
local delta_cpu = clock.proc() - cpu_time_start
bench:add_result(test.name, {
real_time = delta_real,
cpu_time = delta_cpu,
items = params.row_count_total,
})
end

fiber.set_max_slice(9000)
test_module.init(params)

for _, test in ipairs(TESTS) do
log.info('Running test %s...', test.name)
run_test(test)
end

bench:dump_results()

test_module.fini()
rmtree(test_dir)
os.exit(0)
Loading

0 comments on commit e5c4bd6

Please sign in to comment.