Skip to content

Commit

Permalink
Make: Formatting and docs
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvardanian committed Oct 9, 2023
1 parent eadad4e commit b0a280d
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 87 deletions.
3 changes: 2 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@
"strstream": "cpp",
"filesystem": "cpp",
"stringzilla.h": "c",
"__memory": "c"
"__memory": "c",
"charconv": "c"
},
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
"cSpell.words": [
Expand Down
140 changes: 72 additions & 68 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,105 +1,109 @@
# This CMake file is heavily inspired by the following `nlohmann/json` CMake file:
# https://github.com/nlohmann/json/blob/develop/CMakeLists.txt
cmake_minimum_required(VERSION 3.1)
project(stringzilla VERSION 0.1.0 LANGUAGES C CXX)
project(
stringzilla
VERSION 0.1.0
LANGUAGES C CXX)

set (CMAKE_C_STANDARD 11)
set (CMAKE_CXX_STANDARD 17)
set(CMAKE_C_STANDARD 11)
set(CMAKE_CXX_STANDARD 17)

# Determine if StringZilla is built as a subproject (using `add_subdirectory`) or if it is the main project
# Determine if StringZilla is built as a subproject (using `add_subdirectory`) or if
# it is the main project
set(STRINGZILLA_IS_MAIN_PROJECT OFF)
if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
set(STRINGZILLA_IS_MAIN_PROJECT ON)
if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
set(STRINGZILLA_IS_MAIN_PROJECT ON)
endif()

# Options
option(STRINGZILLA_INSTALL "Install CMake targets" OFF)
option(STRINGZILLA_BUILD_TEST "Compile a native unit test in C++" ${STRINGZILLA_IS_MAIN_PROJECT})
option(STRINGZILLA_BUILD_BENCHMARK "Compile a native benchmark in C++" ${STRINGZILLA_IS_MAIN_PROJECT})
option(STRINGZILLA_BUILD_TEST "Compile a native unit test in C++"
${STRINGZILLA_IS_MAIN_PROJECT})
option(STRINGZILLA_BUILD_BENCHMARK "Compile a native benchmark in C++"
${STRINGZILLA_IS_MAIN_PROJECT})
option(STRINGZILLA_BUILD_WOLFRAM "Compile Wolfram Language bindings" OFF)

# Includes
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH})
include(ExternalProject)

# Allow CMake 3.13+ to override options when using FetchContent / add_subdirectory
if (POLICY CMP0077)
cmake_policy(SET CMP0077 NEW)
endif ()
# Allow CMake 3.13+ to override options when using FetchContent /
# add_subdirectory
if(POLICY CMP0077)
cmake_policy(SET CMP0077 NEW)
endif()

# Configuration
include(GNUInstallDirs)
set(STRINGZILLA_TARGET_NAME ${PROJECT_NAME})
set(STRINGZILLA_CONFIG_INSTALL_DIR "${CMAKE_INSTALL_DATADIR}/cmake/${PROJECT_NAME}" CACHE INTERNAL "")
set(STRINGZILLA_INCLUDE_INSTALL_DIR "${CMAKE_INSTALL_INCLUDEDIR}")
set(STRINGZILLA_TARGETS_EXPORT_NAME "${PROJECT_NAME}Targets")
set(STRINGZILLA_CMAKE_CONFIG_TEMPLATE "cmake/config.cmake.in")
set(STRINGZILLA_CMAKE_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}")
set(STRINGZILLA_CMAKE_VERSION_CONFIG_FILE "${STRINGZILLA_CMAKE_CONFIG_DIR}/${PROJECT_NAME}ConfigVersion.cmake")
set(STRINGZILLA_CMAKE_PROJECT_CONFIG_FILE "${STRINGZILLA_CMAKE_CONFIG_DIR}/${PROJECT_NAME}Config.cmake")
set(STRINGZILLA_CMAKE_PROJECT_TARGETS_FILE "${STRINGZILLA_CMAKE_CONFIG_DIR}/${PROJECT_NAME}Targets.cmake")
set(STRINGZILLA_PKGCONFIG_INSTALL_DIR "${CMAKE_INSTALL_DATADIR}/pkgconfig")

set(STRINGZILLA_TARGET_NAME ${PROJECT_NAME})
set(STRINGZILLA_CONFIG_INSTALL_DIR
"${CMAKE_INSTALL_DATADIR}/cmake/${PROJECT_NAME}"
CACHE INTERNAL "")
set(STRINGZILLA_INCLUDE_INSTALL_DIR "${CMAKE_INSTALL_INCLUDEDIR}")
set(STRINGZILLA_TARGETS_EXPORT_NAME "${PROJECT_NAME}Targets")
set(STRINGZILLA_CMAKE_CONFIG_TEMPLATE "cmake/config.cmake.in")
set(STRINGZILLA_CMAKE_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}")
set(STRINGZILLA_CMAKE_VERSION_CONFIG_FILE
"${STRINGZILLA_CMAKE_CONFIG_DIR}/${PROJECT_NAME}ConfigVersion.cmake")
set(STRINGZILLA_CMAKE_PROJECT_CONFIG_FILE
"${STRINGZILLA_CMAKE_CONFIG_DIR}/${PROJECT_NAME}Config.cmake")
set(STRINGZILLA_CMAKE_PROJECT_TARGETS_FILE
"${STRINGZILLA_CMAKE_CONFIG_DIR}/${PROJECT_NAME}Targets.cmake")
set(STRINGZILLA_PKGCONFIG_INSTALL_DIR "${CMAKE_INSTALL_DATADIR}/pkgconfig")

# Define our header-only library
add_library(${STRINGZILLA_TARGET_NAME} INTERFACE)
add_library(${PROJECT_NAME}::${STRINGZILLA_TARGET_NAME} ALIAS ${STRINGZILLA_TARGET_NAME})
add_library(${PROJECT_NAME}::${STRINGZILLA_TARGET_NAME} ALIAS
${STRINGZILLA_TARGET_NAME})
set(STRINGZILLA_INCLUDE_BUILD_DIR "${PROJECT_SOURCE_DIR}/include/")

target_compile_definitions(
${STRINGZILLA_TARGET_NAME}
INTERFACE
$<$<NOT:$<BOOL:${JSON_GlobalUDLs}>>:STRINGZILLA_USE_OPENMP=0>
)
${STRINGZILLA_TARGET_NAME}
INTERFACE $<$<NOT:$<BOOL:${JSON_GlobalUDLs}>>:STRINGZILLA_USE_OPENMP=0>)
target_include_directories(
${STRINGZILLA_TARGET_NAME}
${STRINGZILLA_SYSTEM_INCLUDE} INTERFACE
$<BUILD_INTERFACE:${STRINGZILLA_INCLUDE_BUILD_DIR}>
$<INSTALL_INTERFACE:include>
)
${STRINGZILLA_TARGET_NAME} ${STRINGZILLA_SYSTEM_INCLUDE}
INTERFACE $<BUILD_INTERFACE:${STRINGZILLA_INCLUDE_BUILD_DIR}>
$<INSTALL_INTERFACE:include>)

if(STRINGZILLA_INSTALL)
install(
DIRECTORY ${STRINGZILLA_INCLUDE_BUILD_DIR}
DESTINATION ${STRINGZILLA_INCLUDE_INSTALL_DIR}
)
install(
FILES ${STRINGZILLA_CMAKE_PROJECT_CONFIG_FILE} ${STRINGZILLA_CMAKE_VERSION_CONFIG_FILE}
DESTINATION ${STRINGZILLA_CONFIG_INSTALL_DIR}
)
export(
TARGETS ${STRINGZILLA_TARGET_NAME}
NAMESPACE ${PROJECT_NAME}::
FILE ${STRINGZILLA_CMAKE_PROJECT_TARGETS_FILE}
)
install(
TARGETS ${STRINGZILLA_TARGET_NAME}
EXPORT ${STRINGZILLA_TARGETS_EXPORT_NAME}
INCLUDES DESTINATION ${STRINGZILLA_INCLUDE_INSTALL_DIR}
)
install(
EXPORT ${STRINGZILLA_TARGETS_EXPORT_NAME}
NAMESPACE ${PROJECT_NAME}::
DESTINATION ${STRINGZILLA_CONFIG_INSTALL_DIR}
)
install(
FILES "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.pc"
DESTINATION ${STRINGZILLA_PKGCONFIG_INSTALL_DIR}
)
install(DIRECTORY ${STRINGZILLA_INCLUDE_BUILD_DIR}
DESTINATION ${STRINGZILLA_INCLUDE_INSTALL_DIR})
install(FILES ${STRINGZILLA_CMAKE_PROJECT_CONFIG_FILE}
${STRINGZILLA_CMAKE_VERSION_CONFIG_FILE}
DESTINATION ${STRINGZILLA_CONFIG_INSTALL_DIR})
export(
TARGETS ${STRINGZILLA_TARGET_NAME}
NAMESPACE ${PROJECT_NAME}::
FILE ${STRINGZILLA_CMAKE_PROJECT_TARGETS_FILE})
install(
TARGETS ${STRINGZILLA_TARGET_NAME}
EXPORT ${STRINGZILLA_TARGETS_EXPORT_NAME}
INCLUDES
DESTINATION ${STRINGZILLA_INCLUDE_INSTALL_DIR})
install(
EXPORT ${STRINGZILLA_TARGETS_EXPORT_NAME}
NAMESPACE ${PROJECT_NAME}::
DESTINATION ${STRINGZILLA_CONFIG_INSTALL_DIR})
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.pc"
DESTINATION ${STRINGZILLA_PKGCONFIG_INSTALL_DIR})
endif()

if(${STRINGZILLA_BUILD_TEST} OR ${STRINGZILLA_BUILD_BENCHMARK})
add_executable(stringzilla_test scripts/test.c)
add_executable(stringzilla_test scripts/test.cpp)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -O3 -flto -march=native -finline-functions -funroll-loops"
)

target_include_directories(stringzilla_test PRIVATE stringzilla)
set_target_properties(stringzilla_test PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
set_target_properties(stringzilla_test PROPERTIES RUNTIME_OUTPUT_DIRECTORY
${CMAKE_BINARY_DIR})

if(${CMAKE_VERSION} VERSION_EQUAL 3.13 OR ${CMAKE_VERSION} VERSION_GREATER 3.13)
include(CTest)
enable_testing()
add_test(NAME stringzilla_test COMMAND stringzilla_test)
if(${CMAKE_VERSION} VERSION_EQUAL 3.13 OR ${CMAKE_VERSION} VERSION_GREATER
3.13)
include(CTest)
enable_testing()
add_test(NAME stringzilla_test COMMAND stringzilla_test)
endif()
endif()

10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ Coming soon.

## Quick Start: Python 🐍

1️. Install via pip: `pip install stringzilla`
1. Import the classes you need: `from stringzilla import Str, Strs, File`
1. Install via pip: `pip install stringzilla`
2. Import the classes you need: `from stringzilla import Str, Strs, File`

### Basic Usage

Expand Down Expand Up @@ -115,13 +115,13 @@ There is an ABI-stable C 99 interface, in case you have a database, an operating
#include "stringzilla.h"

// Initialize your haystack and needle
sz_haystack_t haystack = {your_text, your_text_length};
sz_needle_t needle = {your_subtext, your_subtext_length, your_quadgram_offset};
sz_string_view_t haystack = {your_text, your_text_length};
sz_string_view_t needle = {your_subtext, your_subtext_length};

// Perform string-level operations
size_t character_count = sz_count_char(haystack, 'a');
size_t character_position = sz_find_unigram(haystack, 'a');
size_t substring_position = sz_find_substr(haystack, needle);
size_t substring_position = sz_find_substring(haystack, needle);

// Perform collection level operations
sz_sequence_t array = {your_order, your_count, your_get_start, your_get_length, your_handle};
Expand Down
20 changes: 14 additions & 6 deletions scripts/bench.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"150 ms ± 2.01 ms per loop (mean ± std. dev. of 100 runs, 1 loop each)\n"
"152 ms ± 3.24 ms per loop (mean ± std. dev. of 100 runs, 1 loop each)\n"
]
}
],
Expand All @@ -106,7 +106,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"37.8 ms ± 286 µs per loop (mean ± std. dev. of 100 runs, 1 loop each)\n"
"38.1 ms ± 312 µs per loop (mean ± std. dev. of 100 runs, 1 loop each)\n"
]
}
],
Expand All @@ -124,7 +124,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"173 ns ± 23.7 ns per loop (mean ± std. dev. of 1000 runs, 1 loop each)\n"
"The slowest run took 7.28 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
"186 ns ± 41.1 ns per loop (mean ± std. dev. of 1000 runs, 1 loop each)\n"
]
}
],
Expand All @@ -142,8 +143,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"The slowest run took 82.51 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
"94.3 ns ± 108 ns per loop (mean ± std. dev. of 1000 runs, 1 loop each)\n"
"The slowest run took 120.95 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
"99.6 ns ± 155 ns per loop (mean ± std. dev. of 1000 runs, 1 loop each)\n"
]
}
],
Expand All @@ -152,6 +153,13 @@
"sz_str.find(pattern)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -176,7 +184,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.10.11"
},
"orig_nbformat": 4
},
Expand Down
14 changes: 7 additions & 7 deletions scripts/test.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ void populate_random_string(char *buffer, int length, int variability) {
buffer[length] = '\0';
}

// Test function for sz_find_substr
void test_sz_find_substr() {
// Test function for sz_find_substring
void test_sz_find_substring() {
char buffer[MAX_LENGTH + 1];
char pattern[6]; // Maximum length of 5 + 1 for '\0'

Expand All @@ -39,19 +39,19 @@ void test_sz_find_substr() {
needle.length = pattern_length;

// Comparing the result of your function with the standard library function.
sz_string_ptr_t result_libc = strstr(buffer, pattern);
sz_string_ptr_t result_stringzilla =
sz_find_substr(haystack.start, haystack.length, needle.start, needle.length);
sz_string_start_t result_libc = strstr(buffer, pattern);
sz_string_start_t result_stringzilla =
sz_find_substring(haystack.start, haystack.length, needle.start, needle.length);

assert(((result_libc == NULL) ^ (result_stringzilla == NULL)) && "Test failed for sz_find_substr");
assert(((result_libc == NULL) ^ (result_stringzilla == NULL)) && "Test failed for sz_find_substring");
}
}
}

int main() {
srand((unsigned int)time(NULL));

test_sz_find_substr();
test_sz_find_substring();
// Add calls to other test functions as you implement them

printf("All tests passed!\n");
Expand Down

0 comments on commit b0a280d

Please sign in to comment.