Skip to content

Removing LibC dependency, improving search algorithms, simplifying API #54

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@
"strstream": "cpp",
"filesystem": "cpp",
"stringzilla.h": "c",
"__memory": "c"
"__memory": "c",
"charconv": "c"
},
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
"cSpell.words": [
Expand Down Expand Up @@ -151,6 +152,7 @@
"NOMINMAX",
"NOTIMPLEMENTED",
"numpy",
"octogram",
"pytest",
"Pythonic",
"quadgram",
Expand All @@ -166,6 +168,7 @@
"substr",
"SWAR",
"TPFLAGS",
"unigram",
"Vardanian",
"vectorcallfunc",
"XDECREF",
Expand Down
140 changes: 72 additions & 68 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,105 +1,109 @@
# This CMake file is heavily inspired by the following `nlohmann/json` CMake file:
# https://github.com/nlohmann/json/blob/develop/CMakeLists.txt
cmake_minimum_required(VERSION 3.1)
project(stringzilla VERSION 0.1.0 LANGUAGES C CXX)
project(
stringzilla
VERSION 0.1.0
LANGUAGES C CXX)

set (CMAKE_C_STANDARD 11)
set (CMAKE_CXX_STANDARD 17)
set(CMAKE_C_STANDARD 11)
set(CMAKE_CXX_STANDARD 17)

# Determine if StringZilla is built as a subproject (using `add_subdirectory`) or if it is the main project
# Determine if StringZilla is built as a subproject (using `add_subdirectory`) or if
# it is the main project
set(STRINGZILLA_IS_MAIN_PROJECT OFF)
if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
set(STRINGZILLA_IS_MAIN_PROJECT ON)
if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
set(STRINGZILLA_IS_MAIN_PROJECT ON)
endif()

# Options
option(STRINGZILLA_INSTALL "Install CMake targets" OFF)
option(STRINGZILLA_BUILD_TEST "Compile a native unit test in C++" ${STRINGZILLA_IS_MAIN_PROJECT})
option(STRINGZILLA_BUILD_BENCHMARK "Compile a native benchmark in C++" ${STRINGZILLA_IS_MAIN_PROJECT})
option(STRINGZILLA_BUILD_TEST "Compile a native unit test in C++"
${STRINGZILLA_IS_MAIN_PROJECT})
option(STRINGZILLA_BUILD_BENCHMARK "Compile a native benchmark in C++"
${STRINGZILLA_IS_MAIN_PROJECT})
option(STRINGZILLA_BUILD_WOLFRAM "Compile Wolfram Language bindings" OFF)

# Includes
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH})
include(ExternalProject)

# Allow CMake 3.13+ to override options when using FetchContent / add_subdirectory
if (POLICY CMP0077)
cmake_policy(SET CMP0077 NEW)
endif ()
# Allow CMake 3.13+ to override options when using FetchContent /
# add_subdirectory
if(POLICY CMP0077)
cmake_policy(SET CMP0077 NEW)
endif()

# Configuration
include(GNUInstallDirs)
set(STRINGZILLA_TARGET_NAME ${PROJECT_NAME})
set(STRINGZILLA_CONFIG_INSTALL_DIR "${CMAKE_INSTALL_DATADIR}/cmake/${PROJECT_NAME}" CACHE INTERNAL "")
set(STRINGZILLA_INCLUDE_INSTALL_DIR "${CMAKE_INSTALL_INCLUDEDIR}")
set(STRINGZILLA_TARGETS_EXPORT_NAME "${PROJECT_NAME}Targets")
set(STRINGZILLA_CMAKE_CONFIG_TEMPLATE "cmake/config.cmake.in")
set(STRINGZILLA_CMAKE_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}")
set(STRINGZILLA_CMAKE_VERSION_CONFIG_FILE "${STRINGZILLA_CMAKE_CONFIG_DIR}/${PROJECT_NAME}ConfigVersion.cmake")
set(STRINGZILLA_CMAKE_PROJECT_CONFIG_FILE "${STRINGZILLA_CMAKE_CONFIG_DIR}/${PROJECT_NAME}Config.cmake")
set(STRINGZILLA_CMAKE_PROJECT_TARGETS_FILE "${STRINGZILLA_CMAKE_CONFIG_DIR}/${PROJECT_NAME}Targets.cmake")
set(STRINGZILLA_PKGCONFIG_INSTALL_DIR "${CMAKE_INSTALL_DATADIR}/pkgconfig")

set(STRINGZILLA_TARGET_NAME ${PROJECT_NAME})
set(STRINGZILLA_CONFIG_INSTALL_DIR
"${CMAKE_INSTALL_DATADIR}/cmake/${PROJECT_NAME}"
CACHE INTERNAL "")
set(STRINGZILLA_INCLUDE_INSTALL_DIR "${CMAKE_INSTALL_INCLUDEDIR}")
set(STRINGZILLA_TARGETS_EXPORT_NAME "${PROJECT_NAME}Targets")
set(STRINGZILLA_CMAKE_CONFIG_TEMPLATE "cmake/config.cmake.in")
set(STRINGZILLA_CMAKE_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}")
set(STRINGZILLA_CMAKE_VERSION_CONFIG_FILE
"${STRINGZILLA_CMAKE_CONFIG_DIR}/${PROJECT_NAME}ConfigVersion.cmake")
set(STRINGZILLA_CMAKE_PROJECT_CONFIG_FILE
"${STRINGZILLA_CMAKE_CONFIG_DIR}/${PROJECT_NAME}Config.cmake")
set(STRINGZILLA_CMAKE_PROJECT_TARGETS_FILE
"${STRINGZILLA_CMAKE_CONFIG_DIR}/${PROJECT_NAME}Targets.cmake")
set(STRINGZILLA_PKGCONFIG_INSTALL_DIR "${CMAKE_INSTALL_DATADIR}/pkgconfig")

# Define our header-only library
add_library(${STRINGZILLA_TARGET_NAME} INTERFACE)
add_library(${PROJECT_NAME}::${STRINGZILLA_TARGET_NAME} ALIAS ${STRINGZILLA_TARGET_NAME})
add_library(${PROJECT_NAME}::${STRINGZILLA_TARGET_NAME} ALIAS
${STRINGZILLA_TARGET_NAME})
set(STRINGZILLA_INCLUDE_BUILD_DIR "${PROJECT_SOURCE_DIR}/include/")

target_compile_definitions(
${STRINGZILLA_TARGET_NAME}
INTERFACE
$<$<NOT:$<BOOL:${JSON_GlobalUDLs}>>:STRINGZILLA_USE_OPENMP=0>
)
${STRINGZILLA_TARGET_NAME}
INTERFACE $<$<NOT:$<BOOL:${JSON_GlobalUDLs}>>:STRINGZILLA_USE_OPENMP=0>)
target_include_directories(
${STRINGZILLA_TARGET_NAME}
${STRINGZILLA_SYSTEM_INCLUDE} INTERFACE
$<BUILD_INTERFACE:${STRINGZILLA_INCLUDE_BUILD_DIR}>
$<INSTALL_INTERFACE:include>
)
${STRINGZILLA_TARGET_NAME} ${STRINGZILLA_SYSTEM_INCLUDE}
INTERFACE $<BUILD_INTERFACE:${STRINGZILLA_INCLUDE_BUILD_DIR}>
$<INSTALL_INTERFACE:include>)

if(STRINGZILLA_INSTALL)
install(
DIRECTORY ${STRINGZILLA_INCLUDE_BUILD_DIR}
DESTINATION ${STRINGZILLA_INCLUDE_INSTALL_DIR}
)
install(
FILES ${STRINGZILLA_CMAKE_PROJECT_CONFIG_FILE} ${STRINGZILLA_CMAKE_VERSION_CONFIG_FILE}
DESTINATION ${STRINGZILLA_CONFIG_INSTALL_DIR}
)
export(
TARGETS ${STRINGZILLA_TARGET_NAME}
NAMESPACE ${PROJECT_NAME}::
FILE ${STRINGZILLA_CMAKE_PROJECT_TARGETS_FILE}
)
install(
TARGETS ${STRINGZILLA_TARGET_NAME}
EXPORT ${STRINGZILLA_TARGETS_EXPORT_NAME}
INCLUDES DESTINATION ${STRINGZILLA_INCLUDE_INSTALL_DIR}
)
install(
EXPORT ${STRINGZILLA_TARGETS_EXPORT_NAME}
NAMESPACE ${PROJECT_NAME}::
DESTINATION ${STRINGZILLA_CONFIG_INSTALL_DIR}
)
install(
FILES "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.pc"
DESTINATION ${STRINGZILLA_PKGCONFIG_INSTALL_DIR}
)
install(DIRECTORY ${STRINGZILLA_INCLUDE_BUILD_DIR}
DESTINATION ${STRINGZILLA_INCLUDE_INSTALL_DIR})
install(FILES ${STRINGZILLA_CMAKE_PROJECT_CONFIG_FILE}
${STRINGZILLA_CMAKE_VERSION_CONFIG_FILE}
DESTINATION ${STRINGZILLA_CONFIG_INSTALL_DIR})
export(
TARGETS ${STRINGZILLA_TARGET_NAME}
NAMESPACE ${PROJECT_NAME}::
FILE ${STRINGZILLA_CMAKE_PROJECT_TARGETS_FILE})
install(
TARGETS ${STRINGZILLA_TARGET_NAME}
EXPORT ${STRINGZILLA_TARGETS_EXPORT_NAME}
INCLUDES
DESTINATION ${STRINGZILLA_INCLUDE_INSTALL_DIR})
install(
EXPORT ${STRINGZILLA_TARGETS_EXPORT_NAME}
NAMESPACE ${PROJECT_NAME}::
DESTINATION ${STRINGZILLA_CONFIG_INSTALL_DIR})
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.pc"
DESTINATION ${STRINGZILLA_PKGCONFIG_INSTALL_DIR})
endif()

if(${STRINGZILLA_BUILD_TEST} OR ${STRINGZILLA_BUILD_BENCHMARK})
add_executable(stringzilla_test scripts/test.c)
add_executable(stringzilla_test scripts/test.cpp)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -O3 -flto -march=native -finline-functions -funroll-loops"
)

target_include_directories(stringzilla_test PRIVATE stringzilla)
set_target_properties(stringzilla_test PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
set_target_properties(stringzilla_test PROPERTIES RUNTIME_OUTPUT_DIRECTORY
${CMAKE_BINARY_DIR})

if(${CMAKE_VERSION} VERSION_EQUAL 3.13 OR ${CMAKE_VERSION} VERSION_GREATER 3.13)
include(CTest)
enable_testing()
add_test(NAME stringzilla_test COMMAND stringzilla_test)
if(${CMAKE_VERSION} VERSION_EQUAL 3.13 OR ${CMAKE_VERSION} VERSION_GREATER
3.13)
include(CTest)
enable_testing()
add_test(NAME stringzilla_test COMMAND stringzilla_test)
endif()
endif()

12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ Coming soon.

## Quick Start: Python 🐍

1️. Install via pip: `pip install stringzilla`
1. Import the classes you need: `from stringzilla import Str, Strs, File`
1. Install via pip: `pip install stringzilla`
2. Import the classes you need: `from stringzilla import Str, Strs, File`

### Basic Usage

Expand Down Expand Up @@ -115,13 +115,13 @@ There is an ABI-stable C 99 interface, in case you have a database, an operating
#include "stringzilla.h"

// Initialize your haystack and needle
sz_haystack_t haystack = {your_text, your_text_length};
sz_needle_t needle = {your_subtext, your_subtext_length, your_anomaly_offset};
sz_string_view_t haystack = {your_text, your_text_length};
sz_string_view_t needle = {your_subtext, your_subtext_length};

// Perform string-level operations
size_t character_count = sz_count_char(haystack, 'a');
size_t character_position = sz_find_char(haystack, 'a');
size_t substring_position = sz_find_substr(haystack, needle);
size_t character_position = sz_find_unigram(haystack, 'a');
size_t substring_position = sz_find_substring(haystack, needle);

// Perform collection level operations
sz_sequence_t array = {your_order, your_count, your_get_start, your_get_length, your_handle};
Expand Down
Loading