Skip to content

Commit

Permalink
Merge branch develop into master
Browse files Browse the repository at this point in the history
  • Loading branch information
xiangwang1 committed Jan 24, 2018
2 parents aff7242 + 582fd30 commit 0a1c5c4
Show file tree
Hide file tree
Showing 136 changed files with 30,403 additions and 379 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ bin

# sigs dir is handled externally
signatures
# but not the regression tests
!tools/hscollider/test_cases/signatures

# ignore pcre symlink if it exists
pcre
Expand Down
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,18 @@

This is a list of notable changes to Hyperscan, in reverse chronological order.

## [4.7.0] 2018-01-24
- Introduced hscollider pattern testing tool, for validating Hyperscan match
behaviour against PCRE.
- Introduced hscheck pattern compilation tool.
- Introduced hsdump development tool for producing information about Hyperscan
pattern compilation.
- New API feature: extended approximate matching support for Hamming distance.
- Bugfix for issue #69: Force C++ linkage in Xcode.
- Bugfix for issue #73: More documentation for `hs_close_stream()`.
- Bugfix for issue #78: Fix for fat runtime initialisation when used as a
shared library.

## [4.6.0] 2017-09-22
- New API feature: stream state compression. This allows the user to compress
and restore state for streams to reduce memory usage.
Expand Down
54 changes: 38 additions & 16 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ cmake_minimum_required (VERSION 2.8.11)
project (hyperscan C CXX)

set (HS_MAJOR_VERSION 4)
set (HS_MINOR_VERSION 6)
set (HS_MINOR_VERSION 7)
set (HS_PATCH_VERSION 0)
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})

Expand Down Expand Up @@ -30,7 +30,7 @@ else()
message(STATUS "Build type ${CMAKE_BUILD_TYPE}")
endif()

if(CMAKE_BUILD_TYPE MATCHES RELEASE|RELWITHDEBINFO)
if(CMAKE_BUILD_TYPE MATCHES RELEASE|RELWITHDEBINFO|MINSIZEREL)
set(RELEASE_BUILD TRUE)
else()
set(RELEASE_BUILD FALSE)
Expand Down Expand Up @@ -218,8 +218,13 @@ else()
endif()

if(OPTIMISE)
set(OPT_C_FLAG "-O3")
set(OPT_CXX_FLAG "-O2")
if (NOT CMAKE_BUILD_TYPE MATCHES MINSIZEREL)
set(OPT_C_FLAG "-O3")
set(OPT_CXX_FLAG "-O2")
else ()
set(OPT_C_FLAG "-Os")
set(OPT_CXX_FLAG "-Os")
endif ()
else()
set(OPT_C_FLAG "-O0")
set(OPT_CXX_FLAG "-O0")
Expand Down Expand Up @@ -423,10 +428,10 @@ endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")

if(NOT WIN32)
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -diag-error 10006 -diag-disable 68 -diag-disable 177 -diag-disable 186 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable=remark")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -diag-error 10006 -diag-disable 68 -diag-disable 177 -diag-disable 186 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable 279 -diag-disable=remark")
endif()
if(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-error 10006 -diag-disable 68 -diag-disable 177 -diag-disable 186 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable 1170 -diag-disable 3373 -diag-disable=remark")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-error 10006 -diag-disable 68 -diag-disable 177 -diag-disable 186 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable 1170 -diag-disable 3373 -diag-disable 279 -diag-disable=remark")
endif()
endif()

Expand Down Expand Up @@ -1271,25 +1276,42 @@ if (NOT BUILD_SHARED_LIBS)
endif()

if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
set(hs_shared_SRCS
src/hs_version.c
src/hs_valid_platform.c
$<TARGET_OBJECTS:hs_compile_shared>)

if (XCODE)
# force this lib to use C++ linkage
add_custom_command(OUTPUT empty.cxx
COMMAND ${CMAKE_COMMAND} -E touch empty.cxx)
set (hs_shared_SRCS ${hs_shared_SRCS} empty.cxx)
endif (XCODE)

if (NOT FAT_RUNTIME)
add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c
$<TARGET_OBJECTS:hs_compile_shared> $<TARGET_OBJECTS:hs_exec_shared>)
else()
add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c
$<TARGET_OBJECTS:hs_compile_shared> $<TARGET_OBJECTS:hs_exec_common_shared>
set(hs_shared_SRCS
${hs_shared_SRCS}
$<TARGET_OBJECTS:hs_exec_shared>)
else ()
set(hs_shared_SRCS
${hs_shared_SRCS}
$<TARGET_OBJECTS:hs_exec_common_shared>
${RUNTIME_SHLIBS})
endif ()

add_library(hs_shared SHARED ${hs_shared_SRCS})

endif()
add_dependencies(hs_shared ragel_Parser)
set_target_properties(hs_shared PROPERTIES
OUTPUT_NAME hs
VERSION ${LIB_VERSION}
SOVERSION ${LIB_SOVERSION}
MACOSX_RPATH ON)
install(TARGETS hs_shared
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})

install(TARGETS hs_shared
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()

# used by tools and other targets
Expand Down
64 changes: 64 additions & 0 deletions cmake/pcre.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Locate PCRE for the targets that need it.
#
# Inputs (read from the including scope):
#   PCRE_SOURCE                  - optional path to a PCRE source tree
#   PCRE_REQUIRED_VERSION        - required version string (e.g. "X.Y")
#   PCRE_REQUIRED_MAJOR_VERSION  - major component compared against PCRE_MAJOR
#   PCRE_REQUIRED_MINOR_VERSION  - minor component compared against PCRE_MINOR
#   LIBDIR                       - directory used to build PCRE_LDFLAGS
#
# Results:
#   PCRE_BUILD_SOURCE  - TRUE when a PCRE source tree is being used
#   PCRE_INCLUDE_DIRS  - include directories (source build), or the values
#   PCRE_LDFLAGS         filled in by pkg_check_modules (system build)
#   PCRE_CHECKED       - set to TRUE in the PARENT scope on success, so that a
#                        later include of this file can skip the checks
#
# On failure this file returns early without setting PCRE_CHECKED.

# first look in pcre-$version or pcre subdirs
if (PCRE_SOURCE)
    # either provided on cmdline or we've seen it already
    set (PCRE_BUILD_SOURCE TRUE)
elseif (EXISTS "${PROJECT_SOURCE_DIR}/pcre-${PCRE_REQUIRED_VERSION}")
    set (PCRE_SOURCE "${PROJECT_SOURCE_DIR}/pcre-${PCRE_REQUIRED_VERSION}")
    set (PCRE_BUILD_SOURCE TRUE)
elseif (EXISTS "${PROJECT_SOURCE_DIR}/pcre")
    set (PCRE_SOURCE "${PROJECT_SOURCE_DIR}/pcre")
    set (PCRE_BUILD_SOURCE TRUE)
endif()

if (PCRE_BUILD_SOURCE)
    # a relative PCRE_SOURCE is interpreted relative to the build directory
    if (NOT IS_ABSOLUTE "${PCRE_SOURCE}")
        set(PCRE_SOURCE "${CMAKE_BINARY_DIR}/${PCRE_SOURCE}")
    endif ()

    if (PCRE_CHECKED)
        set(PCRE_INCLUDE_DIRS ${PCRE_SOURCE} ${PROJECT_BINARY_DIR}/pcre)
        set(PCRE_LDFLAGS -L"${LIBDIR}" -lpcre)

        # already processed this file and set up pcre building; return before
        # touching CMAKE_REQUIRED_INCLUDES so nothing leaks to the includer
        return()
    endif ()

    # first, check version number
    # CMAKE_REQUIRED_INCLUDES is a ;-separated list, so append rather than
    # joining with a space; it is restored right after the check.
    set (saved_INCLUDES "${CMAKE_REQUIRED_INCLUDES}")
    list (APPEND CMAKE_REQUIRED_INCLUDES "${PCRE_SOURCE}")

    # Use a fully-typed main(): implicit int is rejected by current compilers
    # and would make this check fail regardless of the PCRE version.
    check_c_source_compiles("#include <pcre.h.generic>
#if PCRE_MAJOR != ${PCRE_REQUIRED_MAJOR_VERSION} || PCRE_MINOR != ${PCRE_REQUIRED_MINOR_VERSION}
#error Incorrect pcre version
#endif
int main(void) { return 0; }" CORRECT_PCRE_VERSION)
    set (CMAKE_REQUIRED_INCLUDES "${saved_INCLUDES}")

    if (NOT CORRECT_PCRE_VERSION)
        # drop the cached result so the check is re-run on the next configure
        unset(CORRECT_PCRE_VERSION CACHE)
        message(STATUS "Incorrect version of pcre - version ${PCRE_REQUIRED_VERSION} is required")
        return ()
    else()
        message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION} - building from source.")
    endif()

    # PCRE compile options; these cache entries are created before
    # add_subdirectory() so the values set here are the ones in the cache
    # when the PCRE tree is configured.
    option(PCRE_BUILD_PCRECPP "Build pcrecpp as part of the bundled PCRE" OFF)
    option(PCRE_BUILD_PCREGREP "Build pcregrep as part of the bundled PCRE" OFF)
    option(PCRE_SHOW_REPORT "Show the bundled PCRE configuration report" OFF)
    set(PCRE_SUPPORT_UNICODE_PROPERTIES ON CACHE BOOL "Build pcre with unicode")
    add_subdirectory(${PCRE_SOURCE} ${PROJECT_BINARY_DIR}/pcre EXCLUDE_FROM_ALL)
    set(PCRE_INCLUDE_DIRS ${PCRE_SOURCE} ${PROJECT_BINARY_DIR}/pcre)
    set(PCRE_LDFLAGS -L"${LIBDIR}" -lpcre)
else ()
    # pkgconf should save us
    find_package(PkgConfig)
    pkg_check_modules(PCRE libpcre=${PCRE_REQUIRED_VERSION})
    if (PCRE_FOUND)
        message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION}")
    else ()
        message(STATUS "PCRE version ${PCRE_REQUIRED_VERSION} not found")
        return ()
    endif ()
endif ()

set (PCRE_CHECKED TRUE PARENT_SCOPE)
2 changes: 1 addition & 1 deletion cmake/sqlite3.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ else()
# build sqlite as a static lib to compile into our test programs
add_library(sqlite3_static STATIC "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.c")
if (NOT WIN32)
set_target_properties(sqlite3_static PROPERTIES COMPILE_FLAGS "-Wno-unused -Wno-cast-qual -DSQLITE_OMIT_LOAD_EXTENSION")
set_target_properties(sqlite3_static PROPERTIES COMPILE_FLAGS "-Wno-error -Wno-extra -Wno-unused -Wno-cast-qual -DSQLITE_OMIT_LOAD_EXTENSION")
endif()
endif()
endif()
Expand Down
6 changes: 6 additions & 0 deletions doc/dev-reference/_static/hyperscan.css
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,9 @@
.regexp {
color: darkred !important;
}

/* Avoid (the alabaster theme default) Goudy Old Style, which renders in
* italics on some Mac/Safari systems. */
body {
font-family: 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif;
}
39 changes: 27 additions & 12 deletions doc/dev-reference/compilation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ which provides the following fields:
* ``min_length``: The minimum match length (from start to end) required to
successfully match this expression.
* ``edit_distance``: Match this expression within a given Levenshtein distance.
* ``hamming_distance``: Match this expression within a given Hamming distance.
These parameters either allow the set of matches produced by a pattern to be
constrained at compile time (rather than relying on the application to process
Expand All @@ -299,10 +300,15 @@ and a ``max_offset`` of 15 will not produce matches when scanned against
streams ``foo0123bar`` or ``foo0123456bar``.
Similarly, the pattern :regexp:`/foobar/` when given an ``edit_distance`` of 2
will produce matches when scanned against ``foobar``, ``fooba``, ``fobr``,
``fo_baz``, ``foooobar``, and anything else that lies within edit distance of 2
(as defined by Levenshtein distance). For more details, see the
:ref:`approximate_matching` section.
will produce matches when scanned against ``foobar``, ``f00bar``, ``fooba``,
``fobr``, ``fo_baz``, ``foooobar``, and anything else that lies within edit
distance of 2 (as defined by Levenshtein distance).
When the same pattern :regexp:`/foobar/` is given a ``hamming_distance`` of 2,
it will produce matches when scanned against ``foobar``, ``boofar``,
``f00bar``, and anything else with at most two characters substituted from the
original pattern. For more details, see the :ref:`approximate_matching`
section.
=================
Prefiltering Mode
Expand Down Expand Up @@ -377,7 +383,7 @@ The :c:type:`hs_platform_info_t` structure has two fields:
#. ``cpu_features``: This allows the application to specify a mask of CPU
features that may be used on the target platform. For example,
:c:member:`HS_CPU_FEATURES_AVX2` can be specified for Intel\ |reg| Advanced
Vector Extensions +2 (Intel\ |reg| AVX2) instruction set support. If a flag
Vector Extensions 2 (Intel\ |reg| AVX2) instruction set support. If a flag
for a particular CPU feature is specified, the database will not be usable on
a CPU without that feature.
Expand All @@ -398,13 +404,20 @@ follows:
#. **Edit distance** is defined as Levenshtein distance. That is, there are
three possible edit types considered: insertion, removal and substitution.
More formal description can be found on
`Wikipedia <https://en.wikipedia.org/wiki/Levenshtein_distance>`_.
A more formal description can be found on
`Wikipedia <https://en.wikipedia.org/wiki/Levenshtein_distance>`__.
#. **Hamming distance** is the number of positions at which two strings of
equal length differ. That is, it is the number of substitutions required to
convert one string to the other. There are no insertions or removals when
approximate matching using a Hamming distance. A more formal description can
be found on
`Wikipedia <https://en.wikipedia.org/wiki/Hamming_distance>`__.
#. **Approximate matching** will match all *corpora* within a given edit
distance. That is, given a pattern, approximate matching will match anything
that can be edited to arrive at a corpus that exactly matches the original
pattern.
#. **Approximate matching** will match all *corpora* within a given edit or
Hamming distance. That is, given a pattern, approximate matching will match
anything that can be edited to arrive at a corpus that exactly matches the
original pattern.
#. **Matching semantics** are exactly the same as described in :ref:`semantics`.
Expand Down Expand Up @@ -437,7 +450,9 @@ matching support. Here they are, in a nutshell:
reduce to so-called "vacuous" patterns (patterns that match everything). For
example, pattern :regexp:`/foo/` with edit distance 3, if implemented,
would reduce to matching zero-length buffers. Such patterns will result in a
"Pattern cannot be approximately matched" compile error.
"Pattern cannot be approximately matched" compile error. Approximate
matching within a Hamming distance does not remove symbols, so will not
reduce to a vacuous pattern.
* Finally, due to the inherent complexities of defining matching behavior,
approximate matching implements a reduced subset of regular expression
syntax. Approximate matching does not support UTF-8 (and other
Expand Down
2 changes: 1 addition & 1 deletion doc/dev-reference/conf.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ master_doc = 'index'

# General information about the project.
project = u'Hyperscan'
copyright = u'2015-2017, Intel Corporation'
copyright = u'2015-2018, Intel Corporation'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
Expand Down
2 changes: 1 addition & 1 deletion doc/dev-reference/copyright.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ and/or other countries.

\*Other names and brands may be claimed as the property of others.

Copyright |copy| 2015-2017, Intel Corporation. All rights reserved.
Copyright |copy| 2015-2018, Intel Corporation. All rights reserved.
8 changes: 4 additions & 4 deletions doc/dev-reference/hyperscan.doxyfile.in
Original file line number Diff line number Diff line change
Expand Up @@ -820,7 +820,7 @@ EXCLUDE_PATTERNS =
# Note that the wildcards are matched against the file with absolute path, so to
# exclude all test directories use the pattern */test/*

EXCLUDE_SYMBOLS =
EXCLUDE_SYMBOLS = "HS_CDECL"

# The EXAMPLE_PATH tag can be used to specify one or more files or directories
# that contain example code fragments that are included (see the \include
Expand Down Expand Up @@ -1959,15 +1959,15 @@ ENABLE_PREPROCESSING = YES
# The default value is: NO.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.

MACRO_EXPANSION = NO
MACRO_EXPANSION = YES

# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
# the macro expansion is limited to the macros specified with the PREDEFINED and
# EXPAND_AS_DEFINED tags.
# The default value is: NO.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.

EXPAND_ONLY_PREDEF = NO
EXPAND_ONLY_PREDEF = YES

# If the SEARCH_INCLUDES tag is set to YES, the include files in the
# INCLUDE_PATH will be searched if a #include is found.
Expand Down Expand Up @@ -1999,7 +1999,7 @@ INCLUDE_FILE_PATTERNS =
# recursively expanded use the := operator instead of the = operator.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.

PREDEFINED =
PREDEFINED = "HS_CDECL="

# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
# tag can be used to specify a list of macro names that should be expanded. The
Expand Down
20 changes: 15 additions & 5 deletions doc/dev-reference/runtime.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,21 @@ See :c:type:`match_event_handler` for more information.
Streaming Mode
**************

The streaming runtime API consists of functions to open, scan, and close
Hyperscan data streams -- these functions being :c:func:`hs_open_stream`,
:c:func:`hs_scan_stream`, and :c:func:`hs_close_stream`. Any matches detected
in the written data are returned to the calling application via a function
pointer callback.
The core of the Hyperscan streaming runtime API consists of functions to open,
scan, and close Hyperscan data streams:

* :c:func:`hs_open_stream`: allocates and initializes a new stream for scanning.

* :c:func:`hs_scan_stream`: scans a block of data in a given stream, raising
matches as they are detected.

* :c:func:`hs_close_stream`: completes scanning of a given stream (raising any
matches that occur at the end of the stream) and frees the stream state. After
a call to :c:func:`hs_close_stream`, the stream handle is invalid and should
not be used again for any purpose.

Any matches detected in the data as it is scanned are returned to the calling
application via a function pointer callback.

The match callback function has the capability to halt scanning of the current
data stream by returning a non-zero value. In streaming mode, the result of
Expand Down
Loading

0 comments on commit 0a1c5c4

Please sign in to comment.