Skip to content

Commit

Permalink
Added SSE intrinsics to scanner.
Browse files Browse the repository at this point in the history
The gain seems to be only measurable on rather long messages.
  • Loading branch information
maxim2266 committed Sep 21, 2015
1 parent 3cfcb0b commit 30167d8
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 8 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ CC = gcc -std=c11

release release32 : CFLAGS = -O3 -Wall -Wextra -Iinclude -march=native -mtune=native \
-fomit-frame-pointer -Wl,--as-needed -flto -ffunction-sections -fdata-sections -Wl,--gc-sections \
-DNDEBUG -DRELEASE
-DNDEBUG -DRELEASE -DUSE_SSE

debug : CFLAGS = -g -Wall -Wextra -Iinclude -DDEBUG

Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ _OS:_ Linux Mint 17.2 64bit

FIX message type | FIX specification | Validation | Average time to parse one message
----------------------------------|------------------------------------------|------------|--------------------------------------------------
NewOrderSingle('D') | Hand-coded spec. for this message only | No | 0.328 µs/msg
NewOrderSingle('D') | Hand-coded spec. for this message only | Yes | 0.561 µs/msg
NewOrderSingle('D') | Compiled full spec. for FIX.4.4 | Yes | 0.734 µs/msg
MarketDataIncrementalRefresh('X') | Hand-coded spec. for this message only | Yes | 1.280 µs/msg
MarketDataIncrementalRefresh('X') | Compiled full spec. for FIX.4.4 | Yes | 1.418 µs/msg
NewOrderSingle('D') | Hand-coded spec. for this message only | No | 0.338 µs/msg
NewOrderSingle('D') | Hand-coded spec. for this message only | Yes | 0.550 µs/msg
NewOrderSingle('D') | Compiled full spec. for FIX.4.4 | Yes | 0.765 µs/msg
MarketDataIncrementalRefresh('X') | Hand-coded spec. for this message only | Yes | 1.245 µs/msg
MarketDataIncrementalRefresh('X') | Compiled full spec. for FIX.4.4 | Yes | 1.392 µs/msg

For more details see `doc/` directory of the project.
35 changes: 33 additions & 2 deletions src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "fix_impl.h"

#ifdef USE_SSE
#include <xmmintrin.h>
#endif

// message buffer handling
static
char* make_space(fix_parser* const parser, char* dest, unsigned extra_len)
Expand Down Expand Up @@ -103,9 +107,36 @@ bool copy_chunk(scanner_state* const state)
// Copies `n` bytes from `src` to `dest` and returns the sum of the copied
// bytes, accumulated in an unsigned char (i.e. modulo 256).
// `dest` and `src` must not overlap (both are `restrict`-qualified).
// When `n` is 0 nothing is copied and 0 is returned.
// With USE_SSE defined, input of at least sizeof(__m128i) bytes is processed
// in 16-byte blocks first; the remaining tail is handled byte by byte.
static
unsigned char copy_cs(char* restrict dest, const char* restrict src, unsigned n)
{
	unsigned char cs = 0;

#ifdef USE_SSE
	if(n >= sizeof(__m128i))
	{
		// the first block seeds the sixteen per-lane byte accumulators
		__m128i cs128 = _mm_loadu_si128((const __m128i*)src);

		src += sizeof(__m128i);
		_mm_storeu_si128((__m128i*)dest, cs128);
		dest += sizeof(__m128i);

		// copy every further full block, adding it lane-wise (each lane wraps modulo 256);
		// on exit `n` holds the number of tail bytes, which is less than one block
		while((n -= sizeof(__m128i)) >= sizeof(__m128i))
		{
			const __m128i tmp = _mm_loadu_si128((const __m128i*)src);

			src += sizeof(__m128i);
			_mm_storeu_si128((__m128i*)dest, tmp);
			dest += sizeof(__m128i);
			cs128 = _mm_add_epi8(cs128, tmp);
		}

		// horizontal reduction: fold the upper half onto the lower half until
		// byte 0 holds the sum of all sixteen lanes
		cs128 = _mm_add_epi8(cs128, _mm_srli_si128(cs128, 8));
		cs128 = _mm_add_epi8(cs128, _mm_srli_si128(cs128, 4));
		cs128 = _mm_add_epi8(cs128, _mm_srli_si128(cs128, 2));
		cs128 = _mm_add_epi8(cs128, _mm_srli_si128(cs128, 1));

		// the low 16-bit word carries bytes 0 and 1; the conversion to
		// unsigned char keeps byte 0 only (SSE4: _mm_extract_epi8 ?)
		cs += _mm_extract_epi16(cs128, 0);
	}
#endif	// #ifdef USE_SSE

	// tail bytes (or the whole input when the SSE path is not taken)
	while(n-- > 0)
		cs += (*dest++ = *src++);

	return cs;
}
Expand Down

0 comments on commit 30167d8

Please sign in to comment.