Memory fix (#20)

* Fixed read_csv() memory errors * Fixed segfaults in MSVC debug mode * Travis build changes * Fixed .travis.yml * Attempted Travis fix #2 * Fix Makefile * Fixed g++ compilation warnings * Fix linker errors * Keep forgetting stuff * Final Makefile fix (I hope) * Updated documentation
vincentlaucsb · Mar 31, 2019 · bcd14c3 · bcd14c3
1 parent 0041749
commit bcd14c3
Show file tree

Hide file tree

Showing 254 changed files with 442 additions and 15,939 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,10 @@
 # Build
 bin/
 
+# Doxygen
+docs/html
+*.tmp
+
 # Visual Studio
 Debug/
 Release/

diff --git a/.travis.yml b/.travis.yml
@@ -1,22 +1,33 @@
 matrix:
   include:
     - os: linux
+      env: STD=c++11 MAIN_BUILD=true
+      compiler: gcc
       addons:
         apt:
           sources: ['ubuntu-toolchain-r-test']
           packages: ['g++-8', 'valgrind']
+    - os: linux
+      env: STD=c++17
+      compiler: gcc
+      addons:
+        apt:
+          sources: ['ubuntu-toolchain-r-test']
+          packages: ['g++-8']
+    #- os: linux
+    #  env: CXX=clang++
 dist: trusty
 sudo: required
 language:
   - cpp
 script:
-  - export CC=gcc-8
-  - export CXX=g++-8
-  - make test_all
-  # - valgrind --leak-check=full ./test_csv_parser
-  # Disable until Travis updates their version of Valgrind
+  - make run_csv_test
+  - if [ "$MAIN_BUILD" == "true" ]; then
+        make code_cov &&
+        make valgrind;
+    fi;
 after_success:
-  - if [ "$CXX" == "g++-8" ]; then
+  - if [ "$MAIN_BUILD" == "true" ]; then
         cd test_results;
         bash <(curl -s https://codecov.io/bash);
-    fi;
+    fi;
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -26,13 +26,13 @@ set(TEST_DIR ${CMAKE_CURRENT_LIST_DIR}/tests)
 
 # file(GLOB_RECURSE SOURCES include/ *.hpp *.cpp)
 set(SOURCES
-	${CMAKE_CURRENT_LIST_DIR}/include/external/string_view.hpp
 	${SOURCE_DIR}/csv_reader.cpp
 	${SOURCE_DIR}/csv_reader_iterator.cpp
 	${SOURCE_DIR}/csv_row.cpp
 	${SOURCE_DIR}/csv_stat.cpp
 	${SOURCE_DIR}/csv_utility.cpp
 	${SOURCE_DIR}/data_type.cpp
+	${SOURCE_DIR}/giant_string_buffer.cpp
 )
 set(TEST_SOURCES
 	${TEST_DIR}/catch.hpp

diff --git a/Doxyfile b/Doxyfile
@@ -771,14 +771,14 @@ WARN_LOGFILE           =
 # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
 # Note: If this tag is empty the current directory is searched.
 
-INPUT                  = README.md src/
+INPUT                  = docs/source/ include/internal/
 
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
 # libiconv (or the iconv built into libc) for the transcoding. See the libiconv
 # documentation (see: http://www.gnu.org/software/libiconv) for the list of
 # possible encodings.
-# The default value is: UTF-8.
+# The default value is: UTF-8.
 
 INPUT_ENCODING         = UTF-8
 
@@ -920,7 +920,7 @@ FILTER_SOURCE_PATTERNS =
 # (index.html). This can be useful if you have a project on for instance GitHub
 # and want to reuse the introduction page also for the doxygen output.
 
-USE_MDFILE_AS_MAINPAGE = README.md
+USE_MDFILE_AS_MAINPAGE = Doxy.md
 
 #---------------------------------------------------------------------------
 # Configuration options related to source browsing
@@ -1068,7 +1068,7 @@ GENERATE_HTML          = YES
 # The default directory is: html.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-HTML_OUTPUT            = docs
+HTML_OUTPUT            = docs/html
 
 # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each
 # generated HTML page (for example: .htm, .php, .asp).

diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2017 Vincent La
+Copyright (c) 2017-2019 Vincent La
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/Makefile b/Makefile
@@ -1,8 +1,27 @@
+# Makefile used for building/testing on Travis CI
+
+# Force Travis to use updated compilers
+ifeq ($(TRAVIS_COMPILER), gcc)
+	CXX = g++-8
+else ifeq ($(TRAVIS_COMPILER), clang)
+	CXX = clang++
+endif
+
+ifeq ($(STD), )
+	STD = c++11
+endif
+
 BUILD_DIR = build
+SOURCE_DIR = include
 TEST_DIR = tests
-IDIR = include/
-CFLAGS = -pthread -std=c++14
-TFLAGS = -I$(IDIR) -Itests/ $(CFLAGS) -Og -g --coverage
+CFLAGS = -pthread -std=$(STD)
+
+TEST_OFLAGS =
+ifeq ($(CXX), g++-8)
+	TEST_OFLAGS = -Og
+endif
+
+TEST_FLAGS = -Itests/ $(CFLAGS) $(TEST_OFLAGS) -g --coverage -Wall
 
 # Main Library
 SOURCES = $(wildcard include/internal/*.cpp)
@@ -13,43 +32,65 @@ TEST_SOURCES_NO_EXT = $(subst tests/,,$(subst .cpp,,$(TEST_SOURCES)))
 
 all: csv_parser test_all clean distclean
 
-# Main Library
-csv_parser:
-	$(CXX) -c -O3 -Wall $(CFLAGS) $(SOURCES) -I$(IDIR)
+################
+# Main Library #
+################
+csv:
+	$(CXX) -c -O3 $(CFLAGS) $(SOURCES)
 	mkdir -p $(BUILD_DIR)
 	mv *.o $(BUILD_DIR)
 
-test_all:
-	make run_test_csv_parser
-	make code_cov
+libcsv.a: # Static archive of the 'csv' objects; the *.o glob is left to the shell so it is expanded after they are built
+	$(MAKE) csv
+	ar rvs libcsv.a $(BUILD_DIR)/*.o
+
+docs:
+	doxygen Doxyfile
 
-test_csv_parser:
-	$(CXX) -o test_csv_parser $(SOURCES) $(TEST_SOURCES) $(TFLAGS)
+############
+# Programs #
+############
+csv_stats: # libcsv.a
+	$(CXX) -o csv_stats -O3 $(CFLAGS) programs/csv_stats.cpp $(SOURCES) -I$(SOURCE_DIR)
+	# $(CXX) -o csv_stats -O3 -lcsv $(CFLAGS) programs/csv_stats.cpp -L./ -I$(SOURCE_DIR)
 
-run_test_csv_parser: test_csv_parser
+#########
+# Tests #
+#########	
+csv_test: $(SOURCES) $(TEST_SOURCES) # relink the test binary whenever a library or test source changes
+	$(CXX) -o csv_test $(SOURCES) $(TEST_SOURCES) -I$(SOURCE_DIR) $(TEST_FLAGS)
+
+run_csv_test: csv_test
 	mkdir -p tests/temp
-	./test_csv_parser
+	./csv_test
 
 	# Test Clean-Up
 	rm -rf $(TEST_DIR)/temp
 
-code_cov: test_csv_parser
+# Run code coverage analysis
+code_cov: csv_test
 	mkdir -p test_results
 	mv *.gcno *.gcda $(PWD)/test_results
 	gcov-8 $(SOURCES) -o test_results --relative-only
 	mv *.gcov test_results
 
+# Generate report
 code_cov_report:
 	cd test_results
 	lcov --capture --directory test_results --output-file coverage.info
 	genhtml coverage.info --output-directory out
+
+# Can't run valgrind against csv_test because it mangles the working directory, which causes csv_test
+# to not be able to find test files; check csv_stats instead and exit non-zero when valgrind reports errors.
+valgrind: csv_stats
+	valgrind --leak-check=full --error-exitcode=1 ./csv_stats $(TEST_DIR)/data/real_data/2016_Gaz_place_national.txt
 
 .PHONY: all clean distclean
 
-docs:
-	doxygen Doxyfile
-
-clean:	
-	rm -rf test_csv_parser
+clean:
+	rm -f build/*
+	rm -f *.gc*
+	rm -f libcsv.a
+	rm -f csv_*
 
 distclean: clean
diff --git a/README.md b/README.md
@@ -11,6 +11,9 @@ This CSV parser uses multiple threads to simulatenously pull data from disk and
 ### RFC 4180 Compliance
 This CSV parser is much more than a fancy string splitter, and follows every guideline from [RFC 4180](https://www.rfc-editor.org/rfc/rfc4180.txt). On the other hand, it is also robust and capable of handling deviances from the standard. An optional strict parsing mode can be enabled to sniff out errors in files.
 
+#### Encoding
+This CSV parser will handle ANSI and UTF-8 encoded files. It does not try to decode UTF-8, except for detecting and stripping byte order marks.
+
 ### Easy to Use and [Well-Documented](https://vincentlaucsb.github.io/csv-parser)
 
 In additon to being easy on your computer's hardware, this library is also easy on you--the developer. Some helpful features include:
@@ -19,13 +22,21 @@ In additon to being easy on your computer's hardware, this library is also easy
  * Ability to manually set the delimiter and quoting character of the parser
 
 ### Well Tested
-In addition to using modern C++ features to build a memory safe parser while still performing well, this parser has a extensive test suite.
+This CSV parser has an extensive test suite and is checked for memory safety with Valgrind. If you still manage to find a bug,
+do not hesitate to report it.
+
+## Building and Compatibility [(latest stable version)](https://github.com/vincentlaucsb/csv-parser/releases)
 
-## Building [(latest stable version)](https://github.com/vincentlaucsb/csv-parser/releases)
+This library was developed with Microsoft Visual Studio and is compatible with g++ and clang.
+All of the code required to build this library, aside from the C++ standard library, is contained under `include/`.
 
-All of this library's essentials are located under `src/`, with no dependencies aside from the STL. This is a C++17 library developed using Microsoft Visual Studio and compatible with g++ and clang. The CMakeList and Makefile contain instructions for building the main library, some sample programs, and the test suite.
+**One-line compilation** `g++ -pthread -c -O3 -std=c++17 include/internal/*.cpp`
 
-**GCC/Clang Compiler Flags**: `-pthread -O3 -std=c++17`
+### C++ Version
+C++11 is the minimum version required. This library makes extensive use of string views, either through
+[Martin Moene's string view library](https://github.com/martinmoene/string-view-lite) or
+`std::string_view` when compiling with C++17. Please be aware of this if you use parts of the public API that
+return string views.
 
 ### CMake Instructions
 If you're including this in another CMake project, you can simply clone this repo into your project directory, 
@@ -41,27 +52,8 @@ target_link_libraries(<your program> csv)
 
 ```
 
-## Thirty-Second Introduction to Vince's CSV Parser
-
-* **Parsing CSV Files from..**
-  * Files: csv::CSVReader(filename)
-  * In-Memory Sources:
-    * Small: csv::parse() or csv::operator""_csv();
-    * Large: csv::CSVReader::feed();
-* **Retrieving Parsed CSV Rows (from CSVReader)**
-  * csv::CSVReader::iterator (supports range-based for loop)
-  * csv::CSVReader::read_row()
-* **Working with CSV Rows**
-  * Index by number or name: csv::CSVRow::operator[]()
-  * Random access iterator: csv::CSVRow::iterator
-  * Conversion: csv::CSVRow::operator std::vector<std::string>();
-* **Calculating Statistics**
-  * Files: csv::CSVStat(filename)
-  * In-Memory: csv::CSVStat::feed()
-* **Utility Functions**
-  * Return column names: get_col_names()
-  * Return the position of a column: get_col_pos();
-  * Return column types (for uploading to a SQL database): csv_data_types();
+### Single Header
+A single header version of this library is in the works.
 
 ## Features & Examples
 ### Reading a Large File (with Iterators)

diff --git a/docs/README_8md_source.html b/docs/README_8md_source.html