diff --git a/app/controllers/core.server.controller.js b/app/controllers/core.server.controller.js index 6540b11..029cca5 100644 --- a/app/controllers/core.server.controller.js +++ b/app/controllers/core.server.controller.js @@ -4,7 +4,6 @@ * Module dependencies. */ exports.index = function(req, res) { - var qs = require('querystring'); if (req.method == "POST"){ var body = ''; req.on('data', function (data) { @@ -17,8 +16,13 @@ exports.index = function(req, res) { // naming this "file_data" because I assume that we will // only be POST-ing imported files. This may change in the // future. - var file_data = qs.parse(body); - res.send(file_data); + + // split out the contents from the headers + var csv_body = body.split("Content-Type: text/csv\r\n\r\n"); + // and from the footers + var csv_only = csv_body[1].split("\r\n\r\n-----------------------------"); + var csv_contents = csv_only[0]; + res.send(csv_contents); }); } else{ diff --git a/app/views/index.jade b/app/views/index.jade index a27f121..168e003 100644 --- a/app/views/index.jade +++ b/app/views/index.jade @@ -17,7 +17,7 @@ html(lang="en") script(type='text/javascript', src='js/lib/FileSaver.js/FileSaver.min.js') script(type='text/javascript', src='js/lib/Blob.js/Blob.min.js') script(type='text/javascript', src='js/lib/sampleData.js') - script(type='text/javascript', src='js/jquery.csv.js') + script(type='text/javascript', src='js/papaparse.js') script(type='text/javascript', src='js/index.js') body diff --git a/public/js/index.js b/public/js/index.js index 9d86162..f00dcfb 100644 --- a/public/js/index.js +++ b/public/js/index.js @@ -522,6 +522,7 @@ $(function() { function importAll() { // get file + // need to check to make sure that it is a csv file, here var formData = new FormData(); formData.append('file', $('input[type=file]')[0].files[0]); $.ajax("/upload", { @@ -530,12 +531,14 @@ $(function() { processData: false, contentType: false }).done(function(response) { - console.log(response); // read each 
line of file + var result = Papa.parse(response); + console.log("DEBUG: " + result); + // loop through array // for each line, check whether ssn exists via API - // if it does, put that line in a list of duplicates - // if it doesn't, POST that line to the API - // display any duplicates to the user + // if it does, put that record in a list of possible duplicates + // if it doesn't, POST that record to the API + // display the list of possible duplicates to the user }); } diff --git a/public/js/papaparse.js b/public/js/papaparse.js new file mode 100644 index 0000000..98e8ee6 --- /dev/null +++ b/public/js/papaparse.js @@ -0,0 +1,1403 @@ +/*! + Papa Parse + v4.1.1 + https://github.com/mholt/PapaParse +*/ +(function(global) +{ + "use strict"; + + var IS_WORKER = !global.document && !!global.postMessage, + IS_PAPA_WORKER = IS_WORKER && /(\?|&)papaworker(=|&|$)/.test(global.location.search), + LOADED_SYNC = false, AUTO_SCRIPT_PATH; + var workers = {}, workerIdCounter = 0; + + var Papa = {}; + + Papa.parse = CsvToJson; + Papa.unparse = JsonToCsv; + + Papa.RECORD_SEP = String.fromCharCode(30); + Papa.UNIT_SEP = String.fromCharCode(31); + Papa.BYTE_ORDER_MARK = "\ufeff"; + Papa.BAD_DELIMITERS = ["\r", "\n", "\"", Papa.BYTE_ORDER_MARK]; + Papa.WORKERS_SUPPORTED = !IS_WORKER && !!global.Worker; + Papa.SCRIPT_PATH = null; // Must be set by your code if you use workers and this lib is loaded asynchronously + + // Configurable chunk sizes for local and remote files, respectively + Papa.LocalChunkSize = 1024 * 1024 * 10; // 10 MB + Papa.RemoteChunkSize = 1024 * 1024 * 5; // 5 MB + Papa.DefaultDelimiter = ","; // Used if not specified and detection fails + + // Exposed for testing and development only + Papa.Parser = Parser; + Papa.ParserHandle = ParserHandle; + Papa.NetworkStreamer = NetworkStreamer; + Papa.FileStreamer = FileStreamer; + Papa.StringStreamer = StringStreamer; + + if (typeof module !== 'undefined' && module.exports) + { + // Export to Node... 
+ module.exports = Papa; + } + else if (isFunction(global.define) && global.define.amd) + { + // Wireup with RequireJS + define(function() { return Papa; }); + } + else + { + // ...or as browser global + global.Papa = Papa; + } + + if (global.jQuery) + { + var $ = global.jQuery; + $.fn.parse = function(options) + { + var config = options.config || {}; + var queue = []; + + this.each(function(idx) + { + var supported = $(this).prop('tagName').toUpperCase() == "INPUT" + && $(this).attr('type').toLowerCase() == "file" + && global.FileReader; + + if (!supported || !this.files || this.files.length == 0) + return true; // continue to next input element + + for (var i = 0; i < this.files.length; i++) + { + queue.push({ + file: this.files[i], + inputElem: this, + instanceConfig: $.extend({}, config) + }); + } + }); + + parseNextFile(); // begin parsing + return this; // maintains chainability + + + function parseNextFile() + { + if (queue.length == 0) + { + if (isFunction(options.complete)) + options.complete(); + return; + } + + var f = queue[0]; + + if (isFunction(options.before)) + { + var returned = options.before(f.file, f.inputElem); + + if (typeof returned === 'object') + { + if (returned.action == "abort") + { + error("AbortError", f.file, f.inputElem, returned.reason); + return; // Aborts all queued files immediately + } + else if (returned.action == "skip") + { + fileComplete(); // parse the next file in the queue, if any + return; + } + else if (typeof returned.config === 'object') + f.instanceConfig = $.extend(f.instanceConfig, returned.config); + } + else if (returned == "skip") + { + fileComplete(); // parse the next file in the queue, if any + return; + } + } + + // Wrap up the user's complete callback, if any, so that ours also gets executed + var userCompleteFunc = f.instanceConfig.complete; + f.instanceConfig.complete = function(results) + { + if (isFunction(userCompleteFunc)) + userCompleteFunc(results, f.file, f.inputElem); + fileComplete(); + }; + + 
Papa.parse(f.file, f.instanceConfig); + } + + function error(name, file, elem, reason) + { + if (isFunction(options.error)) + options.error({name: name}, file, elem, reason); + } + + function fileComplete() + { + queue.splice(0, 1); + parseNextFile(); + } + } + } + + + if (IS_PAPA_WORKER) + { + global.onmessage = workerThreadReceivedMessage; + } + else if (Papa.WORKERS_SUPPORTED) + { + AUTO_SCRIPT_PATH = getScriptPath(); + + // Check if the script was loaded synchronously + if (!document.body) + { + // Body doesn't exist yet, must be synchronous + LOADED_SYNC = true; + } + else + { + document.addEventListener('DOMContentLoaded', function () { + LOADED_SYNC = true; + }, true); + } + } + + + + + function CsvToJson(_input, _config) + { + _config = _config || {}; + + if (_config.worker && Papa.WORKERS_SUPPORTED) + { + var w = newWorker(); + + w.userStep = _config.step; + w.userChunk = _config.chunk; + w.userComplete = _config.complete; + w.userError = _config.error; + + _config.step = isFunction(_config.step); + _config.chunk = isFunction(_config.chunk); + _config.complete = isFunction(_config.complete); + _config.error = isFunction(_config.error); + delete _config.worker; // prevent infinite loop + + w.postMessage({ + input: _input, + config: _config, + workerId: w.id + }); + + return; + } + + var streamer = null; + if (typeof _input === 'string') + { + if (_config.download) + streamer = new NetworkStreamer(_config); + else + streamer = new StringStreamer(_config); + } + else if ((global.File && _input instanceof File) || _input instanceof Object) // ...Safari. 
/**
 * Serializes JSON-like input into a CSV string (exposed as Papa.unparse).
 *
 * Accepted shapes for _input:
 *   - a JSON string encoding any of the shapes below
 *   - an array of arrays (no header row is written)
 *   - an array of objects (header row = keys of the first object)
 *   - an object { fields: [...], data: [...] }, where data may itself be a
 *     JSON string, an array of arrays, an array of objects, or a flat array
 *     of primitives (treated as a single row)
 *
 * Recognized _config options:
 *   - delimiter: single character not listed in Papa.BAD_DELIMITERS
 *   - quotes:    boolean (quote everything) or array of per-column booleans
 *   - newline:   string written between rows
 *
 * The caller's _input object is never modified.
 *
 * @param {string|Array|Object} _input  data to serialize
 * @param {Object} [_config]            optional settings (null is tolerated)
 * @returns {string} the CSV text
 * @throws {string} "exception: Unable to serialize unrecognized input" when
 *         _input has none of the supported shapes (a string is thrown, not
 *         an Error, to stay compatible with existing callers)
 */
function JsonToCsv(_input, _config)
{
	// Default configuration

	/** whether to surround every datum with quotes */
	var _quotes = false;

	/** delimiting character */
	var _delimiter = ",";

	/** newline character(s) */
	var _newline = "\r\n";

	unpackConfig();

	if (typeof _input === 'string')
		_input = JSON.parse(_input);

	if (_input instanceof Array)
	{
		if (!_input.length || _input[0] instanceof Array)
			return serialize(null, _input);
		else if (typeof _input[0] === 'object')
			return serialize(objectKeys(_input[0]), _input);
	}
	else if (typeof _input === 'object' && _input !== null)	// typeof null is 'object' too
	{
		// Work on locals so the object handed in by the caller stays untouched
		var fields = _input.fields;
		var data = _input.data;

		if (typeof data === 'string')
			data = JSON.parse(data);

		if (data instanceof Array)
		{
			if (!fields)
				fields = data[0] instanceof Array
							? fields
							: objectKeys(data[0]);

			// handles input like [1,2,3] or ["asdf"]; an empty array must stay
			// empty, otherwise it would turn into one row of blank fields
			if (data.length > 0 && !(data[0] instanceof Array) && typeof data[0] !== 'object')
				data = [data];
		}

		return serialize(fields || [], data || []);
	}

	// Default (any valid paths should return before this)
	throw "exception: Unable to serialize unrecognized input";


	/** Copies the recognized options from _config over the defaults */
	function unpackConfig()
	{
		if (typeof _config !== 'object' || _config === null)
			return;

		if (typeof _config.delimiter === 'string'
			&& _config.delimiter.length == 1
			&& Papa.BAD_DELIMITERS.indexOf(_config.delimiter) == -1)
		{
			_delimiter = _config.delimiter;
		}

		if (typeof _config.quotes === 'boolean'
			|| _config.quotes instanceof Array)
			_quotes = _config.quotes;

		if (typeof _config.newline === 'string')
			_newline = _config.newline;
	}


	/** Turns an object's keys into an array */
	function objectKeys(obj)
	{
		if (typeof obj !== 'object')
			return [];
		var keys = [];
		for (var key in obj)
			keys.push(key);
		return keys;
	}

	/** The double for loop that iterates the data and writes out a CSV string including header row */
	function serialize(fields, data)
	{
		var csv = "";

		if (typeof fields === 'string')
			fields = JSON.parse(fields);
		if (typeof data === 'string')
			data = JSON.parse(data);

		var hasHeader = fields instanceof Array && fields.length > 0;
		var dataKeyedByField = !(data[0] instanceof Array);

		// If there is a header row, write it first
		if (hasHeader)
		{
			for (var i = 0; i < fields.length; i++)
			{
				if (i > 0)
					csv += _delimiter;
				csv += safe(fields[i], i);
			}
			if (data.length > 0)
				csv += _newline;
		}

		// Then write out the data
		for (var row = 0; row < data.length; row++)
		{
			var maxCol = hasHeader ? fields.length : data[row].length;

			for (var col = 0; col < maxCol; col++)
			{
				if (col > 0)
					csv += _delimiter;
				// rows that are objects are read by field name, arrays by position
				var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
				csv += safe(data[row][colIdx], col);
			}

			if (row < data.length - 1)
				csv += _newline;
		}

		return csv;
	}

	/** Encloses a value around quotes if needed (makes a value safe for CSV insertion) */
	function safe(str, col)
	{
		if (typeof str === "undefined" || str === null)
			return "";

		// embedded quotes are escaped by doubling them
		str = str.toString().replace(/"/g, '""');

		var needsQuotes = (typeof _quotes === 'boolean' && _quotes)
						|| (_quotes instanceof Array && _quotes[col])
						|| hasAny(str, Papa.BAD_DELIMITERS)
						|| str.indexOf(_delimiter) > -1
						|| str.charAt(0) == ' '
						|| str.charAt(str.length - 1) == ' ';

		return needsQuotes ? '"' + str + '"' : str;
	}

	/** Tells whether str contains at least one of the given substrings */
	function hasAny(str, substrings)
	{
		for (var i = 0; i < substrings.length; i++)
			if (str.indexOf(substrings[i]) > -1)
				return true;
		return false;
	}
}
*/ + function ChunkStreamer(config) + { + this._handle = null; + this._paused = false; + this._finished = false; + this._input = null; + this._baseIndex = 0; + this._partialLine = ""; + this._rowCount = 0; + this._start = 0; + this._nextChunk = null; + this.isFirstChunk = true; + this._completeResults = { + data: [], + errors: [], + meta: {} + }; + replaceConfig.call(this, config); + + this.parseChunk = function(chunk) + { + // First chunk pre-processing + if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) + { + var modifiedChunk = this._config.beforeFirstChunk(chunk); + if (modifiedChunk !== undefined) + chunk = modifiedChunk; + } + this.isFirstChunk = false; + + // Rejoin the line we likely just split in two by chunking the file + var aggregate = this._partialLine + chunk; + this._partialLine = ""; + + var results = this._handle.parse(aggregate, this._baseIndex, !this._finished); + + if (this._handle.paused() || this._handle.aborted()) + return; + + var lastIndex = results.meta.cursor; + + if (!this._finished) + { + this._partialLine = aggregate.substring(lastIndex - this._baseIndex); + this._baseIndex = lastIndex; + } + + if (results && results.data) + this._rowCount += results.data.length; + + var finishedIncludingPreview = this._finished || (this._config.preview && this._rowCount >= this._config.preview); + + if (IS_PAPA_WORKER) + { + global.postMessage({ + results: results, + workerId: Papa.WORKER_ID, + finished: finishedIncludingPreview + }); + } + else if (isFunction(this._config.chunk)) + { + this._config.chunk(results, this._handle); + if (this._paused) + return; + results = undefined; + this._completeResults = undefined; + } + + if (!this._config.step && !this._config.chunk) { + this._completeResults.data = this._completeResults.data.concat(results.data); + this._completeResults.errors = this._completeResults.errors.concat(results.errors); + this._completeResults.meta = results.meta; + } + + if (finishedIncludingPreview && 
/**
 * Streamer that downloads its input with XMLHttpRequest GET requests and
 * feeds the response text to parseChunk(). When a chunk size is configured,
 * each request carries a Range header for the next slice of the file.
 *
 * Inside a worker (IS_WORKER) the request is opened synchronously and the
 * load handler is invoked right after sending; otherwise the request is
 * asynchronous and the handlers are attached as onload / onerror.
 *
 * @param {Object} [config]  parser configuration; chunkSize defaults to
 *                           Papa.RemoteChunkSize when missing or falsy
 */
function NetworkStreamer(config)
{
	config = config || {};
	if (!config.chunkSize)
		config.chunkSize = Papa.RemoteChunkSize;
	ChunkStreamer.call(this, config);

	var xhr;

	if (IS_WORKER)
	{
		// synchronous request: the response is available as soon as send() returns
		this._nextChunk = function()
		{
			this._readChunk();
			this._chunkLoaded();
		};
	}
	else
	{
		this._nextChunk = function()
		{
			this._readChunk();
		};
	}

	/** Remembers the URL and requests the first chunk */
	this.stream = function(url)
	{
		this._input = url;
		this._nextChunk();	// Starts streaming
	};

	/** Issues the request for the next byte range (or the whole file) */
	this._readChunk = function()
	{
		if (this._finished)
		{
			this._chunkLoaded();
			return;
		}

		xhr = new XMLHttpRequest();

		if (!IS_WORKER)
		{
			xhr.onload = bindFunction(this._chunkLoaded, this);
			xhr.onerror = bindFunction(this._chunkError, this);
		}

		xhr.open("GET", this._input, !IS_WORKER);

		if (this._config.chunkSize)
		{
			var end = this._start + this._config.chunkSize - 1;	// minus one because byte range is inclusive
			xhr.setRequestHeader("Range", "bytes="+this._start+"-"+end);
			xhr.setRequestHeader("If-None-Match", "webkit-no-cache"); // https://bugs.webkit.org/show_bug.cgi?id=82672
		}

		try {
			xhr.send();
		}
		catch (err) {
			this._chunkError(err.message);
		}

		if (IS_WORKER && xhr.status === 0)
			this._chunkError();
		else
			this._start += this._config.chunkSize;
	};

	/** Handles a completed request: reports HTTP errors or parses the text */
	this._chunkLoaded = function()
	{
		if (xhr.readyState !== 4)
			return;

		if (xhr.status < 200 || xhr.status >= 400)
		{
			this._chunkError();
			return;
		}

		this._finished = !this._config.chunkSize || this._start > getFileSize(xhr);
		this.parseChunk(xhr.responseText);
	};

	/** Forwards the status text (or the given message) to the error callback */
	this._chunkError = function(errorMessage)
	{
		var errorText = xhr.statusText || errorMessage;
		this._sendError(errorText);
	};

	/**
	 * Reads the total size from the Content-Range response header.
	 * Returns -1 when the header is absent (the server did not honor the
	 * Range request), which makes the caller treat this response as the
	 * final chunk instead of crashing on a null header.
	 */
	function getFileSize(request)
	{
		var contentRange = request.getResponseHeader("Content-Range");
		if (contentRange === null)
			return -1;
		return parseInt(contentRange.substr(contentRange.lastIndexOf("/") + 1), 10);
	}
}
NetworkStreamer.prototype = Object.create(ChunkStreamer.prototype);
NetworkStreamer.prototype.constructor = NetworkStreamer;
/**
 * Streamer for input that is already an in-memory string. The string is
 * handed to parseChunk() either whole or, when config.chunkSize is set,
 * in slices of that many characters.
 *
 * @param {Object} [config]  parser configuration passed on to ChunkStreamer
 */
function StringStreamer(config)
{
	config = config || {};
	ChunkStreamer.call(this, config);

	// Part of the input that has not been handed to the parser yet
	var remaining;

	/** Starts parsing string s and returns the result of the first chunk */
	this.stream = function(s)
	{
		remaining = s;
		return this._nextChunk();
	};

	/** Parses the next slice; returns undefined once everything was consumed */
	this._nextChunk = function()
	{
		if (this._finished) return;
		var size = this._config.chunkSize;
		var chunk = size ? remaining.substr(0, size) : remaining;
		remaining = size ? remaining.substr(size) : '';
		// must be set before parseChunk so the parser knows this is the last chunk
		this._finished = !remaining;
		return this.parseChunk(chunk);
	};
}
// Inherit from ChunkStreamer like the other streamers do
StringStreamer.prototype = Object.create(ChunkStreamer.prototype);
StringStreamer.prototype.constructor = StringStreamer;
+ var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i; + + var self = this; + var _stepCounter = 0; // Number of times step was called (number of rows parsed) + var _input; // The input being parsed + var _parser; // The core parser being used + var _paused = false; // Whether we are paused or not + var _aborted = false; // Whether the parser has aborted or not + var _delimiterError; // Temporary state between delimiter detection and processing results + var _fields = []; // Fields are from the header row of the input, if there is one + var _results = { // The last results returned from the parser + data: [], + errors: [], + meta: {} + }; + + if (isFunction(_config.step)) + { + var userStep = _config.step; + _config.step = function(results) + { + _results = results; + + if (needsHeaderRow()) + processResults(); + else // only call user's step function after header row + { + processResults(); + + // It's possbile that this line was empty and there's no row here after all + if (_results.data.length == 0) + return; + + _stepCounter += results.data.length; + if (_config.preview && _stepCounter > _config.preview) + _parser.abort(); + else + userStep(_results, self); + } + }; + } + + /** + * Parses input. Most users won't need, and shouldn't mess with, the baseIndex + * and ignoreLastRow parameters. They are used by streamers (wrapper functions) + * when an input comes in multiple chunks, like from a file. 
+ */ + this.parse = function(input, baseIndex, ignoreLastRow) + { + if (!_config.newline) + _config.newline = guessLineEndings(input); + + _delimiterError = false; + if (!_config.delimiter) + { + var delimGuess = guessDelimiter(input); + if (delimGuess.successful) + _config.delimiter = delimGuess.bestDelimiter; + else + { + _delimiterError = true; // add error after parsing (otherwise it would be overwritten) + _config.delimiter = Papa.DefaultDelimiter; + } + _results.meta.delimiter = _config.delimiter; + } + + var parserConfig = copy(_config); + if (_config.preview && _config.header) + parserConfig.preview++; // to compensate for header row + + _input = input; + _parser = new Parser(parserConfig); + _results = _parser.parse(_input, baseIndex, ignoreLastRow); + processResults(); + return _paused ? { meta: { paused: true } } : (_results || { meta: { paused: false } }); + }; + + this.paused = function() + { + return _paused; + }; + + this.pause = function() + { + _paused = true; + _parser.abort(); + _input = _input.substr(_parser.getCharIndex()); + }; + + this.resume = function() + { + _paused = false; + self.streamer.parseChunk(_input); + }; + + this.aborted = function () { + return _aborted; + } + + this.abort = function() + { + _aborted = true; + _parser.abort(); + _results.meta.aborted = true; + if (isFunction(_config.complete)) + _config.complete(_results); + _input = ""; + }; + + function processResults() + { + if (_results && _delimiterError) + { + addError("Delimiter", "UndetectableDelimiter", "Unable to auto-detect delimiting character; defaulted to '"+Papa.DefaultDelimiter+"'"); + _delimiterError = false; + } + + if (_config.skipEmptyLines) + { + for (var i = 0; i < _results.data.length; i++) + if (_results.data[i].length == 1 && _results.data[i][0] == "") + _results.data.splice(i--, 1); + } + + if (needsHeaderRow()) + fillHeaderFields(); + + return applyHeaderAndDynamicTyping(); + } + + function needsHeaderRow() + { + return _config.header && 
_fields.length == 0; + } + + function fillHeaderFields() + { + if (!_results) + return; + for (var i = 0; needsHeaderRow() && i < _results.data.length; i++) + for (var j = 0; j < _results.data[i].length; j++) + _fields.push(_results.data[i][j]); + _results.data.splice(0, 1); + } + + function applyHeaderAndDynamicTyping() + { + if (!_results || (!_config.header && !_config.dynamicTyping)) + return _results; + + for (var i = 0; i < _results.data.length; i++) + { + var row = {}; + + for (var j = 0; j < _results.data[i].length; j++) + { + if (_config.dynamicTyping) + { + var value = _results.data[i][j]; + if (value == "true" || value == "TRUE") + _results.data[i][j] = true; + else if (value == "false" || value == "FALSE") + _results.data[i][j] = false; + else + _results.data[i][j] = tryParseFloat(value); + } + + if (_config.header) + { + if (j >= _fields.length) + { + if (!row["__parsed_extra"]) + row["__parsed_extra"] = []; + row["__parsed_extra"].push(_results.data[i][j]); + } + else + row[_fields[j]] = _results.data[i][j]; + } + } + + if (_config.header) + { + _results.data[i] = row; + if (j > _fields.length) + addError("FieldMismatch", "TooManyFields", "Too many fields: expected " + _fields.length + " fields but parsed " + j, i); + else if (j < _fields.length) + addError("FieldMismatch", "TooFewFields", "Too few fields: expected " + _fields.length + " fields but parsed " + j, i); + } + } + + if (_config.header && _results.meta) + _results.meta.fields = _fields; + return _results; + } + + function guessDelimiter(input) + { + var delimChoices = [",", "\t", "|", ";", Papa.RECORD_SEP, Papa.UNIT_SEP]; + var bestDelim, bestDelta, fieldCountPrevRow; + + for (var i = 0; i < delimChoices.length; i++) + { + var delim = delimChoices[i]; + var delta = 0, avgFieldCount = 0; + fieldCountPrevRow = undefined; + + var preview = new Parser({ + delimiter: delim, + preview: 10 + }).parse(input); + + for (var j = 0; j < preview.data.length; j++) + { + var fieldCount = 
preview.data[j].length; + avgFieldCount += fieldCount; + + if (typeof fieldCountPrevRow === 'undefined') + { + fieldCountPrevRow = fieldCount; + continue; + } + else if (fieldCount > 1) + { + delta += Math.abs(fieldCount - fieldCountPrevRow); + fieldCountPrevRow = fieldCount; + } + } + + if (preview.data.length > 0) + avgFieldCount /= preview.data.length; + + if ((typeof bestDelta === 'undefined' || delta < bestDelta) + && avgFieldCount > 1.99) + { + bestDelta = delta; + bestDelim = delim; + } + } + + _config.delimiter = bestDelim; + + return { + successful: !!bestDelim, + bestDelimiter: bestDelim + } + } + + function guessLineEndings(input) + { + input = input.substr(0, 1024*1024); // max length 1 MB + + var r = input.split('\r'); + + if (r.length == 1) + return '\n'; + + var numWithN = 0; + for (var i = 0; i < r.length; i++) + { + if (r[i][0] == '\n') + numWithN++; + } + + return numWithN >= r.length / 2 ? '\r\n' : '\r'; + } + + function tryParseFloat(val) + { + var isNumber = FLOAT.test(val); + return isNumber ? 
parseFloat(val) : val; + } + + function addError(type, code, msg, row) + { + _results.errors.push({ + type: type, + code: code, + message: msg, + row: row + }); + } + } + + + + + + /** The core parser implements speedy and correct CSV parsing */ + function Parser(config) + { + // Unpack the config object + config = config || {}; + var delim = config.delimiter; + var newline = config.newline; + var comments = config.comments; + var step = config.step; + var preview = config.preview; + var fastMode = config.fastMode; + + // Delimiter must be valid + if (typeof delim !== 'string' + || Papa.BAD_DELIMITERS.indexOf(delim) > -1) + delim = ","; + + // Comment character must be valid + if (comments === delim) + throw "Comment character same as delimiter"; + else if (comments === true) + comments = "#"; + else if (typeof comments !== 'string' + || Papa.BAD_DELIMITERS.indexOf(comments) > -1) + comments = false; + + // Newline must be valid: \r, \n, or \r\n + if (newline != '\n' && newline != '\r' && newline != '\r\n') + newline = '\n'; + + // We're gonna need these at the Parser scope + var cursor = 0; + var aborted = false; + + this.parse = function(input, baseIndex, ignoreLastRow) + { + // For some reason, in Chrome, this speeds things up (!?) 
+ if (typeof input !== 'string') + throw "Input must be a string"; + + // We don't need to compute some of these every time parse() is called, + // but having them in a more local scope seems to perform better + var inputLen = input.length, + delimLen = delim.length, + newlineLen = newline.length, + commentsLen = comments.length; + var stepIsFunction = typeof step === 'function'; + + // Establish starting state + cursor = 0; + var data = [], errors = [], row = [], lastCursor = 0; + + if (!input) + return returnable(); + + if (fastMode || (fastMode !== false && input.indexOf('"') === -1)) + { + var rows = input.split(newline); + for (var i = 0; i < rows.length; i++) + { + var row = rows[i]; + cursor += row.length; + if (i !== rows.length - 1) + cursor += newline.length; + else if (ignoreLastRow) + return returnable(); + if (comments && row.substr(0, commentsLen) == comments) + continue; + if (stepIsFunction) + { + data = []; + pushRow(row.split(delim)); + doStep(); + if (aborted) + return returnable(); + } + else + pushRow(row.split(delim)); + if (preview && i >= preview) + { + data = data.slice(0, preview); + return returnable(true); + } + } + return returnable(); + } + + var nextDelim = input.indexOf(delim, cursor); + var nextNewline = input.indexOf(newline, cursor); + + // Parser loop + for (;;) + { + // Field has opening quote + if (input[cursor] == '"') + { + // Start our search for the closing quote where the cursor is + var quoteSearch = cursor; + + // Skip the opening quote + cursor++; + + for (;;) + { + // Find closing quote + var quoteSearch = input.indexOf('"', quoteSearch+1); + + if (quoteSearch === -1) + { + if (!ignoreLastRow) { + // No closing quote... 
what a pity + errors.push({ + type: "Quotes", + code: "MissingQuotes", + message: "Quoted field unterminated", + row: data.length, // row has yet to be inserted + index: cursor + }); + } + return finish(); + } + + if (quoteSearch === inputLen-1) + { + // Closing quote at EOF + var value = input.substring(cursor, quoteSearch).replace(/""/g, '"'); + return finish(value); + } + + // If this quote is escaped, it's part of the data; skip it + if (input[quoteSearch+1] == '"') + { + quoteSearch++; + continue; + } + + if (input[quoteSearch+1] == delim) + { + // Closing quote followed by delimiter + row.push(input.substring(cursor, quoteSearch).replace(/""/g, '"')); + cursor = quoteSearch + 1 + delimLen; + nextDelim = input.indexOf(delim, cursor); + nextNewline = input.indexOf(newline, cursor); + break; + } + + if (input.substr(quoteSearch+1, newlineLen) === newline) + { + // Closing quote followed by newline + row.push(input.substring(cursor, quoteSearch).replace(/""/g, '"')); + saveRow(quoteSearch + 1 + newlineLen); + nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field + + if (stepIsFunction) + { + doStep(); + if (aborted) + return returnable(); + } + + if (preview && data.length >= preview) + return returnable(true); + + break; + } + } + + continue; + } + + // Comment found at start of new line + if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments) + { + if (nextNewline == -1) // Comment ends at EOF + return returnable(); + cursor = nextNewline + newlineLen; + nextNewline = input.indexOf(newline, cursor); + nextDelim = input.indexOf(delim, cursor); + continue; + } + + // Next delimiter comes before next newline, so we've reached end of field + if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1)) + { + row.push(input.substring(cursor, nextDelim)); + cursor = nextDelim + delimLen; + nextDelim = input.indexOf(delim, cursor); + continue; + } + + // End of row + if 
(nextNewline !== -1) + { + row.push(input.substring(cursor, nextNewline)); + saveRow(nextNewline + newlineLen); + + if (stepIsFunction) + { + doStep(); + if (aborted) + return returnable(); + } + + if (preview && data.length >= preview) + return returnable(true); + + continue; + } + + break; + } + + + return finish(); + + + function pushRow(row) + { + data.push(row); + lastCursor = cursor; + } + + /** + * Appends the remaining input from cursor to the end into + * row, saves the row, calls step, and returns the results. + */ + function finish(value) + { + if (ignoreLastRow) + return returnable(); + if (typeof value === 'undefined') + value = input.substr(cursor); + row.push(value); + cursor = inputLen; // important in case parsing is paused + pushRow(row); + if (stepIsFunction) + doStep(); + return returnable(); + } + + /** + * Appends the current row to the results. It sets the cursor + * to newCursor and finds the nextNewline. The caller should + * take care to execute user's step function and check for + * preview and end parsing if necessary. + */ + function saveRow(newCursor) + { + cursor = newCursor; + pushRow(row); + row = []; + nextNewline = input.indexOf(newline, cursor); + } + + /** Returns an object with the results, errors, and meta. */ + function returnable(stopped) + { + return { + data: data, + errors: errors, + meta: { + delimiter: delim, + linebreak: newline, + aborted: aborted, + truncated: !!stopped, + cursor: lastCursor + (baseIndex || 0) + } + }; + } + + /** Executes the user's step function and resets data & errors. */ + function doStep() + { + step(returnable()); + data = [], errors = []; + } + }; + + /** Sets the abort flag */ + this.abort = function() + { + aborted = true; + }; + + /** Gets the cursor position */ + this.getCharIndex = function() + { + return cursor; + }; + } + + + // If you need to load Papa Parse asynchronously and you also need worker threads, hard-code + // the script path here. 
See: https://github.com/mholt/PapaParse/issues/87#issuecomment-57885358 + function getScriptPath() + { + var scripts = document.getElementsByTagName('script'); + return scripts.length ? scripts[scripts.length - 1].src : ''; + } + + function newWorker() + { + if (!Papa.WORKERS_SUPPORTED) + return false; + if (!LOADED_SYNC && Papa.SCRIPT_PATH === null) + throw new Error( + 'Script path cannot be determined automatically when Papa Parse is loaded asynchronously. ' + + 'You need to set Papa.SCRIPT_PATH manually.' + ); + var workerUrl = Papa.SCRIPT_PATH || AUTO_SCRIPT_PATH; + // Append "papaworker" to the search string to tell papaparse that this is our worker. + workerUrl += (workerUrl.indexOf('?') !== -1 ? '&' : '?') + 'papaworker'; + var w = new global.Worker(workerUrl); + w.onmessage = mainThreadReceivedMessage; + w.id = workerIdCounter++; + workers[w.id] = w; + return w; + } + + /** Callback when main thread receives a message */ + function mainThreadReceivedMessage(e) + { + var msg = e.data; + var worker = workers[msg.workerId]; + var aborted = false; + + if (msg.error) + worker.userError(msg.error, msg.file); + else if (msg.results && msg.results.data) + { + var abort = function() { + aborted = true; + completeWorker(msg.workerId, { data: [], errors: [], meta: { aborted: true } }); + }; + + var handle = { + abort: abort, + pause: notImplemented, + resume: notImplemented + }; + + if (isFunction(worker.userStep)) + { + for (var i = 0; i < msg.results.data.length; i++) + { + worker.userStep({ + data: [msg.results.data[i]], + errors: msg.results.errors, + meta: msg.results.meta + }, handle); + if (aborted) + break; + } + delete msg.results; // free memory ASAP + } + else if (isFunction(worker.userChunk)) + { + worker.userChunk(msg.results, handle, msg.file); + delete msg.results; + } + } + + if (msg.finished && !aborted) + completeWorker(msg.workerId, msg.results); + } + + function completeWorker(workerId, results) { + var worker = workers[workerId]; + if 
/**
 * Makes a deep copy of an array or object (mostly).
 *
 * Primitives, functions and null are returned as they are; arrays and
 * objects are rebuilt recursively from their enumerable keys, so the result
 * shares no nested containers with the argument.
 *
 * @param {*} obj  value to copy
 * @returns {*} the copy (or obj itself when it is not an object/array)
 */
function copy(obj)
{
	// typeof null is 'object'; without the explicit check null became {}
	if (typeof obj !== 'object' || obj === null)
		return obj;
	var cpy = obj instanceof Array ? [] : {};
	for (var key in obj)
		cpy[key] = copy(obj[key]);
	return cpy;
}