diff --git a/.eslintrc.js b/.eslintrc.js index c3ed7c56..97a67265 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -1,8 +1,9 @@ module.exports = { "parserOptions": { - "ecmaVersion": 5 + "ecmaVersion": 6 }, "env": { + "es6": true, "browser": true, "worker": true, "node": true diff --git a/.github/workflows/node.js.yml b/.github/workflows/node.js.yml index 3072b48a..ed0b6d40 100644 --- a/.github/workflows/node.js.yml +++ b/.github/workflows/node.js.yml @@ -16,7 +16,7 @@ jobs: strategy: matrix: - node-version: [10.x, 12.x, 14.x, 15.x] + node-version: [20.x, 22.x, 24.x] # See supported Node.js release schedule at https://nodejs.org/en/about/releases/ steps: @@ -26,4 +26,4 @@ jobs: with: node-version: ${{ matrix.node-version }} - run: npm install - - run: npm test + - run: npm run lint && npm run test-node && npm run build diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index f00e0fbb..00000000 --- a/.travis.yml +++ /dev/null @@ -1,6 +0,0 @@ -language: node_js -node_js: - - "8" - - "9" - - "10" - - "11" diff --git a/README.md b/README.md index 45313c3b..28ab0f8f 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ Parse CSV with JavaScript ======================================== -Papa Parse is the [fastest](http://jsperf.com/javascript-csv-parsers/4) in-browser CSV (or delimited text) parser for JavaScript. It is reliable and correct according to [RFC 4180](https://tools.ietf.org/html/rfc4180), and it comes with these features: +Papa Parse is the fastest in-browser CSV (or delimited text) parser for JavaScript. 
It is reliable and correct according to [RFC 4180](https://tools.ietf.org/html/rfc4180), and it comes with these features: - Easy to use - Parse CSV files directly (local or over the network) -- Fast mode ([is really fast](http://jsperf.com/javascript-csv-parsers/3)) +- Fast mode - Stream large files (even via HTTP) - Reverse parsing (converts JSON to CSV) - Auto-detect delimiter @@ -23,21 +23,31 @@ Install papaparse is available on [npm](https://www.npmjs.com/package/papaparse). It can be installed with the following command: - - npm install papaparse +```shell +npm install papaparse +``` If you don't want to use npm, [papaparse.min.js](https://unpkg.com/papaparse@latest/papaparse.min.js) can be downloaded to your project source. +Usage +----- +```js +import Papa from 'papaparse'; + +Papa.parse(file, config); + +const csv = Papa.unparse(data[, config]); +``` Homepage & Demo ---------------- -- [Homepage](http://papaparse.com) -- [Demo](http://papaparse.com/demo) +- [Homepage](https://www.papaparse.com) +- [Demo](https://www.papaparse.com/demo) To learn how to use Papa Parse: -- [Documentation](http://papaparse.com/docs) +- [Documentation](https://www.papaparse.com/docs) The website is hosted on [Github Pages](https://pages.github.com/). Its content is also included in the docs folder of this repository. If you want to contribute on it just clone the master of this repository and open a pull request. @@ -52,7 +62,7 @@ Papa Parse can also parse in a node streaming style which makes `.pipe` availabl Get Started ----------- -For usage instructions, see the [homepage](http://papaparse.com) and, for more detail, the [documentation](http://papaparse.com/docs). +For usage instructions, see the [homepage](https://www.papaparse.com) and, for more detail, the [documentation](https://www.papaparse.com/docs). Tests ----- diff --git a/docs/docs.html b/docs/docs.html index da3c8b62..ac39363f 100644 --- a/docs/docs.html +++ b/docs/docs.html @@ -303,7 +303,7 @@
Unparse Config Options
delimiter - The delimiting character. It must not be found in Papa.BAD_DELIMITERS. + The delimiting character. Multi-character delimiters are supported. It must not be found in Papa.BAD_DELIMITERS. @@ -343,7 +343,7 @@
Unparse Config Options
escapeFormulae - If true, field values that begin with =, +, -, or @, will be prepended with a ' to defend against injection attacks, because Excel and LibreOffice will automatically parse such cells as formulae. + If true, field values that begin with =, +, -, @, \t, or \r, will be prepended with a ' to defend against injection attacks, because Excel and LibreOffice will automatically parse such cells as formulae. You can override those values by setting this option to a regular expression @@ -450,7 +450,8 @@
Default Config With All Options
beforeFirstChunk: undefined, withCredentials: undefined, transform: undefined, - delimitersToGuess: [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP] + delimitersToGuess: [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP], + skipFirstNLines: 0 }
@@ -470,7 +471,7 @@
Config Options
delimiter - The delimiting character. Leave blank to auto-detect from a list of most common delimiters, or any values passed in through delimitersToGuess. It can be a string or a function. If string, it must be one of length 1. If a function, it must accept the input as first parameter and it must return a string which will be used as delimiter. In both cases it cannot be found in Papa.BAD_DELIMITERS. + The delimiting character. Leave blank to auto-detect from a list of most common delimiters, or any values passed in through delimitersToGuess. It can be a string or a function. If a string, it can be of any length (so multi-character delimiters are supported). If a function, it must accept the input as first parameter and it must return a string which will be used as delimiter. In both cases it cannot be found in Papa.BAD_DELIMITERS. @@ -502,7 +503,8 @@
Config Options
header - If true, the first row of parsed data will be interpreted as field names. An array of field names will be returned in meta, and each row of data will be an object of values keyed by field name instead of a simple array. Rows with a different number of fields from the header row will produce an error. Warning: Duplicate field names will overwrite values in previous fields having the same name. + If true, the first row of parsed data will be interpreted as field names. An array of field names will be returned in meta, and each row of data will be an object of values keyed by field name instead of a simple array. Rows with a different number of fields from the header row will produce an error. + Warning: Duplicated field names will be automatically renamed so that values in earlier fields with the same name are not overwritten. Each renamed field, together with its original name (or the name returned by transformHeader), is stored in ParseResult.meta.renamedHeaders. @@ -519,7 +521,7 @@
Config Options
dynamicTyping - If true, numeric and boolean data will be converted to their type instead of remaining strings. Numeric data must conform to the definition of a decimal literal. Numerical values greater than 2^53 or less than -2^53 will not be converted to numbers to preserve precision. European-formatted numbers must have commas and dots swapped. If also accepts an object or a function. If object it's values should be a boolean to indicate if dynamic typing should be applied for each column number (or header name if using headers). If it's a function, it should return a boolean value for each field number (or name if using headers) which will be passed as first argument. + If true, numeric and boolean data will be converted to their type instead of remaining strings. Numeric data must conform to the definition of a decimal literal. Numerical values greater than 2^53 or less than -2^53 will not be converted to numbers to preserve precision. European-formatted numbers must have commas and dots swapped. It also accepts an object or a function. If it's an object, its values should be a boolean to indicate if dynamic typing should be applied for each column number (or header name if using headers). If it's a function, it should return a boolean value for each field number (or name if using headers) which will be passed as first argument. @@ -681,6 +683,14 @@
Config Options
An array of delimiters to guess from if the delimiter option is not set. + + + skipFirstNLines + + + The number of lines to skip at the beginning of the input when converting a CSV file to JSON. + + @@ -759,11 +769,13 @@
Data
[ { "Column 1": "foo", - "Column 2": "bar" + "Column 2": "bar", + "Column 1": "foo1", }, { "Column 1": "abc", - "Column 2": "def" + "Column 2": "def", + "Column 1": "abc1", } ] @@ -811,6 +823,7 @@
Meta
aborted: // Whether process was aborted fields: // Array of field names truncated: // Whether preview consumed all input + renamedHeaders: // Headers that are automatically renamed by the library to avoid duplication. {Column 1_1: 'Column 1' // the later header 'Column 1' was renamed to 'Column 1_1'} }
@@ -861,7 +874,13 @@
Read-Only
Papa.BAD_DELIMITERS - An array of characters that are not allowed as delimiters. + An array of characters that are not allowed as delimiters (\r, \n, ", \ufeff). + + + + Papa.BYTE_ORDER_MARK + + The unicode Byte Order Mark (\ufeff). diff --git a/docs/faq.html b/docs/faq.html index b8ecdb29..403461c1 100644 --- a/docs/faq.html +++ b/docs/faq.html @@ -71,7 +71,7 @@
Why use Papa Parse?

Can I use Papa Parse server-side with Node.js?
-

Yes, Paparse supports Node. See our READMEfor further details. +

Yes, Papa Parse supports Node. See our README for further details.

Does Papa Parse have any dependencies?
diff --git a/docs/index.html b/docs/index.html index ec841e27..93819a48 100644 --- a/docs/index.html +++ b/docs/index.html @@ -175,6 +175,24 @@

Features

People Papa

+
+
+

+ + PapaParse + +   + + PapaParse + +

+

diff --git a/docs/resources/js/lovers.js b/docs/resources/js/lovers.js index 530a1644..1c939510 100644 --- a/docs/resources/js/lovers.js +++ b/docs/resources/js/lovers.js @@ -26,6 +26,18 @@ **/ var peopleLovePapa = [ + { + link: "http://doctempleapp.com", + name: "DocTemple", + description: "helps you effortlessly populate .docx documents with .csv data using scalable templating solution.", + quote: "Papa Parse made it extremely easy to load user CSV files in the browser. Implementing it was a breeze and it works perfect!" + }, + { + link: "https://www.vertex.io", + name: "The no-code database of the future.", + description: "No-code, Postgres-powered admin, internal tools, and backend suite.", + quote: "Vertex uses Papa to power all CSV related features!" + }, { link: "https://www.circlehd.com", name: "CircleHD Enterprise Video Platform", @@ -55,24 +67,6 @@ var peopleLovePapa = [ name: "Wikipedia", description: "uses Papa Parse in VisualEditor to help article editors effortlessly build data tables from text files." }, - { - link: "https://www.webucator.com/webdesign/javascript.cfm", - name: "Webucator", - description: "created a video showing how to use Papa Parse and FileDrop.js to create a drag-and-drop CSV-JSON converter.", - quote: "It's often easy to convert data to CSV. With Papa, it's easy to turn that CSV into JSON." - }, - { - link: "http://www.yolpo.com/social/gist.github?1dbd4556e748bdb830b3&autoplay=1&interimresults=0&failfast=1", - name: "Yolpo", - description: "created a simple regression test for Papa Parse.", - quote: "Papa's API is so intuitive, it took me no time to get it to work." - }, - { - link: "https://www.appstax.com", - name: "Appstax", - description: "uses Papa Parse to import and export CSV data in their visual databrowser.", - quote: "Papa is a great for parsing CSV. And what a great tone of voice - love it!" 
- }, { link: "https://github.com/Nanofus/novel.js", name: "Novel.js", @@ -97,12 +91,6 @@ var peopleLovePapa = [ description: "is a brand-new messaging app made specifically for busy families. Automatically align all family members when sending text messages to parents in the kindergarten or school or when planning your kids birthday parties.", quote: "With Papa it was a joy to implement our tool for importing messages and places from external systems." }, - { - link: "https://explore.hua-gallery.com", - name: "Hua Explore", - description: "The premier destination for information on Contemporary Chinese Art.", - quote: "Papa makes processing data that galleries send us totally seamless." - }, { link: "https://monei.net", name: "MONEI", @@ -126,5 +114,17 @@ var peopleLovePapa = [ name: "Visa SOP Sample", description: "Providing free guide to international students.", quote: "Use Papa Parse for many of side projects. Super fast and works all the time. Love it!" + }, + { + link: "https://retool.com/", + name: "Retool", + description: "A remarkably fast way to build internal tools.", + quote: "Papa makes it easy for our users to customize CSV parsing to match their business logic." + }, + { + link: "https://www.hellodata.ai/", + name: "HelloData", + description: "Automatic rent surveys with real-time data on over 25M multifamily units nationwide, direct from property websites.", + quote: "Papa Parse makes bulk data imports a breeze! It's helped us easily onboard our largest customers. It's robust and a true game-changer." 
} ]; diff --git a/docs/resources/js/papaparse.js b/docs/resources/js/papaparse.js index ac02f6d6..9dd7fa27 100644 --- a/docs/resources/js/papaparse.js +++ b/docs/resources/js/papaparse.js @@ -491,6 +491,16 @@ License: MIT this.parseChunk = function(chunk, isFakeChunk) { // First chunk pre-processing + const skipFirstNLines = parseInt(this._config.skipFirstNLines) || 0; + if (this.isFirstChunk && skipFirstNLines > 0) { + let _newline = this._config.newline; + if (!_newline) { + const quoteChar = this._config.quoteChar || '"'; + _newline = this._handle.guessLineEndings(chunk, quoteChar); + } + const splitChunk = chunk.split(_newline); + chunk = [...splitChunk.slice(skipFirstNLines)].join(_newline); + } if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) { var modifiedChunk = this._config.beforeFirstChunk(chunk); @@ -503,7 +513,6 @@ License: MIT // Rejoin the line we likely just split in two by chunking the file var aggregate = this._partialLine + chunk; this._partialLine = ''; - var results = this._handle.parse(aggregate, this._baseIndex, !this._finished); if (this._handle.paused() || this._handle.aborted()) { @@ -1048,7 +1057,7 @@ License: MIT { var quoteChar = _config.quoteChar || '"'; if (!_config.newline) - _config.newline = guessLineEndings(input, quoteChar); + _config.newline = this.guessLineEndings(input, quoteChar); _delimiterError = false; if (!_config.delimiter) @@ -1119,6 +1128,32 @@ License: MIT _input = ''; }; + this.guessLineEndings = function(input, quoteChar) + { + input = input.substr(0, 1024 * 1024); // max length 1 MB + // Replace all the text inside quotes + var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm'); + input = input.replace(re, ''); + + var r = input.split('\r'); + + var n = input.split('\n'); + + var nAppearsFirst = (n.length > 1 && n[0].length < r[0].length); + + if (r.length === 1 || nAppearsFirst) + return '\n'; + + var numWithN = 0; + for (var i = 0; i < r.length; i++) + { + if 
(r[i][0] === '\n') + numWithN++; + } + + return numWithN >= r.length / 2 ? '\r\n' : '\r'; + }; + function testEmptyLine(s) { return _config.skipEmptyLines === 'greedy' ? s.join('').trim() === '' : s.length === 1 && s[0].length === 0; } @@ -1321,32 +1356,6 @@ License: MIT }; } - function guessLineEndings(input, quoteChar) - { - input = input.substr(0, 1024 * 1024); // max length 1 MB - // Replace all the text inside quotes - var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm'); - input = input.replace(re, ''); - - var r = input.split('\r'); - - var n = input.split('\n'); - - var nAppearsFirst = (n.length > 1 && n[0].length < r[0].length); - - if (r.length === 1 || nAppearsFirst) - return '\n'; - - var numWithN = 0; - for (var i = 0; i < r.length; i++) - { - if (r[i][0] === '\n') - numWithN++; - } - - return numWithN >= r.length / 2 ? '\r\n' : '\r'; - } - function addError(type, code, msg, row) { _results.errors.push({ diff --git a/package.json b/package.json index 6404a35e..013a4b0c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "papaparse", - "version": "5.3.0", + "version": "5.5.3", "description": "Fast and powerful CSV parser for the browser that supports web workers and streaming large files. 
Converts CSV to JSON and JSON to CSV.", "keywords": [ "csv", @@ -24,10 +24,10 @@ "multi-threaded", "jquery-plugin" ], - "homepage": "http://papaparse.com", + "homepage": "https://www.papaparse.com/", "repository": { "type": "git", - "url": "https://github.com/mholt/PapaParse.git" + "url": "git+https://github.com/mholt/PapaParse.git" }, "author": { "name": "Matthew Holt", @@ -40,15 +40,16 @@ "chai": "^4.2.0", "connect": "^3.3.3", "eslint": "^4.19.1", - "grunt": "^1.0.2", - "grunt-contrib-uglify": "^3.3.0", + "grunt": "^1.5.2", + "grunt-contrib-uglify": "^5.2.0", "mocha": "^5.2.0", - "mocha-headless-chrome": "^2.0.1", + "mocha-headless-chrome": "^4.0.0", "open": "7.0.0", "serve-static": "^1.7.1" }, "scripts": { "lint": "eslint --no-ignore papaparse.js Gruntfile.js .eslintrc.js 'tests/**/*.js'", + "build": "grunt build", "test-browser": "node tests/test.js", "test-mocha-headless-chrome": "node tests/test.js --mocha-headless-chrome", "test-node": "mocha tests/node-tests.js tests/test-cases.js", diff --git a/papaparse.js b/papaparse.js index 26db15cf..2c7fd178 100755 --- a/papaparse.js +++ b/papaparse.js @@ -1,6 +1,6 @@ /* @license Papa Parse -v5.3.0 +v5.5.3 https://github.com/mholt/PapaParse License: MIT */ @@ -49,11 +49,12 @@ License: MIT function getWorkerBlob() { var URL = global.URL || global.webkitURL || null; var code = moduleFactory.toString(); - return Papa.BLOB_URL || (Papa.BLOB_URL = URL.createObjectURL(new Blob(['(', code, ')();'], {type: 'text/javascript'}))); + return Papa.BLOB_URL || (Papa.BLOB_URL = URL.createObjectURL(new Blob(["var global = (function() { if (typeof self !== 'undefined') { return self; } if (typeof window !== 'undefined') { return window; } if (typeof global !== 'undefined') { return global; } return {}; })(); global.IS_PAPA_WORKER=true; ", '(', code, ')();'], {type: 'text/javascript'}))); } var IS_WORKER = !global.document && !!global.postMessage, - IS_PAPA_WORKER = IS_WORKER && /blob:/i.test((global.location || {}).protocol); + 
IS_PAPA_WORKER = global.IS_PAPA_WORKER || false; + var workers = {}, workerIdCounter = 0; var Papa = {}; @@ -184,8 +185,13 @@ License: MIT global.onmessage = workerThreadReceivedMessage; } - - + // Strip character from UTF-8 BOM encoded files that cause issue parsing the file + function stripBom(string) { + if (string.charCodeAt(0) === 0xfeff) { + return string.slice(1); + } + return string; + } function CsvToJson(_input, _config) { @@ -234,6 +240,7 @@ License: MIT } else if (typeof _input === 'string') { + _input = stripBom(_input); if (_config.download) streamer = new NetworkStreamer(_config); else @@ -307,7 +314,7 @@ License: MIT if (Array.isArray(_input.data)) { if (!_input.fields) - _input.fields = _input.meta && _input.meta.fields; + _input.fields = _input.meta && _input.meta.fields || _columns; if (!_input.fields) _input.fields = Array.isArray(_input.data[0]) @@ -367,11 +374,13 @@ License: MIT _escapedQuote = _config.escapeChar + _quoteChar; } - if (typeof _config.escapeFormulae === 'boolean') + if (_config.escapeFormulae instanceof RegExp) { _escapeFormulae = _config.escapeFormulae; + } else if (typeof _config.escapeFormulae === 'boolean' && _config.escapeFormulae) { + _escapeFormulae = /^[=+\-@\t\r].*$/; + } } - /** The double for loop that iterates the data and writes out a CSV string including header row */ function serialize(fields, data, skipEmptyLines) { @@ -444,13 +453,17 @@ License: MIT if (str.constructor === Date) return JSON.stringify(str).slice(1, 25); - if (_escapeFormulae === true && typeof str === "string" && (str.match(/^[=+\-@].*$/) !== null)) { + var needsQuotes = false; + + if (_escapeFormulae && typeof str === "string" && _escapeFormulae.test(str)) { str = "'" + str; + needsQuotes = true; } var escapedQuoteStr = str.toString().replace(quoteCharRegex, _escapedQuote); - var needsQuotes = (typeof _quotes === 'boolean' && _quotes) + needsQuotes = needsQuotes + || _quotes === true || (typeof _quotes === 'function' && _quotes(str, col)) || 
(Array.isArray(_quotes) && _quotes[col]) || hasAny(escapedQuoteStr, Papa.BAD_DELIMITERS) @@ -470,6 +483,7 @@ License: MIT } } + /** ChunkStreamer is the base prototype for various streamer implementations. */ function ChunkStreamer(config) { @@ -494,6 +508,16 @@ License: MIT this.parseChunk = function(chunk, isFakeChunk) { // First chunk pre-processing + const skipFirstNLines = parseInt(this._config.skipFirstNLines) || 0; + if (this.isFirstChunk && skipFirstNLines > 0) { + let _newline = this._config.newline; + if (!_newline) { + const quoteChar = this._config.quoteChar || '"'; + _newline = this._handle.guessLineEndings(chunk, quoteChar); + } + const splitChunk = chunk.split(_newline); + chunk = [...splitChunk.slice(skipFirstNLines)].join(_newline); + } if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) { var modifiedChunk = this._config.beforeFirstChunk(chunk); @@ -506,7 +530,6 @@ License: MIT // Rejoin the line we likely just split in two by chunking the file var aggregate = this._partialLine + chunk; this._partialLine = ''; - var results = this._handle.parse(aggregate, this._baseIndex, !this._finished); if (this._handle.paused() || this._handle.aborted()) { @@ -1008,7 +1031,7 @@ License: MIT var MAX_FLOAT = Math.pow(2, 53); var MIN_FLOAT = -MAX_FLOAT; var FLOAT = /^\s*-?(\d+\.?|\.\d+|\d+\.\d+)([eE][-+]?\d+)?\s*$/; - var ISO_DATE = /(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/; + var ISO_DATE = /^((\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z)))$/; var self = this; var _stepCounter = 0; // Number of times step was called (number of rows parsed) var _rowCounter = 0; // Number of rows that have been parsed so far @@ -1061,7 +1084,7 
@@ License: MIT { var quoteChar = _config.quoteChar || '"'; if (!_config.newline) - _config.newline = guessLineEndings(input, quoteChar); + _config.newline = this.guessLineEndings(input, quoteChar); _delimiterError = false; if (!_config.delimiter) @@ -1135,6 +1158,32 @@ License: MIT _input = ''; }; + this.guessLineEndings = function(input, quoteChar) + { + input = input.substring(0, 1024 * 1024); // max length 1 MB + // Replace all the text inside quotes + var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm'); + input = input.replace(re, ''); + + var r = input.split('\r'); + + var n = input.split('\n'); + + var nAppearsFirst = (n.length > 1 && n[0].length < r[0].length); + + if (r.length === 1 || nAppearsFirst) + return '\n'; + + var numWithN = 0; + for (var i = 0; i < r.length; i++) + { + if (r[i][0] === '\n') + numWithN++; + } + + return numWithN >= r.length / 2 ? '\r\n' : '\r'; + }; + function testEmptyLine(s) { return _config.skipEmptyLines === 'greedy' ? 
s.join('').trim() === '' : s.length === 1 && s[0].length === 0; } @@ -1159,9 +1208,9 @@ License: MIT if (_config.skipEmptyLines) { - for (var i = 0; i < _results.data.length; i++) - if (testEmptyLine(_results.data[i])) - _results.data.splice(i--, 1); + _results.data = _results.data.filter(function(d) { + return !testEmptyLine(d); + }); } if (needsHeaderRow()) @@ -1182,6 +1231,7 @@ License: MIT function addHeader(header, i) { + header = stripBom(header); if (isFunction(_config.transformHeader)) header = _config.transformHeader(header, i); @@ -1341,32 +1391,6 @@ License: MIT }; } - function guessLineEndings(input, quoteChar) - { - input = input.substring(0, 1024 * 1024); // max length 1 MB - // Replace all the text inside quotes - var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm'); - input = input.replace(re, ''); - - var r = input.split('\r'); - - var n = input.split('\n'); - - var nAppearsFirst = (n.length > 1 && n[0].length < r[0].length); - - if (r.length === 1 || nAppearsFirst) - return '\n'; - - var numWithN = 0; - for (var i = 0; i < r.length; i++) - { - if (r[i][0] === '\n') - numWithN++; - } - - return numWithN >= r.length / 2 ? 
'\r\n' : '\r'; - } - function addError(type, code, msg, row) { var error = { @@ -1399,8 +1423,10 @@ License: MIT var preview = config.preview; var fastMode = config.fastMode; var quoteChar; - /** Allows for no quoteChar by setting quoteChar to undefined in config */ - if (config.quoteChar === undefined) { + var renamedHeaders = null; + var headerParsed = false; + + if (config.quoteChar === undefined || config.quoteChar === null) { quoteChar = '"'; } else { quoteChar = config.quoteChar; @@ -1460,6 +1486,7 @@ License: MIT { row = rows[i]; cursor += row.length; + if (i !== rows.length - 1) cursor += newline.length; else if (ignoreLastRow) @@ -1555,7 +1582,7 @@ License: MIT var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo); // Closing quote followed by delimiter or 'unnecessary spaces + delimiter' - if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim) + if (input.substr(quoteSearch + 1 + spacesBetweenQuoteAndDelimiter, delimLen) === delim) { row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen; @@ -1654,7 +1681,6 @@ License: MIT break; } - return finish(); @@ -1714,6 +1740,48 @@ License: MIT /** Returns an object with the results, errors, and meta. 
*/ function returnable(stopped) { + if (config.header && !baseIndex && data.length && !headerParsed) + { + const result = data[0]; + const headerCount = Object.create(null); // To track the count of each base header + const usedHeaders = new Set(result); // To track used headers and avoid duplicates + let duplicateHeaders = false; + + for (let i = 0; i < result.length; i++) { + let header = stripBom(result[i]); + if (isFunction(config.transformHeader)) + header = config.transformHeader(header, i); + + if (!headerCount[header]) { + headerCount[header] = 1; + result[i] = header; + } else { + let newHeader; + let suffixCount = headerCount[header]; + + // Find a unique new header + do { + newHeader = `${header}_${suffixCount}`; + suffixCount++; + } while (usedHeaders.has(newHeader)); + + usedHeaders.add(newHeader); // Mark this new Header as used + result[i] = newHeader; + headerCount[header]++; + duplicateHeaders = true; + if (renamedHeaders === null) { + renamedHeaders = {}; + } + renamedHeaders[newHeader] = header; + } + + usedHeaders.add(header); // Ensure the original header is marked as used + } + if (duplicateHeaders) { + console.warn('Duplicate headers found and renamed.'); + } + headerParsed = true; + } return { data: data, errors: errors, @@ -1722,7 +1790,8 @@ License: MIT linebreak: newline, aborted: aborted, truncated: !!stopped, - cursor: lastCursor + (baseIndex || 0) + cursor: lastCursor + (baseIndex || 0), + renamedHeaders: renamedHeaders } }; } @@ -1865,7 +1934,6 @@ License: MIT { return function() { f.apply(self, arguments); }; } - function isFunction(func) { return typeof func === 'function'; diff --git a/papaparse.min.js b/papaparse.min.js index a92afc17..f3141104 100644 --- a/papaparse.min.js +++ b/papaparse.min.js @@ -1,7 +1,7 @@ /* @license Papa Parse -v5.3.0 +v5.5.3 https://github.com/mholt/PapaParse License: MIT */ -!function(e,t){"function"==typeof define&&define.amd?define([],t):"object"==typeof module&&"undefined"!=typeof 
exports?module.exports=t():e.Papa=t()}(this,function s(){"use strict";var f="undefined"!=typeof self?self:"undefined"!=typeof window?window:void 0!==f?f:{};var n=!f.document&&!!f.postMessage,o=n&&/blob:/i.test((f.location||{}).protocol),a={},h=0,b={parse:function(e,t){var i=(t=t||{}).dynamicTyping||!1;U(i)&&(t.dynamicTypingFunction=i,i={});if(t.dynamicTyping=i,t.transform=!!U(t.transform)&&t.transform,t.worker&&b.WORKERS_SUPPORTED){var r=function(){if(!b.WORKERS_SUPPORTED)return!1;var e=(i=f.URL||f.webkitURL||null,r=s.toString(),b.BLOB_URL||(b.BLOB_URL=i.createObjectURL(new Blob(["(",r,")();"],{type:"text/javascript"})))),t=new f.Worker(e);var i,r;return t.onmessage=m,t.id=h++,a[t.id]=t}();return r.userStep=t.step,r.userChunk=t.chunk,r.userComplete=t.complete,r.userError=t.error,t.step=U(t.step),t.chunk=U(t.chunk),t.complete=U(t.complete),t.error=U(t.error),delete t.worker,void r.postMessage({input:e,config:t,workerId:r.id})}var n=null;b.NODE_STREAM_INPUT,"string"==typeof e?n=t.download?new l(t):new p(t):!0===e.readable&&U(e.read)&&U(e.on)?n=new g(t):(f.File&&e instanceof File||e instanceof Object)&&(n=new c(t));return n.stream(e)},unparse:function(e,t){var n=!1,m=!0,_=",",v="\r\n",s='"',a=s+s,i=!1,r=null,o=!1;!function(){if("object"!=typeof t)return;"string"!=typeof t.delimiter||b.BAD_DELIMITERS.filter(function(e){return-1!==t.delimiter.indexOf(e)}).length||(_=t.delimiter);("boolean"==typeof t.quotes||"function"==typeof t.quotes||Array.isArray(t.quotes))&&(n=t.quotes);"boolean"!=typeof t.skipEmptyLines&&"string"!=typeof t.skipEmptyLines||(i=t.skipEmptyLines);"string"==typeof t.newline&&(v=t.newline);"string"==typeof t.quoteChar&&(s=t.quoteChar);"boolean"==typeof t.header&&(m=t.header);if(Array.isArray(t.columns)){if(0===t.columns.length)throw new Error("Option columns is empty");r=t.columns}void 0!==t.escapeChar&&(a=t.escapeChar+s);"boolean"==typeof t.escapeFormulae&&(o=t.escapeFormulae)}();var h=new RegExp(q(s),"g");"string"==typeof 
e&&(e=JSON.parse(e));if(Array.isArray(e)){if(!e.length||Array.isArray(e[0]))return f(null,e,i);if("object"==typeof e[0])return f(r||u(e[0]),e,i)}else if("object"==typeof e)return"string"==typeof e.data&&(e.data=JSON.parse(e.data)),Array.isArray(e.data)&&(e.fields||(e.fields=e.meta&&e.meta.fields),e.fields||(e.fields=Array.isArray(e.data[0])?e.fields:u(e.data[0])),Array.isArray(e.data[0])||"object"==typeof e.data[0]||(e.data=[e.data])),f(e.fields||[],e.data||[],i);throw new Error("Unable to serialize unrecognized input");function u(e){if("object"!=typeof e)return[];var t=[];for(var i in e)t.push(i);return t}function f(e,t,i){var r="";"string"==typeof e&&(e=JSON.parse(e)),"string"==typeof t&&(t=JSON.parse(t));var n=Array.isArray(e)&&0=this._config.preview;if(o)f.postMessage({results:n,workerId:b.WORKER_ID,finished:a});else if(U(this._config.chunk)&&!t){if(this._config.chunk(n,this._handle),this._handle.paused()||this._handle.aborted())return void(this._halted=!0);n=void 0,this._completeResults=void 0}return this._config.step||this._config.chunk||(this._completeResults.data=this._completeResults.data.concat(n.data),this._completeResults.errors=this._completeResults.errors.concat(n.errors),this._completeResults.meta=n.meta),this._completed||!a||!U(this._config.complete)||n&&n.meta.aborted||(this._config.complete(this._completeResults,this._input),this._completed=!0),a||n&&n.meta.paused||this._nextChunk(),n}this._halted=!0},this._sendError=function(e){U(this._config.error)?this._config.error(e):o&&this._config.error&&f.postMessage({workerId:b.WORKER_ID,error:e,finished:!1})}}function l(e){var r;(e=e||{}).chunkSize||(e.chunkSize=b.RemoteChunkSize),u.call(this,e),this._nextChunk=n?function(){this._readChunk(),this._chunkLoaded()}:function(){this._readChunk()},this.stream=function(e){this._input=e,this._nextChunk()},this._readChunk=function(){if(this._finished)this._chunkLoaded();else{if(r=new 
XMLHttpRequest,this._config.withCredentials&&(r.withCredentials=this._config.withCredentials),n||(r.onload=y(this._chunkLoaded,this),r.onerror=y(this._chunkError,this)),r.open(this._config.downloadRequestBody?"POST":"GET",this._input,!n),this._config.downloadRequestHeaders){var e=this._config.downloadRequestHeaders;for(var t in e)r.setRequestHeader(t,e[t])}if(this._config.chunkSize){var i=this._start+this._config.chunkSize-1;r.setRequestHeader("Range","bytes="+this._start+"-"+i)}try{r.send(this._config.downloadRequestBody)}catch(e){this._chunkError(e.message)}n&&0===r.status&&this._chunkError()}},this._chunkLoaded=function(){4===r.readyState&&(r.status<200||400<=r.status?this._chunkError():(this._start+=this._config.chunkSize?this._config.chunkSize:r.responseText.length,this._finished=!this._config.chunkSize||this._start>=function(e){var t=e.getResponseHeader("Content-Range");if(null===t)return-1;return parseInt(t.substring(t.lastIndexOf("/")+1))}(r),this.parseChunk(r.responseText)))},this._chunkError=function(e){var t=r.statusText||e;this._sendError(new Error(t))}}function c(e){var r,n;(e=e||{}).chunkSize||(e.chunkSize=b.LocalChunkSize),u.call(this,e);var s="undefined"!=typeof FileReader;this.stream=function(e){this._input=e,n=e.slice||e.webkitSlice||e.mozSlice,s?((r=new FileReader).onload=y(this._chunkLoaded,this),r.onerror=y(this._chunkError,this)):r=new FileReaderSync,this._nextChunk()},this._nextChunk=function(){this._finished||this._config.preview&&!(this._rowCount=this._input.size,this.parseChunk(e.target.result)},this._chunkError=function(){this._sendError(r.error)}}function p(e){var i;u.call(this,e=e||{}),this.stream=function(e){return i=e,this._nextChunk()},this._nextChunk=function(){if(!this._finished){var e,t=this._config.chunkSize;return t?(e=i.substring(0,t),i=i.substring(t)):(e=i,i=""),this._finished=!i,this.parseChunk(e)}}}function g(e){u.call(this,e=e||{});var 
t=[],i=!0,r=!1;this.pause=function(){u.prototype.pause.apply(this,arguments),this._input.pause()},this.resume=function(){u.prototype.resume.apply(this,arguments),this._input.resume()},this.stream=function(e){this._input=e,this._input.on("data",this._streamData),this._input.on("end",this._streamEnd),this._input.on("error",this._streamError)},this._checkIsFinished=function(){r&&1===t.length&&(this._finished=!0)},this._nextChunk=function(){this._checkIsFinished(),t.length?this.parseChunk(t.shift()):i=!0},this._streamData=y(function(e){try{t.push("string"==typeof e?e:e.toString(this._config.encoding)),i&&(i=!1,this._checkIsFinished(),this.parseChunk(t.shift()))}catch(e){this._streamError(e)}},this),this._streamError=y(function(e){this._streamCleanUp(),this._sendError(e)},this),this._streamEnd=y(function(){this._streamCleanUp(),r=!0,this._streamData("")},this),this._streamCleanUp=y(function(){this._input.removeListener("data",this._streamData),this._input.removeListener("end",this._streamEnd),this._input.removeListener("error",this._streamError)},this)}function i(_){var a,o,h,r=Math.pow(2,53),n=-r,s=/^\s*-?(\d+\.?|\.\d+|\d+\.\d+)(e[-+]?\d+)?\s*$/,u=/(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/,t=this,i=0,f=0,d=!1,e=!1,l=[],c={data:[],errors:[],meta:{}};if(U(_.step)){var p=_.step;_.step=function(e){if(c=e,m())g();else{if(g(),0===c.data.length)return;i+=e.data.length,_.preview&&i>_.preview?o.abort():(c.data=c.data[0],p(c,t))}}}function v(e){return"greedy"===_.skipEmptyLines?""===e.join("").trim():1===e.length&&0===e[0].length}function g(){if(c&&h&&(k("Delimiter","UndetectableDelimiter","Unable to auto-detect delimiting character; defaulted to '"+b.DefaultDelimiter+"'"),h=!1),_.skipEmptyLines)for(var 
e=0;e=l.length?"__parsed_extra":l[i]),_.transform&&(s=_.transform(s,n)),s=y(n,s),"__parsed_extra"===n?(r[n]=r[n]||[],r[n].push(s)):r[n]=s}return _.header&&(i>l.length?k("FieldMismatch","TooManyFields","Too many fields: expected "+l.length+" fields but parsed "+i,f+t):i=r.length/2?"\r\n":"\r"}(e,r)),h=!1,_.delimiter)U(_.delimiter)&&(_.delimiter=_.delimiter(e),c.meta.delimiter=_.delimiter);else{var n=function(e,t,i,r,n){var s,a,o,h;n=n||[",","\t","|",";",b.RECORD_SEP,b.UNIT_SEP];for(var u=0;u=L)return R(!0)}else for(_=M,M++;;){if(-1===(_=a.indexOf(O,_+1)))return i||u.push({type:"Quotes",code:"MissingQuotes",message:"Quoted field unterminated",row:h.length,index:M}),E();if(_===r-1)return E(a.substring(M,_).replace(m,O));if(O!==z||a[_+1]!==z){if(O===z||0===_||a[_-1]!==z){-1!==p&&p<_+1&&(p=a.indexOf(D,_+1)),-1!==g&&g<_+1&&(g=a.indexOf(I,_+1));var y=w(-1===g?p:Math.min(p,g));if(a[_+1+y]===D){f.push(a.substring(M,_).replace(m,O)),a[M=_+1+y+e]!==O&&(_=a.indexOf(O,M)),p=a.indexOf(D,M),g=a.indexOf(I,M);break}var k=w(g);if(a.substring(_+1+k,_+1+k+n)===I){if(f.push(a.substring(M,_).replace(m,O)),C(_+1+k+n),p=a.indexOf(D,M),_=a.indexOf(O,M),o&&(S(),j))return R();if(L&&h.length>=L)return R(!0);break}u.push({type:"Quotes",code:"InvalidQuotes",message:"Trailing quote on quoted field is malformed",row:h.length,index:M}),_++}}else _++}return E();function b(e){h.push(e),d=M}function w(e){var t=0;if(-1!==e){var i=a.substring(_+1,e);i&&""===i.trim()&&(t=i.length)}return t}function E(e){return i||(void 0===e&&(e=a.substring(M)),f.push(e),M=r,b(f),o&&S()),R()}function C(e){M=e,b(f),f=[],g=a.indexOf(I,M)}function R(e){return{data:h,errors:u,meta:{delimiter:D,linebreak:I,aborted:j,truncated:!!e,cursor:d+(t||0)}}}function S(){A(R()),h=[],u=[]}function x(e,t,i){var r={nextDelim:void 0,quoteSearch:void 0},n=a.indexOf(O,t+1);if(t{"function"==typeof define&&define.amd?define([],t):"object"==typeof module&&"undefined"!=typeof exports?module.exports=t():e.Papa=t()})(this,function r(){var 
n="undefined"!=typeof self?self:"undefined"!=typeof window?window:void 0!==n?n:{};var d,s=!n.document&&!!n.postMessage,a=n.IS_PAPA_WORKER||!1,o={},h=0,v={};function u(e){this._handle=null,this._finished=!1,this._completed=!1,this._halted=!1,this._input=null,this._baseIndex=0,this._partialLine="",this._rowCount=0,this._start=0,this._nextChunk=null,this.isFirstChunk=!0,this._completeResults={data:[],errors:[],meta:{}},function(e){var t=b(e);t.chunkSize=parseInt(t.chunkSize),e.step||e.chunk||(t.chunkSize=null);this._handle=new i(t),(this._handle.streamer=this)._config=t}.call(this,e),this.parseChunk=function(t,e){var i=parseInt(this._config.skipFirstNLines)||0;if(this.isFirstChunk&&0=this._config.preview);if(a)n.postMessage({results:r,workerId:v.WORKER_ID,finished:i});else if(U(this._config.chunk)&&!e){if(this._config.chunk(r,this._handle),this._handle.paused()||this._handle.aborted())return void(this._halted=!0);this._completeResults=r=void 0}return this._config.step||this._config.chunk||(this._completeResults.data=this._completeResults.data.concat(r.data),this._completeResults.errors=this._completeResults.errors.concat(r.errors),this._completeResults.meta=r.meta),this._completed||!i||!U(this._config.complete)||r&&r.meta.aborted||(this._config.complete(this._completeResults,this._input),this._completed=!0),i||r&&r.meta.paused||this._nextChunk(),r}this._halted=!0},this._sendError=function(e){U(this._config.error)?this._config.error(e):a&&this._config.error&&n.postMessage({workerId:v.WORKER_ID,error:e,finished:!1})}}function f(e){var r;(e=e||{}).chunkSize||(e.chunkSize=v.RemoteChunkSize),u.call(this,e),this._nextChunk=s?function(){this._readChunk(),this._chunkLoaded()}:function(){this._readChunk()},this.stream=function(e){this._input=e,this._nextChunk()},this._readChunk=function(){if(this._finished)this._chunkLoaded();else{if(r=new 
XMLHttpRequest,this._config.withCredentials&&(r.withCredentials=this._config.withCredentials),s||(r.onload=y(this._chunkLoaded,this),r.onerror=y(this._chunkError,this)),r.open(this._config.downloadRequestBody?"POST":"GET",this._input,!s),this._config.downloadRequestHeaders){var e,t=this._config.downloadRequestHeaders;for(e in t)r.setRequestHeader(e,t[e])}var i;this._config.chunkSize&&(i=this._start+this._config.chunkSize-1,r.setRequestHeader("Range","bytes="+this._start+"-"+i));try{r.send(this._config.downloadRequestBody)}catch(e){this._chunkError(e.message)}s&&0===r.status&&this._chunkError()}},this._chunkLoaded=function(){4===r.readyState&&(r.status<200||400<=r.status?this._chunkError():(this._start+=this._config.chunkSize||r.responseText.length,this._finished=!this._config.chunkSize||this._start>=(e=>null!==(e=e.getResponseHeader("Content-Range"))?parseInt(e.substring(e.lastIndexOf("/")+1)):-1)(r),this.parseChunk(r.responseText)))},this._chunkError=function(e){e=r.statusText||e;this._sendError(new Error(e))}}function l(e){(e=e||{}).chunkSize||(e.chunkSize=v.LocalChunkSize),u.call(this,e);var i,r,n="undefined"!=typeof FileReader;this.stream=function(e){this._input=e,r=e.slice||e.webkitSlice||e.mozSlice,n?((i=new FileReader).onload=y(this._chunkLoaded,this),i.onerror=y(this._chunkError,this)):i=new FileReaderSync,this._nextChunk()},this._nextChunk=function(){this._finished||this._config.preview&&!(this._rowCount=this._input.size,this.parseChunk(e.target.result)},this._chunkError=function(){this._sendError(i.error)}}function c(e){var i;u.call(this,e=e||{}),this.stream=function(e){return i=e,this._nextChunk()},this._nextChunk=function(){var e,t;if(!this._finished)return e=this._config.chunkSize,i=e?(t=i.substring(0,e),i.substring(e)):(t=i,""),this._finished=!i,this.parseChunk(t)}}function p(e){u.call(this,e=e||{});var 
t=[],i=!0,r=!1;this.pause=function(){u.prototype.pause.apply(this,arguments),this._input.pause()},this.resume=function(){u.prototype.resume.apply(this,arguments),this._input.resume()},this.stream=function(e){this._input=e,this._input.on("data",this._streamData),this._input.on("end",this._streamEnd),this._input.on("error",this._streamError)},this._checkIsFinished=function(){r&&1===t.length&&(this._finished=!0)},this._nextChunk=function(){this._checkIsFinished(),t.length?this.parseChunk(t.shift()):i=!0},this._streamData=y(function(e){try{t.push("string"==typeof e?e:e.toString(this._config.encoding)),i&&(i=!1,this._checkIsFinished(),this.parseChunk(t.shift()))}catch(e){this._streamError(e)}},this),this._streamError=y(function(e){this._streamCleanUp(),this._sendError(e)},this),this._streamEnd=y(function(){this._streamCleanUp(),r=!0,this._streamData("")},this),this._streamCleanUp=y(function(){this._input.removeListener("data",this._streamData),this._input.removeListener("end",this._streamEnd),this._input.removeListener("error",this._streamError)},this)}function i(m){var n,s,a,t,o=Math.pow(2,53),h=-o,u=/^\s*-?(\d+\.?|\.\d+|\d+\.\d+)([eE][-+]?\d+)?\s*$/,d=/^((\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z)))$/,i=this,r=0,f=0,l=!1,e=!1,c=[],p={data:[],errors:[],meta:{}};function y(e){return"greedy"===m.skipEmptyLines?""===e.join("").trim():1===e.length&&0===e[0].length}function g(){if(p&&a&&(k("Delimiter","UndetectableDelimiter","Unable to auto-detect delimiting character; defaulted to '"+v.DefaultDelimiter+"'"),a=!1),m.skipEmptyLines&&(p.data=p.data.filter(function(e){return!y(e)})),_()){if(p)if(Array.isArray(p.data[0])){for(var e=0;_()&&e(e=>(m.dynamicTypingFunction&&void 
0===m.dynamicTyping[e]&&(m.dynamicTyping[e]=m.dynamicTypingFunction(e)),!0===(m.dynamicTyping[e]||m.dynamicTyping)))(e)?"true"===t||"TRUE"===t||"false"!==t&&"FALSE"!==t&&((e=>{if(u.test(e)){e=parseFloat(e);if(h=c.length?"__parsed_extra":c[r]:n,s=m.transform?m.transform(s,n):s);"__parsed_extra"===n?(i[n]=i[n]||[],i[n].push(s)):i[n]=s}return m.header&&(r>c.length?k("FieldMismatch","TooManyFields","Too many fields: expected "+c.length+" fields but parsed "+r,f+t):rm.preview?s.abort():(p.data=p.data[0],t(p,i))))}),this.parse=function(e,t,i){var r=m.quoteChar||'"',r=(m.newline||(m.newline=this.guessLineEndings(e,r)),a=!1,m.delimiter?U(m.delimiter)&&(m.delimiter=m.delimiter(e),p.meta.delimiter=m.delimiter):((r=((e,t,i,r,n)=>{var s,a,o,h;n=n||[",","\t","|",";",v.RECORD_SEP,v.UNIT_SEP];for(var u=0;u=i.length/2?"\r\n":"\r"}}function P(e){return e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&")}function E(C){var S=(C=C||{}).delimiter,O=C.newline,x=C.comments,I=C.step,A=C.preview,T=C.fastMode,D=null,L=!1,F=null==C.quoteChar?'"':C.quoteChar,j=F;if(void 0!==C.escapeChar&&(j=C.escapeChar),("string"!=typeof S||-1=A)return w(!0);break}u.push({type:"Quotes",code:"InvalidQuotes",message:"Trailing quote on quoted field is malformed",row:h.length,index:z}),m++}}else if(x&&0===d.length&&i.substring(z,z+a)===x){if(-1===g)return w();z=g+s,g=i.indexOf(O,z),p=i.indexOf(S,z)}else if(-1!==p&&(p=A)return w(!0)}return E();function k(e){h.push(e),f=z}function v(e){var t=0;return t=-1!==e&&(e=i.substring(m+1,e))&&""===e.trim()?e.length:t}function E(e){return r||(void 0===e&&(e=i.substring(z)),d.push(e),z=n,k(d),o&&R()),w()}function b(e){z=e,k(d),d=[],g=i.indexOf(O,z)}function w(e){if(C.header&&!t&&h.length&&!L){var s=h[0],a=Object.create(null),o=new Set(s);let n=!1;for(let r=0;r65279!==e.charCodeAt(0)?e:e.slice(1))(e),i=new(t.download?f:c)(t)):!0===e.readable&&U(e.read)&&U(e.on)?i=new p(t):(n.File&&e instanceof File||e instanceof Object)&&(i=new l(t)),i.stream(e);(i=(()=>{var 
e;return!!v.WORKERS_SUPPORTED&&(e=(()=>{var e=n.URL||n.webkitURL||null,t=r.toString();return v.BLOB_URL||(v.BLOB_URL=e.createObjectURL(new Blob(["var global = (function() { if (typeof self !== 'undefined') { return self; } if (typeof window !== 'undefined') { return window; } if (typeof global !== 'undefined') { return global; } return {}; })(); global.IS_PAPA_WORKER=true; ","(",t,")();"],{type:"text/javascript"})))})(),(e=new n.Worker(e)).onmessage=g,e.id=h++,o[e.id]=e)})()).userStep=t.step,i.userChunk=t.chunk,i.userComplete=t.complete,i.userError=t.error,t.step=U(t.step),t.chunk=U(t.chunk),t.complete=U(t.complete),t.error=U(t.error),delete t.worker,i.postMessage({input:e,config:t,workerId:i.id})},v.unparse=function(e,t){var n=!1,_=!0,m=",",y="\r\n",s='"',a=s+s,i=!1,r=null,o=!1,h=((()=>{if("object"==typeof t){if("string"!=typeof t.delimiter||v.BAD_DELIMITERS.filter(function(e){return-1!==t.delimiter.indexOf(e)}).length||(m=t.delimiter),"boolean"!=typeof t.quotes&&"function"!=typeof t.quotes&&!Array.isArray(t.quotes)||(n=t.quotes),"boolean"!=typeof t.skipEmptyLines&&"string"!=typeof t.skipEmptyLines||(i=t.skipEmptyLines),"string"==typeof t.newline&&(y=t.newline),"string"==typeof t.quoteChar&&(s=t.quoteChar),"boolean"==typeof t.header&&(_=t.header),Array.isArray(t.columns)){if(0===t.columns.length)throw new Error("Option columns is empty");r=t.columns}void 0!==t.escapeChar&&(a=t.escapeChar+s),t.escapeFormulae instanceof RegExp?o=t.escapeFormulae:"boolean"==typeof t.escapeFormulae&&t.escapeFormulae&&(o=/^[=+\-@\t\r].*$/)}})(),new RegExp(P(s),"g"));"string"==typeof e&&(e=JSON.parse(e));if(Array.isArray(e)){if(!e.length||Array.isArray(e[0]))return u(null,e,i);if("object"==typeof e[0])return u(r||Object.keys(e[0]),e,i)}else if("object"==typeof e)return"string"==typeof e.data&&(e.data=JSON.parse(e.data)),Array.isArray(e.data)&&(e.fields||(e.fields=e.meta&&e.meta.fields||r),e.fields||(e.fields=Array.isArray(e.data[0])?e.fields:"object"==typeof 
e.data[0]?Object.keys(e.data[0]):[]),Array.isArray(e.data[0])||"object"==typeof e.data[0]||(e.data=[e.data])),u(e.fields||[],e.data||[],i);throw new Error("Unable to serialize unrecognized input");function u(e,t,i){var r="",n=("string"==typeof e&&(e=JSON.parse(e)),"string"==typeof t&&(t=JSON.parse(t)),Array.isArray(e)&&0{for(var i=0;iPapa Parse Player + diff --git a/player/player.js b/player/player.js index 8150de64..f8b8e3ac 100644 --- a/player/player.js +++ b/player/player.js @@ -108,6 +108,7 @@ function buildConfig() skipEmptyLines: $('#skipEmptyLines').prop('checked'), chunk: $('#chunk').prop('checked') ? chunkFn : undefined, beforeFirstChunk: undefined, + skipFirstNLines: $('#skipFirstNLines').val() }; function getLineEnding() diff --git a/tests/.eslintrc.js b/tests/.eslintrc.js index 8c8fc0c9..f75898e0 100644 --- a/tests/.eslintrc.js +++ b/tests/.eslintrc.js @@ -1,5 +1,8 @@ module.exports = { "extends": ["../.eslintrc.js"], + "parserOptions": { + "ecmaVersion": 8 + }, "env": { "mocha": true }, diff --git a/tests/node-tests.js b/tests/node-tests.js index 9c57a225..cad8058e 100644 --- a/tests/node-tests.js +++ b/tests/node-tests.js @@ -5,6 +5,7 @@ var Papa = require("../papaparse.js"); var fs = require('fs'); var assert = require('assert'); var longSampleRawCsv = fs.readFileSync(__dirname + '/long-sample.csv', 'utf8'); +var utf8BomSampleRawCsv = fs.readFileSync(__dirname + '/utf-8-bom-sample.csv', 'utf8'); function assertLongSampleParsedCorrectly(parsedCsv) { assert.equal(8, parsedCsv.data.length); @@ -31,6 +32,7 @@ function assertLongSampleParsedCorrectly(parsedCsv) { "linebreak": "\n", "aborted": false, "truncated": false, + renamedHeaders: null, "cursor": 1209 }); assert.equal(parsedCsv.errors.length, 0); @@ -287,4 +289,14 @@ describe('PapaParse', function() { } }); }); + + it('handles utf-8 BOM encoded files', function(done) { + Papa.parse(utf8BomSampleRawCsv, { + header: true, + complete: function(parsedCsv) { + assert.deepEqual(parsedCsv.data[0], { A: 
'X', B: 'Y', C: 'Z' }); + done(); + } + }); + }); }); diff --git a/tests/test-cases.js b/tests/test-cases.js index 8d15b291..3a090e9f 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -13,6 +13,8 @@ var UNIT_SEP = String.fromCharCode(31); var FILES_ENABLED = false; try { new File([""], ""); // eslint-disable-line no-new + // Required since Node20 as it ads a FileGlobal but not a FileReaderSync + new FileReaderSync(); // eslint-disable-line no-new FILES_ENABLED = true; } catch (e) {} // safari, ie @@ -29,7 +31,8 @@ var CORE_PARSER_TESTS = [ input: 'A,b,c', expected: { data: [['A', 'b', 'c']], - errors: [] + errors: [], + meta: {delimiter: ',', renamedHeaders: null} } }, { @@ -585,7 +588,60 @@ var CORE_PARSER_TESTS = [ data: [['a', 'b', 'c'], ['']], errors: [] } - } + }, + { + description: "Simple duplicated header names", + input: 'A,A,A,A\n1,2,3,4', + config: { header: true }, + expected: { + data: [['A', 'A_1', 'A_2', 'A_3'], ['1', '2', '3', '4']], + errors: [], + meta: { + renamedHeaders: {A_1: 'A', A_2: 'A', A_3: 'A'}, + cursor: 15 + } + } + }, + { + description: "Duplicated header names with headerTransform", + input: 'A,A,A,A\n1,2,3,4', + config: { header: true, transformHeader: function(header) { return header.toLowerCase(); } }, + expected: { + data: [['a', 'a_1', 'a_2', 'a_3'], ['1', '2', '3', '4']], + errors: [], + meta: { + renamedHeaders: {a_1: 'a', a_2: 'a', a_3: 'a'}, + cursor: 15 + } + } + }, + { + description: "Duplicated header names existing column", + input: 'c,c,c,c_1\n1,2,3,4', + config: { header: true }, + expected: { + data: [['c', 'c_2', 'c_3', 'c_1'], ['1', '2', '3', '4']], + errors: [], + meta: { + renamedHeaders: {c_2: 'c', c_3: 'c'}, + cursor: 17 + } + } + }, + { + description: "Duplicate header names with __proto__ field", + input: '__proto__,__proto__,__proto__\n1,2,3', + config: { header: true }, + expected: { + data: [['__proto__', '__proto___1', '__proto___2'], ['1', '2', '3']], + errors: [], + meta: { + renamedHeaders: 
{__proto___1: '__proto__', __proto___2: '__proto__'}, + cursor: 35 + } + } + }, + ]; describe('Core Parser Tests', function() { @@ -594,6 +650,7 @@ describe('Core Parser Tests', function() { var actual = new Papa.Parser(test.config).parse(test.input); assert.deepEqual(actual.errors, test.expected.errors); assert.deepEqual(actual.data, test.expected.data); + assert.deepNestedInclude(actual.meta, test.expected.meta || {}); }); } @@ -847,6 +904,16 @@ var PARSE_TESTS = [ errors: [] } }, + { + description: "Multi-character delimiter (length 2) with quoted field", + input: 'a, b, "c, e", d', + config: { delimiter: ", " }, + notes: "The quotes must be immediately adjacent to the delimiter to indicate a quoted field", + expected: { + data: [['a', 'b', 'c, e', 'd']], + errors: [] + } + }, { description: "Callback delimiter", input: 'a$ b$ c', @@ -997,6 +1064,15 @@ var PARSE_TESTS = [ errors: [] } }, + { + description: "Dynamic typing skips ISO date strings ocurring in other strings", + input: 'ISO date,String with ISO date\r\n2018-05-04T21:08:03.269Z,The date is 2018-05-04T21:08:03.269Z\r\n2018-05-08T15:20:22.642Z,The date is 2018-05-08T15:20:22.642Z', + config: { dynamicTyping: true }, + expected: { + data: [["ISO date", "String with ISO date"], [new Date("2018-05-04T21:08:03.269Z"), "The date is 2018-05-04T21:08:03.269Z"], [new Date("2018-05-08T15:20:22.642Z"), "The date is 2018-05-08T15:20:22.642Z"]], + errors: [] + } + }, { description: "Blank line at beginning", input: '\r\na,b,c\r\nd,e,f', @@ -1355,7 +1431,8 @@ var PARSE_TESTS = [ delimiter: ',', cursor: 23, aborted: false, - truncated: false + truncated: false, + renamedHeaders: null } } }, @@ -1371,7 +1448,8 @@ var PARSE_TESTS = [ delimiter: ',', cursor: 19, aborted: false, - truncated: false + truncated: false, + renamedHeaders: null } } }, @@ -1387,7 +1465,8 @@ var PARSE_TESTS = [ delimiter: ',', cursor: 28, aborted: false, - truncated: false + truncated: false, + renamedHeaders: null } } }, @@ -1403,7 +1482,8 @@ 
var PARSE_TESTS = [ delimiter: ',', cursor: 27, aborted: false, - truncated: false + truncated: false, + renamedHeaders: null } } }, @@ -1419,7 +1499,8 @@ var PARSE_TESTS = [ delimiter: ',', cursor: 29, aborted: false, - truncated: false + truncated: false, + renamedHeaders: null } } }, @@ -1435,7 +1516,8 @@ var PARSE_TESTS = [ delimiter: ',', cursor: 24, aborted: false, - truncated: false + truncated: false, + renamedHeaders: null } } }, @@ -1451,7 +1533,8 @@ var PARSE_TESTS = [ delimiter: ',', cursor: 27, aborted: false, - truncated: false + truncated: false, + renamedHeaders: null } } }, @@ -1467,10 +1550,29 @@ var PARSE_TESTS = [ delimiter: ',', cursor: 27, aborted: false, - truncated: false + truncated: false, + renamedHeaders: null } } }, + { + description: "UTF-8 BOM encoded input is stripped from invisible BOM character", + input: '\ufeffA,B\nX,Y', + config: {}, + expected: { + data: [['A', 'B'], ['X', 'Y']], + errors: [], + } + }, + { + description: "UTF-8 BOM encoded input with header produces column key stripped from invisible BOM character", + input: '\ufeffA,B\nX,Y', + config: { header: true }, + expected: { + data: [{A: 'X', B: 'Y'}], + errors: [], + } + }, { description: "Parsing with skipEmptyLines set to 'greedy'", notes: "Must parse correctly without lines with no content", @@ -1506,7 +1608,53 @@ var PARSE_TESTS = [ data: [['a', 'b', 'c\n'], ['d', 'e', 'f']], errors: [] } - } + }, + { + description: "Skip First N number of lines , with header and 2 rows", + input: 'a,b,c,d\n1,2,3,4', + config: { header: true, skipFirstNLines: 1 }, + expected: { + data: [], + errors: [] + } + }, + { + description: "Skip First N number of lines , with header and 2 rows", + input: 'to-be-ignored\na,b,c,d\n1,2,3,4', + config: { header: true, skipFirstNLines: 1 }, + expected: { + data: [{a: '1', b: '2', c: '3', d: '4'}], + errors: [] + } + }, + { + description: "Skip First N number of lines , with header false", + input: 'a,b,c,d\n1,2,3,4\n4,5,6,7', + config: { header: 
false, skipFirstNLines: 1 }, + expected: { + data: [['1','2','3','4'],['4','5','6','7']], + errors: [] + } + }, + { + description: "Skip First N number of lines , with header false and skipFirstNLines as negative value", + input: 'a,b,c,d\n1,2,3,4\n4,5,6,7', + config: { header: false, skipFirstNLines: -2 }, + expected: { + data: [['a','b','c','d'],['1','2','3','4'],['4','5','6','7']], + errors: [] + } + }, + { + description: "Skip first 2 lines , with custom newline character", + input: 'skip-this\rskip-this\r1,2,3,4', + config: { header: false, skipFirstNLines: 2, newline: '\r' }, + expected: { + data: [['1','2','3','4']], + errors: [] + } + }, + ]; describe('Parse Tests', function() { @@ -1525,6 +1673,26 @@ describe('Parse Tests', function() { for (var i = 0; i < PARSE_TESTS.length; i++) { generateTest(PARSE_TESTS[i]); } + + // Custom test for Issue 1024 - renamedHeaders regression test + it('Issue 1024: renamedHeaders returned for simple duplicate headers (regression test)', function() { + var result = Papa.parse('Column,Column\n1-1,1-2\n2-1,2-2\n3-1,3-2', { header: true }); + + // Test data structure + assert.deepEqual(result.data, [ + {Column: '1-1', Column_1: '1-2'}, + {Column: '2-1', Column_1: '2-2'}, + {Column: '3-1', Column_1: '3-2'} + ]); + + // Test errors + assert.deepEqual(result.errors, []); + + // Test that renamedHeaders is present and correct + assert.isNotNull(result.meta.renamedHeaders, 'renamedHeaders should not be null'); + assert.isObject(result.meta.renamedHeaders, 'renamedHeaders should be an object'); + assert.deepEqual(result.meta.renamedHeaders, {Column_1: 'Column'}, 'renamedHeaders should contain the renamed header mapping'); + }); }); @@ -1589,6 +1757,31 @@ var PARSE_ASYNC_TESTS = [ data: [['A','B','C'],['X','Y','Z']], errors: [] } + }, + { + description: "File with a few regular and lots of empty lines", + disabled: !FILES_ENABLED, + input: FILES_ENABLED ? 
new File(["A,B,C\nX,Y,Z\n" + new Array(500000).fill(",,").join("\n")], "sample.csv") : false, + config: { + skipEmptyLines: "greedy" + }, + expected: { + data: [['A','B','C'],['X','Y','Z']], + errors: [] + } + }, + { + description: "File with a few regular and lots of empty lines + worker", + disabled: !FILES_ENABLED, + input: FILES_ENABLED ? new File(["A,B,C\nX,Y,Z\n" + new Array(500000).fill(",,").join("\n")], "sample.csv") : false, + config: { + worker: true, + skipEmptyLines: "greedy" + }, + expected: { + data: [['A','B','C'],['X','Y','Z']], + errors: [] + } } ]; @@ -1704,6 +1897,12 @@ var UNPARSE_TESTS = [ config: { delimiter: ', ' }, expected: 'A, b, c\r\nd, e, f' }, + { + description: "Custom delimiter (Multi-character), field contains custom delimiter", + input: [['A', 'b', 'c'], ['d', 'e', 'f, g']], + config: { delimiter: ', ' }, + expected: 'A, b, c\r\nd, e, "f, g"' + }, { description: "Bad delimiter (\\n)", notes: "Should default to comma", @@ -1811,9 +2010,9 @@ var UNPARSE_TESTS = [ }, { description: "Returns without rows with no content when skipEmptyLines is 'greedy'", - input: [[null, ' '], [], ['1', '2']], + input: [[null, ' '], [], ['1', '2']].concat(new Array(500000).fill(['', ''])).concat([['3', '4']]), config: {skipEmptyLines: 'greedy'}, - expected: '1,2' + expected: '1,2\r\n3,4' }, { description: "Returns empty rows when empty rows are passed and skipEmptyLines is false with headers", @@ -1840,6 +2039,13 @@ var UNPARSE_TESTS = [ config: {columns: ['a', 'b', 'c']}, expected: 'a,b,c\r\n1,2,\r\n\r\n3,,4' }, + { + description: "Column option used to manually specify keys with input type object", + notes: "Should not throw any error when attempting to serialize key not present in object. Columns are different than keys of the first object. 
When an object is missing a key then the serialized value should be an empty string.", + input: { data: [{a: 1, b: '2'}, {}, {a: 3, d: 'd', c: 4,}] }, + config: {columns: ['a', 'b', 'c']}, + expected: 'a,b,c\r\n1,2,\r\n\r\n3,,4' + }, { description: "Use different escapeChar", input: [{a: 'foo', b: '"quoted"'}], @@ -1856,7 +2062,7 @@ var UNPARSE_TESTS = [ description: "Escape formulae", input: [{ "Col1": "=danger", "Col2": "@danger", "Col3": "safe" }, { "Col1": "safe=safe", "Col2": "+danger", "Col3": "-danger, danger" }, { "Col1": "'+safe", "Col2": "'@safe", "Col3": "safe, safe" }], config: { escapeFormulae: true }, - expected: 'Col1,Col2,Col3\r\n\'=danger,\'@danger,safe\r\nsafe=safe,\'+danger,"\'-danger, danger"\r\n\'+safe,\'@safe,"safe, safe"' + expected: 'Col1,Col2,Col3\r\n"\'=danger","\'@danger",safe\r\nsafe=safe,"\'+danger","\'-danger, danger"\r\n\'+safe,\'@safe,"safe, safe"' }, { description: "Don't escape formulae by default", @@ -1873,7 +2079,7 @@ var UNPARSE_TESTS = [ description: "Escape formulae with single-quote quoteChar and escapeChar", input: [{ "Col1": "=danger", "Col2": "@danger", "Col3": "safe" }, { "Col1": "safe=safe", "Col2": "+danger", "Col3": "-danger, danger" }, { "Col1": "'+safe", "Col2": "'@safe", "Col3": "safe, safe" }], config: { escapeFormulae: true, quoteChar: "'", escapeChar: "'" }, - expected: 'Col1,Col2,Col3\r\n\'\'=danger,\'\'@danger,safe\r\nsafe=safe,\'\'+danger,\'\'\'-danger, danger\'\r\n\'\'+safe,\'\'@safe,\'safe, safe\'' + expected: 'Col1,Col2,Col3\r\n\'\'\'=danger\',\'\'\'@danger\',safe\r\nsafe=safe,\'\'\'+danger\',\'\'\'-danger, danger\'\r\n\'\'+safe,\'\'@safe,\'safe, safe\'' }, { description: "Escape formulae with single-quote quoteChar and escapeChar and forced quotes", @@ -1881,6 +2087,31 @@ var UNPARSE_TESTS = [ config: { escapeFormulae: true, quotes: true, quoteChar: "'", escapeChar: "'" }, expected: '\'Col1\',\'Col2\',\'Col3\'\r\n\'\'\'=danger\',\'\'\'@danger\',\'safe\'\r\n\'safe=safe\',\'\'\'+danger\',\'\'\'-danger, 
danger\'\r\n\'\'\'+safe\',\'\'\'@safe\',\'safe, safe\'' }, + // new escapeFormulae values: + { + description: "Escape formulae with tab and carriage-return", + input: [{ "Col1": "\tdanger", "Col2": "\rdanger,", "Col3": "safe\t\r" }], + config: { escapeFormulae: true }, + expected: 'Col1,Col2,Col3\r\n"\'\tdanger","\'\rdanger,","safe\t\r"' + }, + { + description: "Escape formulae with tab and carriage-return, with forced quotes", + input: [{ "Col1": " danger", "Col2": "\rdanger,", "Col3": "safe\t\r" }], + config: { escapeFormulae: true, quotes: true }, + expected: '"Col1","Col2","Col3"\r\n"\'\tdanger","\'\rdanger,","safe\t\r"' + }, + { + description: "Escape formulae with tab and carriage-return, with single-quote quoteChar and escapeChar", + input: [{ "Col1": " danger", "Col2": "\rdanger,", "Col3": "safe, \t\r" }], + config: { escapeFormulae: true, quoteChar: "'", escapeChar: "'" }, + expected: 'Col1,Col2,Col3\r\n\'\'\'\tdanger\',\'\'\'\rdanger,\',\'safe, \t\r\'' + }, + { + description: "Escape formulae with tab and carriage-return, with single-quote quoteChar and escapeChar and forced quotes", + input: [{ "Col1": " danger", "Col2": "\rdanger,", "Col3": "safe, \t\r" }], + config: { escapeFormulae: true, quotes: true, quoteChar: "'", escapeChar: "'" }, + expected: '\'Col1\',\'Col2\',\'Col3\'\r\n\'\'\'\tdanger\',\'\'\'\rdanger,\',\'safe, \t\r\'' + }, ]; describe('Unparse Tests', function() { @@ -2162,6 +2393,22 @@ var CUSTOM_TESTS = [ }); } }, + { + description: "Data is correctly parsed with steps when there are empty values", + expected: [{A: 'a', B: 'b', C: 'c', D: 'd'}, {A: 'a', B: '', C: '', D: ''}], + run: function(callback) { + var data = []; + Papa.parse('A,B,C,D\na,b,c,d\na,,,', { + header: true, + step: function(results) { + data.push(results.data); + }, + complete: function() { + callback(data); + } + }); + } + }, { description: "Step is called with the contents of the row", expected: ['A', 'b', 'c'], @@ -2547,7 +2794,24 @@ var CUSTOM_TESTS = [ var results 
= Papa.parse('"A","B","C","D"'); callback(results.meta.delimiter); } - } + }, + { + description: "Data is correctly parsed with chunks and duplicated headers", + expected: [{h0: 'a', h1: 'a'}, {h0: 'b', h1: 'b'}], + run: function(callback) { + var data = []; + Papa.parse('h0,h1\na,a\nb,b', { + header: true, + chunkSize: 10, + chunk: function(results) { + data.push(results.data[0]); + }, + complete: function() { + callback(data); + } + }); + } + }, ]; describe('Custom Tests', function() { @@ -2567,3 +2831,52 @@ describe('Custom Tests', function() { generateTest(CUSTOM_TESTS[i]); } }); + +(typeof window !== "undefined" ? describe : describe.skip)("Browser Tests", () => { + it("When parsing synchronously inside a web-worker not owned by PapaParse we should not invoke postMessage", async() => { + // Arrange + const papaParseScriptPath = new URL("../papaparse.js", window.document.baseURI).href; + + // Define our custom web-worker that loads PapaParse and executes a synchronous parse + const blob = new Blob([ + ` + importScripts('${papaParseScriptPath}'); + + self.addEventListener("message", function(event) { + if (event.data === "ExecuteParse") { + // Perform our synchronous parse, as requested + const results = Papa.parse('x\\ny\\n'); + postMessage({type: "ParseExecutedSuccessfully", results}); + } else { + // Otherwise, send whatever we received back. We shouldn't be hitting this (!) 
If we're reached + // this it means PapaParse thinks it is running inside a web-worker that it owns + postMessage(event.data); + } + }); + ` + ], {type: 'text/javascript'}); + + const blobURL = window.URL.createObjectURL(blob); + const webWorker = new Worker(blobURL); + + const receiveMessagePromise = new Promise((resolve, reject) => { + webWorker.addEventListener("message", (event) => { + if (event.data.type === "ParseExecutedSuccessfully") { + resolve(event.data); + } else { + const error = new Error(`Received unexpected message: ${JSON.stringify(event.data, null, 2)}`); + error.data = event.data; + reject(error); + } + }); + }); + + // Act + webWorker.postMessage("ExecuteParse"); + const webWorkerMessage = await receiveMessagePromise; + + // Assert + assert.equal("ParseExecutedSuccessfully", webWorkerMessage.type); + assert.equal(3, webWorkerMessage.results.data.length); + }); +}); diff --git a/tests/utf-8-bom-sample.csv b/tests/utf-8-bom-sample.csv new file mode 100644 index 00000000..4f85ff06 --- /dev/null +++ b/tests/utf-8-bom-sample.csv @@ -0,0 +1,2 @@ +A,B,C +X,Y,Z