From 6a998bcfa265ae432c05783563e982d3779d1077 Mon Sep 17 00:00:00 2001 From: Roger Ramos Date: Sat, 13 Sep 2025 02:52:16 -0500 Subject: [PATCH] Add position information support - Add includeDetails parameter to main export function - Return position metadata when requested - Add tests and documentation Add position information support - Add includeDetails parameter to main export function - Return position metadata when requested - Add tests and documentation --- README.md | 30 +++++ lib/extract-json-from-string.js | 30 ++++- test/extract-json-from-string.js | 181 +++++++++++++++++++++++++++++++ 3 files changed, 235 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 43ff8a2..301f46b 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,36 @@ let objects = window.extractJson('Expected { foo: "bar" } to equal { foo: "baz" // ] ``` +### With Position Information + +To get position information along with the extracted objects, pass `true` as the second parameter: + +```js +const extract = require('extract-json-from-string'); + +let detailed = extract('Expected { foo: "bar" } to equal { foo: "baz" }', true); +// [ +// { +// object: { foo: 'bar' }, +// raw: '{ foo: "bar" }', +// start: 9, +// end: 23 +// }, +// { +// object: { foo: 'baz' }, +// raw: '{ foo: "baz" }', +// start: 33, +// end: 47 +// } +// ] +``` + +Each result object contains: +- `object`: The parsed JSON/JavaScript object +- `raw`: The exact substring of the original string that was extracted and parsed +- `start`: Zero-based index in the original string where `raw` begins +- `end`: Zero-based index in the original string just past the last character of `raw` (exclusive), so `end - start` equals `raw.length` + ## N.B. For the time being, I've written a very naive implementation. There are lots of ways to break this (like stringified JSON or escaped quotes within the value of a property). Please report any issues, and I'll do my best to fix them and make it _less_ naive. 
diff --git a/lib/extract-json-from-string.js b/lib/extract-json-from-string.js index 36b8d83..9a71119 100644 --- a/lib/extract-json-from-string.js +++ b/lib/extract-json-from-string.js @@ -53,20 +53,38 @@ const extract = (str) => { } let obj = str.substring(0, endIndex + 1); - return obj; + return { + raw: obj, + start: startIndex, + end: startIndex + endIndex + 1 + }; }; -module.exports = (str) => { +module.exports = (str, includeDetails = false) => { let result; const objects = []; - while ((result = extract(str)) !== null) { + let currentOffset = 0; + let remainingStr = str; + + while ((result = extract(remainingStr)) !== null) { try { - let obj = jsonify(result); - objects.push(obj); + let obj = jsonify(result.raw); + objects.push({ + object: obj, + raw: result.raw, + start: currentOffset + result.start, + end: currentOffset + result.end + }); } catch (e) { // Do nothing } - str = str.replace(result, ''); + + currentOffset = currentOffset + result.end; + remainingStr = str.substring(currentOffset); + } + + if (!includeDetails) { + return objects.map(item => item.object); } return objects; diff --git a/test/extract-json-from-string.js b/test/extract-json-from-string.js index f00a940..433dc8d 100644 --- a/test/extract-json-from-string.js +++ b/test/extract-json-from-string.js @@ -139,4 +139,185 @@ describe('extract-json-from-string', function() { }) }) }) + + describe('extract() with includeDetails parameter', () => { + context('with a single JSON object', () => { + it('should return the object with position information', () => { + let text = `Here's an object ${JSON.stringify({ foo: 'bar' })} that should be extracted`; + let objs = extract(text, true); + + objs.length.should.equal(1); + objs[0].should.have.properties(['object', 'start', 'end', 'raw']); + objs[0].object.should.eql({ foo: 'bar' }); + objs[0].start.should.equal(17); + objs[0].end.should.equal(30); + objs[0].raw.should.equal('{"foo":"bar"}'); + }) + }) + + context('with a single object', () => { + 
it('should return the object with position information', () => { + let text = "Here's an object { foo: 'bar' } that should be extracted"; + let objs = extract(text, true); + + objs.length.should.equal(1); + objs[0].should.have.properties(['object', 'start', 'end', 'raw']); + objs[0].object.should.eql({ foo: 'bar' }); + objs[0].start.should.equal(17); + objs[0].end.should.equal(31); + objs[0].raw.should.equal("{ foo: 'bar' }"); + }) + }) + + context('with multiple objects', () => { + it('should return both objects with correct positions', () => { + let text = 'First { a: 1 } and second { b: 2 } object'; + let objs = extract(text, true); + + objs.length.should.equal(2); + + // First object + objs[0].object.should.eql({ a: 1 }); + objs[0].start.should.equal(6); + objs[0].end.should.equal(14); + objs[0].raw.should.equal('{ a: 1 }'); + + // Second object + objs[1].object.should.eql({ b: 2 }); + objs[1].start.should.equal(26); + objs[1].end.should.equal(34); + objs[1].raw.should.equal('{ b: 2 }'); + }) + }) + + context('with arrays and objects mixed', () => { + it('should return all items with correct positions', () => { + let text = 'Array [1, 2] and object { name: \'test\' } here'; + let objs = extract(text, true); + + objs.length.should.equal(2); + + // Array + objs[0].object.should.eql([1, 2]); + objs[0].start.should.equal(6); + objs[0].end.should.equal(12); + objs[0].raw.should.equal('[1, 2]'); + + // Object + objs[1].object.should.eql({ name: 'test' }); + objs[1].start.should.equal(24); + objs[1].end.should.equal(40); + objs[1].raw.should.equal('{ name: \'test\' }'); + }) + }) + + context('with nested structures', () => { + it('should return the outer structure with correct positions', () => { + let text = 'Complex { outer: { inner: 42 } } structure'; + let objs = extract(text, true); + + objs.length.should.equal(1); + objs[0].object.should.eql({ outer: { inner: 42 } }); + objs[0].start.should.equal(8); + objs[0].end.should.equal(32); + objs[0].raw.should.equal('{ 
outer: { inner: 42 } }'); + }) + }) + + context('with nested arrays', () => { + it('should return the array with correct positions', () => { + let text = 'Here\'s an array [ \'foo\', [ \'bar\', [ \'baz\', \'quux\' ], \'hello\' ] ] that should be extracted'; + let objs = extract(text, true); + + objs.length.should.equal(1); + objs[0].object.should.eql(['foo', ['bar', ['baz', 'quux'], 'hello']]); + objs[0].start.should.equal(16); + objs[0].end.should.equal(64); + objs[0].raw.should.equal('[ \'foo\', [ \'bar\', [ \'baz\', \'quux\' ], \'hello\' ] ]'); + }) + }) + + context('with no valid objects', () => { + it('should return empty array', () => { + let text = 'No objects here, just plain text'; + let objs = extract(text, true); + + objs.length.should.equal(0); + objs.should.be.an.Array(); + }) + }) + + context('with invalid JSON structures', () => { + it('should skip invalid structures and continue', () => { + let text = 'Invalid { broken and valid { foo: \'bar\' } object'; + let objs = extract(text, true); + + objs.length.should.equal(0); + }) + + it('should skip invalid structures but extract valid ones after', () => { + let text = 'Invalid { broken } but valid { working: true } here'; + let objs = extract(text, true); + + objs.length.should.equal(1); + objs[0].object.should.eql({ working: true }); + objs[0].start.should.equal(29); + objs[0].end.should.equal(46); + objs[0].raw.should.equal('{ working: true }'); + }) + }) + + context('with a mix of arrays and objects', () => { + it('should return the outer items with positions', () => { + let objs = extract(`Here's some ['foo', { bar: true }] things to ${JSON.stringify({ baz: 'quux', items: [1, 2, 3], nested: [{ property: { inArray: 1 } }]})} extract`, true); + + objs.length.should.equal(2); + objs[0].object.should.eql(['foo', { bar: true }]); + objs[0].start.should.equal(12); + objs[0].end.should.equal(34); + objs[0].raw.should.equal("['foo', { bar: true }]"); + + objs[1].object.should.eql({ + baz: 'quux', + items: 
[1, 2, 3], + nested: [ + { + property: { + inArray: 1 + } + } + ] + }); + objs[1].start.should.equal(45); + objs[1].end.should.equal(113); + }) + }) + + context('checking compatibility with normal mode', () => { + it('should return same objects when includeDetails is false', () => { + let text = 'Object { test: \'value\' } and array [1, 2, 3]'; + + let normalResult = extract(text); + let detailedResult = extract(text, false); + + normalResult.should.eql(detailedResult); + normalResult.length.should.equal(2); + normalResult[0].should.eql({ test: 'value' }); + normalResult[1].should.eql([1, 2, 3]); + }) + + it('should return different structure when includeDetails is true', () => { + let text = 'Object { test: \'value\' } here'; + + let normalResult = extract(text); + let detailedResult = extract(text, true); + + normalResult.should.not.eql(detailedResult); + normalResult.should.eql([{ test: 'value' }]); + detailedResult.should.be.an.Array(); + detailedResult[0].should.have.properties(['object', 'start', 'end', 'raw']); + detailedResult[0].object.should.eql({ test: 'value' }); + }) + }) + }) })