diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..3d53e48 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,73 @@ +name: test + +on: + push: + paths-ignore: + - '**.md' + - 'LICENSE' + + +jobs: + luacheck: + runs-on: ubuntu-latest + steps: + - + name: Checkout + uses: actions/checkout@v4 + - + name: Setup Lua + uses: leafo/gh-actions-lua@v9 + - + name: Setup Luarocks + uses: leafo/gh-actions-luarocks@v4 + - + name: Install Tools + run: luarocks install luacheck + - + name: Run luacheck + run: | + luacheck . + + test: + runs-on: ubuntu-latest + strategy: + matrix: + lua-version: + - "5.1" + - "5.2" + - "5.3" + - "5.4" + steps: + - + name: Checkout + uses: actions/checkout@v4 + with: + submodules: 'true' + - + name: Setup Lua ${{ matrix.lua-version }} + uses: leafo/gh-actions-lua@v9 + with: + luaVersion: ${{ matrix.lua-version }} + - + name: Setup Luarocks + uses: leafo/gh-actions-luarocks@v4 + - + name: Install Tools + run: | + luarocks install testcase + luarocks install luacov + - + name: Install + run: | + luarocks make + - + name: Run Test + run: | + testcase --coverage ./test/ + - + name: Upload coverage to Codecov + uses: codecov/codecov-action@v4.0.1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + flags: unittests + diff --git a/README.md b/README.md index a0623e4..780f112 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,20 @@ # lua-regex -regular expression for lua. +[![test](https://github.com/mah0x211/lua-regex/actions/workflows/test.yml/badge.svg)](https://github.com/mah0x211/lua-regex/actions/workflows/test.yml) +[![codecov](https://codecov.io/gh/mah0x211/lua-regex/branch/master/graph/badge.svg)](https://codecov.io/gh/mah0x211/lua-regex) -**NOTE:** this module is under heavy development. +simple regular expression module for lua. 
-## Dependencies -- lua-pcre2: +## Installation ---- - -## regex module - -```lua -local regex = require('regex') +```sh +luarocks install regex ``` +*** + ## Regular expression flags @@ -37,7 +35,7 @@ local regex = require('regex') creates a new regex object. -**Params** +**Parameters** - `pattern:string`: string containing expression to be compiled. - `flgs:string`: [regular expression flags](#regular-expression-flags). @@ -47,15 +45,34 @@ creates a new regex object. - `re:table`: regex object. - `err:string`: error message. +**Example** + +```lua +local regex = require('regex') +local re, err = regex.new('a(b+)(c+)', 'i') +if re then + local arr, err = re:match('ABBBCCC') + if arr then + print(arr[1]) -- 'ABBBCCC' + print(arr[2]) -- 'BBB' + print(arr[3]) -- 'CCC' + else + print(err) + end +else + print(err) +end +``` + ## Instance Methods -### arr, err = re:match( sbj [, offset] ) +## arr, err = regex:match( sbj [, offset] ) matches a compiled regular expression against a given subject string. It returns matched substrings. -**Params** +**Parameters** - `sbj:string`: the subject string. - `offset:number`: offset in the subject at which to start matching. @@ -66,11 +83,11 @@ matches a compiled regular expression against a given subject string. It returns - `err:string`: error message. -### arr, err = re:matches( sbj [, offset] ) +## arr, err = regex:matches( sbj [, offset] ) almost same as `match` method but it returns all matched substrings except capture strings. -**Params** +**Parameters** - `sbj:string`: the subject string. - `offset:number`: offset in the subject at which to start matching. @@ -81,43 +98,41 @@ almost same as `match` method but it returns all matched substrings except captu - `err:string`: error message. -### heads, tails, err = re:indexof( sbj [, offset] ) +## arr, err = regex:indexof( sbj [, offset] ) almost same as `match` method but it returns offsets of matched substrings. -**Params** +**Parameters** - `sbj:string`: the subject string. 
- `offset:number`: offset in the subject at which to start matching. **Returns** -- `heads:table`: array of head offset of matched substrings. -- `tails:table`: array of tail offset of matched substrings. +- `arr:table`: array of offsets of matched substrings. 1st index is the start offset of matched substring, and 2nd index is the end offset of matched substring, and 3rd index is the start offset of 1st capture string, and 4th index is the end offset of 1st capture string, and so on. - `err:string`: error message. -### heads, tails, err = re:indexesof( sbj [, offset] ) +## arr, err = regex:indexesof( sbj [, offset] ) -almost same as `match` method but it returns all offsets of matched substrings except capture strings. +almost same as `match` method but it returns all offsets of matched substrings **except capture strings**. -**Params** +**Parameters** - `sbj:string`: the subject string. - `offset:number`: offset in the subject at which to start matching. **Returns** -- `heads:table`: array of head offset of matched substrings. -- `tails:table`: array of tail offset of matched substrings. +- `arr:table`: array of offsets of matched substrings. 1st index is the start offset of matched substring, and 2nd index is the end offset of matched substring, and so on. - `err:string`: error message. -### ok, err = re:test( sbj [, offset] ) +## ok, err = regex:test( sbj [, offset] ) returns true if there is a matched. -**Params** +**Parameters** - `sbj:string`: the subject string. - `offset:number`: offset in the subject at which to start matching. @@ -132,89 +147,67 @@ returns true if there is a matched. ## Static Methods -### arr, err = regex.match( sbj, pattern [, flgs [, offset]] ) - -same as `match` instance method. - -**Params** - -- `sbj:string`: the subject string. -- `pattern:string`: string containing expression to be compiled. -- `flgs:string`: [regular expression flags](#regular-expression-flags). 
-- `offset:number`: offset in the subject at which to start matching. - -**Returns** - -- `arr:table`: array of matched substrings. -- `err:string`: error message. - - -### arr, err = regex.matches( sbj, pattern [, flgs [, offset]] ) - -same as `matches` instance method. - -**Params** - -- `sbj:string`: the subject string. -- `pattern:string`: string containing expression to be compiled. -- `flgs:string`: [regular expression flags](#regular-expression-flags). -- `offset:number`: offset in the subject at which to start matching. - -**Returns** - -- `arr:table`: array of matched substrings. -- `err:string`: error message. - - -### heads, tails, err = regex.indexof( sbj, pattern [, flgs [, offset]] ) - -same as `indexof` instance method. +## arr, err = regex.match( sbj, pattern [, flgs [, offset]] ) -**Params** +same as the following code: -- `sbj:string`: the subject string. -- `pattern:string`: string containing expression to be compiled. -- `flgs:string`: [regular expression flags](#regular-expression-flags). -- `offset:number`: offset in the subject at which to start matching. +```lua +local re, err = regex.new( pattern, flgs ) +if re then + return re:match( sbj, offset ) +end +return nil, err +``` -**Returns** -- `heads:table`: array of head offset of matched substrings. -- `tails:table`: array of tail offset of matched substrings. -- `err:string`: error message. +## arr, err = regex.matches( sbj, pattern [, flgs [, offset]] ) +same as the following code: -### heads, tails, err = regex.indexesof( sbj, pattern [, flgs [, offset]] ) +```lua +local re, err = regex.new( pattern, flgs ) +if re then + return re:matches( sbj, offset ) +end +return nil, err +``` -same as `indexesof` instance method. -**Params** +## arr, err = regex.indexof( sbj, pattern [, flgs [, offset]] ) -- `sbj:string`: the subject string. -- `pattern:string`: string containing expression to be compiled. -- `flgs:string`: [regular expression flags](#regular-expression-flags). 
-- `offset:number`: offset in the subject at which to start matching. +same as the following code: -**Returns** +```lua +local re, err = regex.new( pattern, flgs ) +if re then + return re:indexof( sbj, offset ) +end +return nil, err +``` -- `heads:table`: array of head offset of matched substrings. -- `tails:table`: array of tail offset of matched substrings. -- `err:string`: error message. +## arr, err = regex.indexesof( sbj, pattern [, flgs [, offset]] ) -### ok, err = regex.test( sbj, pattern [, flgs [, offset]] ) +same as the following code: -same as `test` instance method. +```lua +local re, err = regex.new( pattern, flgs ) +if re then + return re:indexesof( sbj, offset ) +end +return nil, err +``` -**Params** -- `sbj:string`: the subject string. -- `pattern:string`: string containing expression to be compiled. -- `flgs:string`: [regular expression flags](#regular-expression-flags). -- `offset:number`: offset in the subject at which to start matching. +## ok, err = regex.test( sbj, pattern [, flgs [, offset]] ) -**Returns** +same as the following code: -- `ok:boolean`: true on matched. -- `err:string`: error message. +```lua +local re, err = regex.new( pattern, flgs ) +if re then + return re:test( sbj, offset ) +end +return false, err +``` diff --git a/regex.lua b/regex.lua index 8db2501..91307b9 100644 --- a/regex.lua +++ b/regex.lua @@ -176,26 +176,26 @@ end --- @return string[]? 
arr --- @return any err function Regex:match(sbj, offset) - local head, tail, err = self.p:match(sbj, offset or self.lastidx) + local heads, tails, err = self.p:match(sbj, offset or self.lastidx) - if head then + if heads then -- found local arr = {} - for i = 1, #head do - arr[i] = sub(sbj, head[i], tail[i]) + for i = 1, #heads do + arr[i] = sub(sbj, heads[i], tails[i]) end -- updaet a last-index if global option is enabled if self.global == true then - self.lastidx = tail[1] + self.lastidx = tails[1] end return arr + elseif err then + return nil, err elseif self.global then -- reset a last-index to 0 if global option is enabled self.lastidx = 0 end - - return nil, err end --- indexesof @@ -291,10 +291,10 @@ Regex = require('metamodule').new(Regex) --- @return any err local function matches(sbj, pattern, flags, offset) local re, err = Regex(pattern, flags) - if err then - return nil, err + if re then + return re:matches(sbj, offset) end - return re:matches(sbj, offset) + return nil, err end --- match @@ -306,10 +306,10 @@ end --- @return any err local function match(sbj, pattern, flags, offset) local re, err = Regex(pattern, flags) - if err then - return nil, err + if re then + return re:match(sbj, offset) end - return re:match(sbj, offset) + return nil, err end --- indexesof @@ -321,10 +321,10 @@ end --- @return any err local function indexesof(sbj, pattern, flags, offset) local re, err = Regex(pattern, flags) - if err then - return nil, err + if re then + return re:indexesof(sbj, offset) end - return re:indexesof(sbj, offset) + return nil, err end --- indexof @@ -336,10 +336,10 @@ end --- @return any err local function indexof(sbj, pattern, flags, offset) local re, err = Regex(pattern, flags) - if err then - return nil, err + if re then + return re:indexof(sbj, offset) end - return re:indexof(sbj, offset) + return nil, err end --- test @@ -351,10 +351,10 @@ end --- @return any err local function test(sbj, pattern, flags, offset) local re, err = Regex(pattern, 
flags) - if err then - return false, err + if re then + return re:test(sbj, offset) end - return re:test(sbj, offset) + return false, err end return { diff --git a/test/regex_test.lua b/test/regex_test.lua new file mode 100644 index 0000000..710f952 --- /dev/null +++ b/test/regex_test.lua @@ -0,0 +1,422 @@ +require('luacov') +local testcase = require('testcase') +local assert = require('assert') +local regex = require('regex') + +function testcase.new() + -- test that create a new regex object + local re, err = regex.new('abc', 'ismxgojU') + assert.is_nil(err) + assert.match(re, '^regex: ', false) + + -- test that return error if failed to compile pattern + re, err = regex.new('abc(') + assert.is_nil(re) + assert.match(err, 'compilation failed') + + -- test that throws error if pattern is not string + err = assert.throws(regex.new, 123) + assert.match(err, 'pattern must be string') + + -- test that throws error if flags is not string + err = assert.throws(regex.new, 'abc', 123) + assert.match(err, 'flags must be string or nil') + + -- test that throws error if unknown flag is provided + err = assert.throws(regex.new, 'abc', 'v') + assert.match(err, 'unknown flag "v"') +end + +function testcase.matches_method() + local re = assert(regex.new('[a-z]+([08]\\d*)')) + local sbj = 'abcd0123efg4567hijk890' + + -- test that return matches in string array + local arr, err = re:matches(sbj) + assert.is_nil(err) + assert.equal(arr, { + 'abcd0123', + 'hijk890', + }) + + -- test that exec matches with offset + arr, err = re:matches(sbj, 6) + assert.is_nil(err) + assert.equal(arr, { + 'hijk890', + }) + + -- test that return nil and error if invalid offset + arr, err = re:matches(sbj, -1) + assert.match(err, 'offset') + assert.is_nil(arr) + + -- test that throws error if subject is not string + err = assert.throws(re.matches, re, 123) + assert.match(err, 'string expected') + + -- test that throws error if offset is not integer + err = assert.throws(re.matches, re, sbj, 1.23) + 
assert.match(err, 'integer expected') +end + +function testcase.matches() + local sbj = 'abcd0123efg4567hijk890' + local pattern = '[a-z]+([08]\\d*)' + + -- test that return matches in string array + local arr, err = regex.matches(sbj, pattern, nil, 6) + assert.is_nil(err) + assert.equal(arr, { + 'hijk890', + }) + + -- test that return nil and error if invalid pattern + arr, err = regex.matches(sbj, 'abc(', nil, 1) + assert.match(err, 'compilation failed') + assert.is_nil(arr) + + -- test that return nil and error if invalid offset + arr, err = regex.matches(sbj, pattern, nil, -1) + assert.match(err, 'offset') + assert.is_nil(arr) + + -- test that throws error if subject is not string + err = assert.throws(regex.matches, 123, pattern) + assert.match(err, 'string expected') + + -- test that throws error if offset is not integer + err = assert.throws(regex.matches, sbj, pattern, nil, 1.23) + assert.match(err, 'integer expected') +end + +function testcase.match_method() + local re = assert(regex.new('[a-z]+([08]\\d*)')) + local sbj = 'abcd0123efg4567hijk890' + + -- test that return first match in string array + local arr, err = re:match(sbj) + assert.is_nil(err) + assert.equal(arr, { + 'abcd0123', + '0123', + }) + + -- test that always return first matches if global flag is not set + arr, err = re:match(sbj) + assert.is_nil(err) + assert.equal(arr, { + 'abcd0123', + '0123', + }) + + -- test that exec matches with offset + arr, err = re:match(sbj, 6) + assert.is_nil(err) + assert.equal(arr, { + 'hijk890', + '890', + }) + + -- test that return next matches if global flag is set + re = assert(regex.new('[a-z]+([08]\\d*)', 'g')) + for i, exp in ipairs({ + { + 'abcd0123', + '0123', + }, + { + 'hijk890', + '890', + }, + {}, + }) do + arr, err = re:match(sbj) + if i == 3 then + assert.is_nil(arr) + assert.is_nil(err) + else + assert.is_nil(err) + assert.equal(arr, exp) + end + end + + -- test that return nil and error if invalid offset + arr, err = re:match(sbj, -1) + 
assert.match(err, 'offset') + assert.is_nil(arr) + + -- test that throws error if subject is not string + err = assert.throws(re.match, re, 123) + assert.match(err, 'string expected') + + -- test that throws error if offset is not integer + err = assert.throws(re.match, re, sbj, 1.23) + assert.match(err, 'integer expected') +end + +function testcase.match() + local sbj = 'abcd0123efg4567hijk890' + local pattern = '[a-z]+([08]\\d*)' + + -- test that return first match in string array + local arr, err = regex.match(sbj, pattern, nil, 6) + assert.is_nil(err) + assert.equal(arr, { + 'hijk890', + '890', + }) + + -- test that return nil and error if invalid pattern + arr, err = regex.match(sbj, 'abc(', nil, 1) + assert.match(err, 'compilation failed') + assert.is_nil(arr) + + -- test that return nil and error if invalid offset + arr, err = regex.match(sbj, pattern, nil, -1) + assert.match(err, 'offset') + assert.is_nil(arr) + + -- test that throws error if subject is not string + err = assert.throws(regex.match, 123, pattern) + assert.match(err, 'string expected') + + -- test that throws error if offset is not integer + err = assert.throws(regex.match, sbj, pattern, nil, 1.23) + assert.match(err, 'integer expected') +end + +function testcase.indexesof_method() + local re = assert(regex.new('[a-z]+([08]\\d*)')) + local sbj = 'abcd0123efg4567hijk890' + + -- test that return indexes of matches in integer array + local arr, err = re:indexesof(sbj) + assert.is_nil(err) + assert.equal(arr, { + -- 1st match 'abcd0123' + 1, + 8, + -- 2nd match 'hijk890' (capture offsets are not included) + 16, + 22, + }) + + -- test that return nil and error if invalid offset + arr, err = re:indexesof(sbj, -1) + assert.match(err, 'offset') + assert.is_nil(arr) + + -- test that throws error if subject is not string + err = assert.throws(re.indexesof, re, 123) + assert.match(err, 'string expected') + + -- test that throws error if offset is not integer + err = assert.throws(re.indexesof, re, sbj, 1.23) + 
assert.match(err, 'integer expected') +end + +function testcase.indexesof() + local sbj = 'abcd0123efg4567hijk890' + local pattern = '[a-z]+([08]\\d*)' + + -- test that return indexes of matches in integer array + local arr, err = regex.indexesof(sbj, pattern) + assert.is_nil(err) + assert.equal(arr, { + -- 1st match 'abcd0123' + 1, + 8, + -- 2nd match 'hijk890' (capture offsets are not included) + 16, + 22, + }) + + -- test that return nil and error if invalid pattern + arr, err = regex.indexesof(sbj, 'abc(', nil, 1) + assert.match(err, 'compilation failed') + assert.is_nil(arr) + + -- test that return nil and error if invalid offset + arr, err = regex.indexesof(sbj, pattern, nil, -1) + assert.match(err, 'offset') + assert.is_nil(arr) + + -- test that throws error if subject is not string + err = assert.throws(regex.indexesof, 123, pattern) + assert.match(err, 'string expected') + + -- test that throws error if offset is not integer + err = assert.throws(regex.indexesof, sbj, pattern, nil, 1.23) + assert.match(err, 'integer expected') +end + +function testcase.indexof_method() + local re = assert(regex.new('[a-z]+([08]\\d*)')) + local sbj = 'abcd0123efg4567hijk890' + + -- test that return first match in integer array + local arr, err = re:indexof(sbj) + assert.is_nil(err) + assert.equal(arr, { + -- 1st match 'abcd0123' + 1, + 8, + -- capture '0123' + 5, + 8, + }) + + -- test that always return first matches if global flag is not set + arr, err = re:indexof(sbj) + assert.is_nil(err) + assert.equal(arr, { + 1, + 8, + 5, + 8, + }) + + -- test that exec matches with offset + arr, err = re:indexof(sbj, 6) + assert.is_nil(err) + assert.equal(arr, { + -- 1st match 'hijk890', + 16, + 22, + -- capture '890' + 20, + 22, + }) + + -- test that return next matches if global flag is set + re = assert(regex.new('[a-z]+([08]\\d*)', 'g')) + for i, exp in ipairs({ + { + -- 1st match 'abcd0123' + 1, + 8, + -- capture '0123' + 5, + 8, + }, + { + -- 2nd match 'hijk890' + 16, + 22, + -- capture '890' + 
20, + 22, + }, + {}, + }) do + arr, err = re:indexof(sbj) + if i == 3 then + assert.is_nil(arr) + assert.is_nil(err) + else + assert.is_nil(err) + assert.equal(arr, exp) + end + end + + -- test that return nil and error if invalid offset + arr, err = re:indexof(sbj, -1) + assert.match(err, 'offset') + assert.is_nil(arr) + + -- test that throws error if subject is not string + err = assert.throws(re.indexof, re, 123) + assert.match(err, 'string expected') + + -- test that throws error if offset is not integer + err = assert.throws(re.indexof, re, sbj, 1.23) + assert.match(err, 'integer expected') +end + +function testcase.indexof() + local sbj = 'abcd0123efg4567hijk890' + local pattern = '[a-z]+([08]\\d*)' + + -- test that return first match in integer array + local arr, err = regex.indexof(sbj, pattern, nil, 6) + assert.is_nil(err) + assert.equal(arr, { + -- 1st match 'hijk890', + 16, + 22, + -- capture '890' + 20, + 22, + }) + + -- test that return nil and error if invalid pattern + arr, err = regex.indexof(sbj, 'abc(', nil, 1) + assert.match(err, 'compilation failed') + assert.is_nil(arr) + + -- test that return nil and error if invalid offset + arr, err = regex.indexof(sbj, pattern, nil, -1) + assert.match(err, 'offset') + assert.is_nil(arr) + + -- test that throws error if subject is not string + err = assert.throws(regex.indexof, 123, pattern) + assert.match(err, 'string expected') + + -- test that throws error if offset is not integer + err = assert.throws(regex.indexof, sbj, pattern, nil, 1.23) + assert.match(err, 'integer expected') +end + +function testcase.test_method() + local re = assert(regex.new('[a-z]+([08]\\d*)')) + local sbj = 'abcd0123efg4567hijk890' + + -- test that return true if matches found + local ok, err = re:test(sbj) + assert.is_nil(err) + assert.is_true(ok) + + -- test that return false if matches not found + ok, err = re:test('abc') + assert.is_nil(err) + assert.is_false(ok) + + -- test that return false and error if invalid offset + 
ok, err = re:test(sbj, -1) + assert.match(err, 'offset') + assert.is_false(ok) + + -- test that throws error if subject is not string + err = assert.throws(re.test, re, 123) + assert.match(err, 'string expected') + + -- test that throws error if offset is not integer + err = assert.throws(re.test, re, sbj, 1.23) + assert.match(err, 'integer expected') +end + +function testcase.test() + local sbj = 'abcd0123efg4567hijk890' + local pattern = '[a-z]+([08]\\d*)' + + -- test that return true if matches found + local ok, err = regex.test(sbj, pattern) + assert.is_true(ok) + assert.is_nil(err) + + -- test that return false and error if invalid pattern + ok, err = regex.test(sbj, 'abc(') + assert.is_false(ok) + assert.match(err, 'compilation failed') + + -- test that throws error if subject is not string + err = assert.throws(regex.test, 123, pattern) + assert.match(err, 'string expected') + + -- test that throws error if offset is not integer + err = assert.throws(regex.test, sbj, pattern, nil, 1.23) + assert.match(err, 'integer expected') +end +