Skip to content

Commit

Permalink
Merge pull request #11 from giacomocavalieri/perf-optimisation
Browse files Browse the repository at this point in the history
Improve gsv performance
  • Loading branch information
bcpeinhardt authored Oct 29, 2024
2 parents f10c42a + dd9cfbb commit 3cfa5b7
Show file tree
Hide file tree
Showing 13 changed files with 766 additions and 582 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# Changelog

## Unreleased
- Improved performance of `to_lists`, `to_dicts`, `from_lists` and `from_dicts`.
- Parsing no longer trims the csv fields, conforming to RFC 4180.
- The `to_lists` and `to_dicts` functions now return a structured error instead
of a `String`.

## v2.0.3 - 25 October 2024
- Patch to remove some unused imports.
Expand Down
34 changes: 12 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,30 @@
[![Package Version](https://img.shields.io/hexpm/v/gsv)](https://hex.pm/packages/gsv)
[![Hex Docs](https://img.shields.io/badge/hex-docs-ffaff3)](https://hexdocs.pm/gsv/)

This is a simple csv parser and writer for Gleam. It will get more performant/battle tested in the future,
but if you're looking for that now, I'd recommend doing ffi to an existing parser in your target runtime.

#### Example
A simple csv parser and serialiser for Gleam.

```gleam
import gsv.{Unix, Windows}
pub fn main() {
let csv_str = "Hello, World\nGoodbye, Mars"
// Parse a CSV string to a List(List(String))
let assert Ok(records) = gsv.to_lists(csv_str)
let csv =
"name,loves
lucy,gleam"
// Write a List(List(String)) to a CSV string
let csv_str = records
|> gsv.from_lists(separator: ",", line_ending: Windows)
// Parse a csv string into a list of rows.
let assert Ok(rows) = gsv.to_lists(csv)
// -> [["name", "loves"], ["lucy", "gleam"]]
// Parse a CSV string with headers to a List(Dict(String, String))
let assert Ok(records) = gsv.to_dicts(csv_str)
// => [ dict.from_list([ #("Hello", "Goodbye"), #("World", "Mars") ]) ]
// Write a List(Dict(String, String)) to a CSV string, treating the keys as the header row
let csv_str = records
|> gsv.from_dicts(separator: ",", line_ending: Windows)
// If your csv has headers you can also parse it into a list of dictionaries.
let assert Ok(rows) = gsv.to_dicts(csv)
// -> [dict.from_list([#("name", "lucy"), #("loves", "gleam")])]
}
```

## Installation

If available on Hex this package can be added to your Gleam project:
To add this package to your Gleam project:

```sh
gleam add gsv
gleam add gsv@3
```

and its documentation can be found at <https://hexdocs.pm/gsv>.
9 changes: 9 additions & 0 deletions birdie_snapshots/double_quote_in_middle_of_field.accepted
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
version: 1.2.3
title: double quote in middle of field
file: ./test/gsv_test.gleam
test_name: double_quote_in_middle_of_field_test
---
field,other"field
╰─ This is an unescaped double quote
9 changes: 9 additions & 0 deletions birdie_snapshots/unclosed_escaped_field.accepted
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
version: 1.2.3
title: unclosed escaped field
file: ./test/gsv_test.gleam
test_name: unclosed_escaped_field_test
---
"closed","unclosed
╰─ This escaped field is not closed
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
version: 1.2.3
title: unescaped double quote in escaped field
file: ./test/gsv_test.gleam
test_name: unescaped_double_quote_in_escaped_field_test
---
"unescaped double quote -> " in escaped field"
╰─ This is an unescaped double quote
4 changes: 3 additions & 1 deletion gleam.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ internal_modules = [
]

[dependencies]
gleam_stdlib = "~> 0.34 or ~> 1.0"
gleam_stdlib = ">= 0.40.0 and < 1.0.0"
glearray = ">= 1.0.0 and < 2.0.0"

[dev-dependencies]
gleeunit = "~> 1.0"
birdie = ">= 1.2.3 and < 2.0.0"
24 changes: 21 additions & 3 deletions manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,28 @@
# You typically do not need to edit this file

packages = [
{ name = "gleam_stdlib", version = "0.34.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "1FB8454D2991E9B4C0C804544D8A9AD0F6184725E20D63C3155F0AEB4230B016" },
{ name = "gleeunit", version = "1.0.2", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "D364C87AFEB26BDB4FB8A5ABDE67D635DC9FA52D6AB68416044C35B096C6882D" },
{ name = "argv", version = "1.0.2", build_tools = ["gleam"], requirements = [], otp_app = "argv", source = "hex", outer_checksum = "BA1FF0929525DEBA1CE67256E5ADF77A7CDDFE729E3E3F57A5BDCAA031DED09D" },
{ name = "birdie", version = "1.2.3", build_tools = ["gleam"], requirements = ["argv", "edit_distance", "filepath", "glance", "gleam_community_ansi", "gleam_erlang", "gleam_stdlib", "justin", "rank", "simplifile", "trie_again"], otp_app = "birdie", source = "hex", outer_checksum = "AE1207210E9CC8F4170BCE3FB3C23932F314C352C3FD1BCEA44CF4BF8CF60F93" },
{ name = "edit_distance", version = "2.0.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "edit_distance", source = "hex", outer_checksum = "A1E485C69A70210223E46E63985FA1008B8B2DDA9848B7897469171B29020C05" },
{ name = "filepath", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "filepath", source = "hex", outer_checksum = "EFB6FF65C98B2A16378ABC3EE2B14124168C0CE5201553DE652E2644DCFDB594" },
{ name = "glance", version = "0.11.0", build_tools = ["gleam"], requirements = ["gleam_stdlib", "glexer"], otp_app = "glance", source = "hex", outer_checksum = "8F3314D27773B7C3B9FB58D8C02C634290422CE531988C0394FA0DF8676B964D" },
{ name = "gleam_community_ansi", version = "1.4.1", build_tools = ["gleam"], requirements = ["gleam_community_colour", "gleam_stdlib"], otp_app = "gleam_community_ansi", source = "hex", outer_checksum = "4CD513FC62523053E62ED7BAC2F36136EC17D6A8942728250A9A00A15E340E4B" },
{ name = "gleam_community_colour", version = "1.4.0", build_tools = ["gleam"], requirements = ["gleam_json", "gleam_stdlib"], otp_app = "gleam_community_colour", source = "hex", outer_checksum = "795964217EBEDB3DA656F5EB8F67D7AD22872EB95182042D3E7AFEF32D3FD2FE" },
{ name = "gleam_erlang", version = "0.27.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_erlang", source = "hex", outer_checksum = "DE468F676D71B313C6C8C5334425CFCF827837333F8AB47B64D8A6D7AA40185D" },
{ name = "gleam_json", version = "1.0.1", build_tools = ["gleam"], requirements = ["gleam_stdlib", "thoas"], otp_app = "gleam_json", source = "hex", outer_checksum = "9063D14D25406326C0255BDA0021541E797D8A7A12573D849462CAFED459F6EB" },
{ name = "gleam_stdlib", version = "0.40.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "86606B75A600BBD05E539EB59FABC6E307EEEA7B1E5865AFB6D980A93BCB2181" },
{ name = "glearray", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "glearray", source = "hex", outer_checksum = "B99767A9BC63EF9CC8809F66C7276042E5EFEACAA5B25188B552D3691B91AC6D" },
{ name = "gleeunit", version = "1.2.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "F7A7228925D3EE7D0813C922E062BFD6D7E9310F0BEE585D3A42F3307E3CFD13" },
{ name = "glexer", version = "1.0.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "glexer", source = "hex", outer_checksum = "BD477AD657C2B637FEF75F2405FAEFFA533F277A74EF1A5E17B55B1178C228FB" },
{ name = "justin", version = "1.0.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "justin", source = "hex", outer_checksum = "7FA0C6DB78640C6DC5FBFD59BF3456009F3F8B485BF6825E97E1EB44E9A1E2CD" },
{ name = "rank", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "rank", source = "hex", outer_checksum = "5660E361F0E49CBB714CC57CC4C89C63415D8986F05B2DA0C719D5642FAD91C9" },
{ name = "simplifile", version = "2.2.0", build_tools = ["gleam"], requirements = ["filepath", "gleam_stdlib"], otp_app = "simplifile", source = "hex", outer_checksum = "0DFABEF7DC7A9E2FF4BB27B108034E60C81BEBFCB7AB816B9E7E18ED4503ACD8" },
{ name = "thoas", version = "1.2.1", build_tools = ["rebar3"], requirements = [], otp_app = "thoas", source = "hex", outer_checksum = "E38697EDFFD6E91BD12CEA41B155115282630075C2A727E7A6B2947F5408B86A" },
{ name = "trie_again", version = "1.1.2", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "trie_again", source = "hex", outer_checksum = "5B19176F52B1BD98831B57FDC97BD1F88C8A403D6D8C63471407E78598E27184" },
]

[requirements]
gleam_stdlib = { version = "~> 0.34 or ~> 1.0" }
birdie = { version = ">= 1.2.3 and < 2.0.0" }
gleam_stdlib = { version = ">= 0.40.0 and < 1.0.0" }
glearray = { version = ">= 1.0.0 and < 2.0.0" }
gleeunit = { version = "~> 1.0" }
Loading

0 comments on commit 3cfa5b7

Please sign in to comment.