Skip to content

Commit

Permalink
Merge pull request #52 from oscar-project/dev
Browse files (browse the repository at this point in the history)
Export checksum operation
  • Loading branch information
Uinelj authored Aug 31, 2023
2 parents a86b2ff + 2669cf7 commit 1e25228
Show file tree
Hide file tree
Showing 8 changed files with 40 additions and 15 deletions.
9 changes: 6 additions & 3 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: Rust

on:
push:
branches: [ main, dev ]
branches: [main, dev]
pull_request:
branches: [ main, dev ]
branches: [main, dev]
env:
CARGO_TERM_COLOR: always

Expand All @@ -22,7 +22,10 @@ jobs:

- name: Run cargo-tarpaulin
uses: actions-rs/tarpaulin@v0.1
continue-on-error: true
with:
version: 0.22.0
timeout: 180
args: "--avoid-cfg-tarpaulin"

- name: Upload to codecov.io
uses: codecov/codecov-action@v1
10 changes: 5 additions & 5 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 20 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
[package]
authors = ["Pedro J. Ortiz <pedro@pjortiz.com>", "Julien Abadji <aulien.jbadji@gmail.com>"]
authors = [
"Pedro J. Ortiz <pedro@pjortiz.com>",
"Julien Abadji <aulien.jbadji@gmail.com>",
]
edition = "2021"
name = "oscar-tools"
version = "0.3.0"
version = "0.4.0"
repository = "https://github.com/oscar-project/oscar-tools"
description = "Tools for processing OSCAR Corpora"
license = "Apache-2.0"

[[bin]]
name = "oscar-tools"
path = "src/main.rs"

[lib]
name = "oscar_tools"
path = "src/lib.rs"

[features]
zstd = ["dep:zstd"]

Expand All @@ -21,7 +32,7 @@ rayon = "1.5.1"
runiq-lib = "1.2.2"
serde_json = "1.0.78"
sha2 = "0.10.1"
zstd = {version="0.11.2", optional=true}
zstd = { version = "0.11.2", optional = true }
walkdir = "2.3.3"

[dependencies.clap]
Expand Down Expand Up @@ -49,5 +60,9 @@ ci = ["github"]
# The installers to generate for each app
installers = []
# Target platforms to build apps for (Rust target-triple syntax)
targets = ["x86_64-unknown-linux-gnu", "x86_64-apple-darwin", "x86_64-pc-windows-msvc", "aarch64-apple-darwin"]

targets = [
"x86_64-unknown-linux-gnu",
"x86_64-apple-darwin",
"x86_64-pc-windows-msvc",
"aarch64-apple-darwin",
]
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
mod error;
mod ops;

pub use ops::Checksum;
1 change: 1 addition & 0 deletions src/ops/checksum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use std::{
path::{Path, PathBuf},
};

use log::{debug, error, info, warn};
use rayon::{iter::ParallelIterator, prelude::ParallelBridge};
use sha2::{Digest, Sha384};

Expand Down
3 changes: 2 additions & 1 deletion src/ops/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use std::{
};

use flate2::{write::GzEncoder, Compression};
use log::{debug, error, info, warn};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use walkdir::WalkDir;

Expand Down Expand Up @@ -79,7 +80,7 @@ pub trait Compress {

Ok(())
}

/// Recursively compresses files in provided folder.
/// If `del_src` is set to `true`, removes the compressed files at `src` upon compression completion.
/// `src` has to exist and be a folder
Expand Down
2 changes: 1 addition & 1 deletion src/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ mod filter_tags;
mod sampling;
mod split;

pub(crate) use checksum::Checksum;
pub use checksum::Checksum;
pub(crate) use compress::Compress;
pub(crate) use dedup::Dedup;
pub(crate) use extract_text::ExtractText;
Expand Down
1 change: 1 addition & 0 deletions src/ops/split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
These operations split the corpus into smaller files of a defined max size.
!*/
use log::{debug, error, info, warn};
use std::{
borrow::Cow,
fs::File,
Expand Down

0 comments on commit 1e25228

Please sign in to comment.