Skip to content

Commit 91b3f69

Browse files
committed
Merge branch 'release'
2 parents d1961f4 + 533155f commit 91b3f69

File tree

8 files changed

+800
-17
lines changed

8 files changed

+800
-17
lines changed

.coveralls.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
repo_token:
1+
repo_token: 5IbONs3nKWfng4aJUVWWoSjvgd3gfD4kN

.github/workflows/rust.yml

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
name: Rust
2+
3+
on: [push]
4+
5+
jobs:
6+
build:
7+
8+
runs-on: ubuntu-latest
9+
10+
steps:
11+
- uses: actions/checkout@v1
12+
- name: Build
13+
run: cargo build --verbose
14+
- name: Run tests
15+
run: cargo test --verbose

.travis.yml

+35-12
Original file line numberDiff line numberDiff line change
@@ -4,30 +4,53 @@ language: rust
44
rust :
55
- stable
66
- nightly
7+
78
cache: cargo
89

9-
services:
10-
- docker
10+
addons:
11+
apt:
12+
packages:
13+
- libcurl4-openssl-dev
14+
- libelf-dev
15+
- libdw-dev
16+
- libbfd-dev
17+
- binutils-dev
18+
- cmake
19+
sources:
20+
- kalakris-cmake
1121

1222
before_script:
1323
- export PATH=$HOME/.cargo/bin:$PATH
14-
- cargo install cargo-update || echo "cargo-update already installed"
15-
- cargo install cargo-travis || echo "cargo-travis already installed"
16-
- cargo install cross
17-
- cargo install-update -a
24+
- if [ "$TRAVIS_OS_NAME" = "linux" ]; then cargo install cargo-update || echo "cargo-update already installed" ; fi
25+
- if [ "$TRAVIS_OS_NAME" = "linux" ]; then cargo install cargo-travis || echo "cargo-travis already installed" ; fi
26+
- if [ "$TRAVIS_OS_NAME" = "linux" ]; then cargo install-update -a ; fi
1827

1928
os:
2029
- linux
21-
sudo: false
30+
- windows
31+
2232
cache: cargo
33+
2334
script:
24-
- cargo test --verbose --all
25-
- cross build --target aarch64-unknown-linux-gnu
26-
- cross build --target x86_64-pc-windows-gnu
27-
- cross build --target *-apple-ios
35+
- if [ "$TRAVIS_OS_NAME" = "linux" ]; then ./openrefine-3.2/refine& fi
36+
- if [ "$TRAVIS_OS_NAME" = "linux" ]; then python3 -m http.server& fi
37+
- if [ "$TRAVIS_OS_NAME" = "linux" ]; then cargo test --verbose --all ; fi
38+
- cargo build --release
39+
40+
deploy:
41+
provider: releases
42+
api_key: $GITHUB_API_KEY
43+
file:
44+
- target/release/theses-fr-dumper
45+
- target/release/theses-fr-dumper.exe
46+
skip_cleanup: true
47+
on:
48+
repo: Eonm/theses-fr-dumper
49+
branch: release
50+
tags: true
2851

2952
after_success:
30-
- cargo coveralls --exclude-pattern=/.cargo,src/download/mod.rs,src/post_processors/mod.rs,src/main.rs
53+
- if [ "$TRAVIS_OS_NAME" = "linux" ]; then cargo coveralls --exclude-pattern=/.cargo,src/main.rs,src/cli.rs ; fi
3154

3255
notifications:
3356
email: false

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "theses-fr-dumper"
3-
version = "0.1.0"
3+
version = "0.0.1"
44
authors = ["eonm <eon.mathis@gmail.com>"]
55
edition = "2018"
66

LICENSE

+674
Large diffs are not rendered by default.

README.md

+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
<div class="center" align="center">
2+
3+
# Theses.fr dumper
4+
5+
[![Build Status](https://travis-ci.org/Eonm/theses-fr-dumper.svg?branch=master)](https://travis-ci.org/Eonm/theses-fr-dumper)
6+
[![Coverage Status](https://coveralls.io/repos/github/Eonm/theses-fr-dumper/badge.svg?branch=master)](https://coveralls.io/github/Eonm/theses-fr-dumper?branch=master)
7+
[![made-with-Rust](https://img.shields.io/badge/Made%20with-Rust-1f425f.svg)](https://www.rust-lang.org/)
8+
[![License: GPL v3](https://img.shields.io/badge/License-GPLv3-yellow.svg)](https://www.gnu.org/licenses/gpl-3.0.html)
9+
[![dependency status](https://deps.rs/repo/github/eonm/theses-fr-dumper/status.svg)](https://deps.rs/repo/github/eonm/theses-fr-dumper)
10+
11+
</div>
12+
13+
_Theses.fr dumper_ permet de récupérer les données de [theses.fr](https://www.theses.fr) par lots.
14+
15+
## Usage
16+
17+
__en utilisant un fichier de sortie__
18+
19+
```sh
20+
theses-fr-dumper -s 0 15 30 -f jsonl -o dump.jsonl
21+
```
22+
23+
__en utilisant un pipe__
24+
25+
```sh
26+
theses-fr-dumper -s 0 15 30 -f jsonl | grep -i "lorem ipsum"
27+
```
28+
29+
### Création d'une séquence de téléchargement `-s num num num`
30+
31+
La séquence de téléchargement s'exprime de la façon suivante : `-s début incrément fin`.
32+
33+
Si aucune séquence de téléchargement n'est spécifiée _theses.fr dumper_ téléchargera l'ensemble des notices par lot de 10&nbsp;000.
34+
35+
### Formats de récupération des données `-f`
36+
37+
* CSV
38+
* Json
39+
* [Jsonl](http://jsonlines.org/)
40+
* XML (à venir)
41+
42+
### Fichier de sortie `-o`
43+
44+
Cet argument permet de préciser le fichier de sortie. Si un fichier existe déjà son contenu sera effacé.
45+
46+
Sans l'argument `-o`, les informations récupérées du serveur sont affichées directement dans la console.
47+
48+
### Mode de connexion `-m keep-alive/reset`
49+
50+
_Theses.fr dumper_ permet de garder la connexion ouverte avec le serveur grâce à l'option `-m keep-alive`. Tous les lots seront téléchargés par la même connexion.
51+
52+
> :warning: L'option keep-alive peut entraîner un time out côté serveur. Par défaut chaque téléchargement de lots entraîne la création d'une nouvelle connexion avec le serveur.
53+
54+
## Build
55+
56+
```sh
57+
cargo build --release
58+
```
59+
60+
## Test
61+
62+
```sh
63+
cargo test
64+
```

src/download/mod.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use std::thread;
22
use std::time::Duration;
3-
3+
use serde_json;
44
use indicatif::ProgressBar;
55

66
#[derive(Debug, PartialEq)]
@@ -52,3 +52,10 @@ fn download_data(client: reqwest::RequestBuilder, debounce: Option<u64>) -> Stri
5252
.join()
5353
.expect("Faild to start thread")
5454
}
55+
56+
/// Asks theses.fr how many records its search currently reports.
///
/// Issues a GET request for the match-all query (`q=*:*`) with `rows=0`,
/// parses the response body as a generic JSON value, and returns the
/// `response.numFound` field rendered through `to_string()`.
///
/// # Panics
///
/// Panics with "Failed to get theses.fr data" if the request fails, and
/// with "Failed to get json data" if the body cannot be parsed as JSON.
///
/// NOTE(review): the presence and type of `numFound` are not checked here.
/// Presumably an absent key indexes to JSON `null` and is returned as the
/// text "null" — confirm that callers parsing this string handle that case.
pub fn get_total_reccords() -> String {
57+
reqwest::get("https://www.theses.fr/?q=*:*&format=json&type=avancee&rows=0").expect("Failed to get theses.fr data")
58+
.json::<serde_json::value::Value>()
59+
.expect("Failed to get json data")["response"]["numFound"]
60+
.to_string()
61+
}

src/main.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ mod url_builder;
1717
use url_builder::{csv_url_builder, json_url_builder};
1818

1919
mod download;
20-
use download::{download_in_sequence, DownloadMod};
20+
use download::{download_in_sequence, DownloadMod, get_total_reccords};
2121
fn main() {
2222
let matches = App::new("Theses.fr dumper")
2323
.version("0.1.0")
@@ -76,7 +76,7 @@ fn main() {
7676
.collect();
7777
sequence(s[0], s[1], s[2])
7878
} else {
79-
panic!("-s argument required")
79+
sequence(0, 10_000, get_total_reccords().parse::<u32>().expect("Invalid sequence number"))
8080
};
8181

8282
let post_process = if let Some(output_file) = matches.value_of("output") {

0 commit comments

Comments
 (0)