Skip to content

Commit

Permalink
feat: add a new sub command 'source' to print the database schema (#226)
Browse files Browse the repository at this point in the history
  • Loading branch information
Filip Kieres authored Oct 14, 2022
1 parent 8a918eb commit 7714dd0
Show file tree
Hide file tree
Showing 18 changed files with 506 additions and 48 deletions.
113 changes: 113 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,11 @@ replibyte -c conf.yaml dump restore remote -v latest
## Features

- [x] Support data dump and restore for PostgreSQL, MySQL and MongoDB
- [x] Analyze your data schema 🔎
- [x] Replace sensitive data with fake data
- [x] Works on large databases (> 10GB)
- [x] Database Subsetting: Scale down a production database to a more reasonable size 🔥
- [x] Start a local database with the prod data in a single command 🔥
- [x] Start a local database with the production data in a single command 🔥
- [x] On-the-fly data (de)compression (Zlib)
- [x] On-the-fly data de/encryption (AES-256)
- [x] Fully stateless (no server, no daemon) and lightweight binary 🍃
Expand Down
2 changes: 1 addition & 1 deletion dump-parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
bson = "2.1"
bson = "2.2"
serde = "1.0"

########## WARNING #############
Expand Down
34 changes: 34 additions & 0 deletions dump-parser/src/mysql/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -660,6 +660,40 @@ pub fn get_single_quoted_string_value_at_position(tokens: &Vec<Token>, pos: usiz
None
}

/// Extracts the column names declared in a `CREATE ...` statement.
///
/// Returns an empty vector when the first token is not the `CREATE` keyword.
/// Otherwise the tokens are scanned from the first `(` up to the `)` that closes
/// it. For every comma-separated entry at that top level, the first
/// `Token::SingleQuotedString` is reported as the column name (presumably how
/// this tokenizer represents quoted identifiers; confirm against the tokenizer).
///
/// Parenthesis depth is tracked so that nested groups such as `int(11)`,
/// `decimal(10,2)` or `enum('a','b')` neither end the scan early nor contribute
/// their own commas and string literals as columns.
pub fn get_column_names_from_create_query(tokens: &Vec<Token>) -> Vec<String> {
    if !match_keyword_at_position(Create, tokens, 0) {
        return Vec::new();
    }

    // 0 = before the column list, 1 = directly inside it, >1 = nested group.
    let mut depth: usize = 0;
    // True once the current top-level entry has already yielded its name.
    let mut consumed = false;
    let mut names = Vec::new();

    for token in tokens.iter() {
        match token {
            Token::LParen => depth += 1,
            Token::RParen => match depth {
                // Stray `)` before the column list opens: ignore it.
                0 => {}
                // This `)` closes the column list: nothing more to collect.
                1 => break,
                _ => depth -= 1,
            },
            // Only a top-level comma starts a new column definition.
            Token::Comma if depth == 1 => consumed = false,
            Token::SingleQuotedString(name) if depth == 1 && !consumed => {
                consumed = true;
                names.push(name.clone());
            }
            _ => {}
        }
    }

    names
}

pub fn get_column_names_from_insert_into_query(tokens: &Vec<Token>) -> Vec<&str> {
if !match_keyword_at_position(Keyword::Insert, &tokens, 0)
|| !match_keyword_at_position(Keyword::Into, &tokens, 2)
Expand Down
34 changes: 34 additions & 0 deletions dump-parser/src/postgres/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,40 @@ pub fn get_column_values_str_from_insert_into_query(tokens: &Vec<Token>) -> Vec<
.collect::<Vec<_>>()
}

/// Extracts the column names declared in a `CREATE ...` statement.
///
/// Returns an empty vector when the first token is not the `CREATE` keyword.
/// Otherwise the tokens are scanned from the first `(` up to the `)` that closes
/// it. For every comma-separated entry at that top level, the value of the first
/// `Token::Word` is reported as the column name.
///
/// Parenthesis depth is tracked so that nested groups such as
/// `character varying(255)` or `numeric(10,2)` neither end the scan early nor
/// have their inner commas mistaken for column separators.
///
/// Note: because the first word of each top-level entry is taken as the name, an
/// entry that is not a column definition (for example a table-level constraint)
/// would contribute its leading word.
pub fn get_column_names_from_create_query(tokens: &Vec<Token>) -> Vec<String> {
    if !match_keyword_at_position(Create, tokens, 0) {
        return Vec::new();
    }

    // 0 = before the column list, 1 = directly inside it, >1 = nested group.
    let mut depth: usize = 0;
    // True once the current top-level entry has already yielded its name.
    let mut consumed = false;
    let mut names = Vec::new();

    for token in tokens.iter() {
        match token {
            Token::LParen => depth += 1,
            Token::RParen => match depth {
                // Stray `)` before the column list opens: ignore it.
                0 => {}
                // This `)` closes the column list: nothing more to collect.
                1 => break,
                _ => depth -= 1,
            },
            // Only a top-level comma starts a new column definition.
            Token::Comma if depth == 1 => consumed = false,
            Token::Word(word) if depth == 1 && !consumed => {
                consumed = true;
                names.push(word.value.as_str().to_owned());
            }
            _ => {}
        }
    }

    names
}

pub fn get_tokens_from_query_str(query: &str) -> Vec<Token> {
// query by query
let mut tokenizer = Tokenizer::new(query);
Expand Down
3 changes: 2 additions & 1 deletion replibyte/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@ timeago = "0.3"
indicatif = "0.16"
http = "0.2"
flate2 = "1.0"
bson = "2.1"
bson = "2.2"
aes-gcm = "0.9"
which = "4.2.5"
mongodb-schema-parser = { git = "https://github.com/mongodb-rust/mongodb-schema-parser.git", rev = "2d489307dd70b63b216a9968f7dec7c217108b32" }
url = "2.2.2"
tempfile = "3.3"
ctrlc = "3.2.1"
Expand Down
12 changes: 11 additions & 1 deletion replibyte/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ pub enum SubCommand {
/// all dump commands
#[clap(subcommand)]
Dump(DumpCommand),
/// all transformers command
/// all source commands
#[clap(subcommand)]
Source(SourceCommand),
/// all transformer commands
#[clap(subcommand)]
Transformer(TransformerCommand),
}
Expand Down Expand Up @@ -122,3 +125,10 @@ pub struct DumpDeleteArgs {
#[clap(long, group = "delete-mode")]
pub keep_last: Option<usize>,
}

// Sub-commands available under `source`.
// NOTE(review): with clap's derive macros the `///` doc comments below are
// presumably rendered as CLI help text, so treat edits to them as user-visible.
/// all source commands
#[derive(Subcommand, Debug)]
pub enum SourceCommand {
/// Show the database schema. When used with MongoDB, the schema will be probabilistic and returned as a JSON document
Schema,
}
1 change: 1 addition & 0 deletions replibyte/src/commands/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub mod dump;
pub mod source;
pub mod transformer;
56 changes: 56 additions & 0 deletions replibyte/src/commands/source.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
use std::io::{Error, ErrorKind};

use crate::config::{Config, ConnectionUri};
use crate::source::Explain;
use crate::source::mongodb::MongoDB;
use crate::source::mysql::Mysql;
use crate::source::postgres::Postgres;

/// show the database schema
pub fn schema(config: Config) -> anyhow::Result<()> {
match config.source {
Some(source) => {
match source.connection_uri()? {
ConnectionUri::Postgres(host, port, username, password, database) => {
let postgres = Postgres::new(
host.as_str(),
port,
database.as_str(),
username.as_str(),
password.as_str(),
);

postgres.schema()?;

Ok(())
}
ConnectionUri::Mysql(host, port, username, password, database) => {
let mysql = Mysql::new(
host.as_str(),
port,
database.as_str(),
username.as_str(),
password.as_str(),
);

mysql.schema()?;

Ok(())
}
ConnectionUri::MongoDB(uri, database) => {
let mongodb = MongoDB::new(uri.as_str(), database.as_str());

mongodb.schema()?;

Ok(())
}
}
}
None => {
Err(anyhow::Error::from(Error::new(
ErrorKind::Other,
"missing <source> object in the configuration file",
)))
}
}
}
Loading

0 comments on commit 7714dd0

Please sign in to comment.