Skip to content

Commit

Permalink
Aggregate Data
Browse files Browse the repository at this point in the history
  • Loading branch information
eduairet committed Jul 18, 2024
1 parent 1d4ae86 commit f5ae0e8
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 1 deletion.
1 change: 1 addition & 0 deletions database/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ edition = "2021"
shared = { path = "../shared" }
rusqlite = "0.31.0"
postgres = "0.19.7"
csv = "1.3.0"
21 changes: 21 additions & 0 deletions database/artist.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
DisplayName,Nationality,Gender
Charles Arnoldi,American,male
David Aronson,American,male
Irene Aronson,American,female
Jean Arp,French,male
Richard Artschwager,American,male
Ruth Asawa,American,female
Charles Robert Ashbee,British,male
Erik Gunnar Asplund,Swedish,male
Genevieve Asse,French,female
Alvar Aalto,Finnish,male
Aino Aalto,Finnish,female
Eero Aarnio,Finnish,male
Magdalena Abakanowicz,Polish,female
James Abbe,American,male
Berenice Abbott,American,female
Robert Abel,American,male
Raimund Abraham,American,male
Ivor Abrahams,British,male
Rodolfo Abularach,Guatemalan,male
Vito Acconci,American,male
75 changes: 75 additions & 0 deletions database/src/postgres.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use csv::ReaderBuilder;
use postgres::{Client, Error, NoTls};
use std::fs::File;

/// Create a new database.
///
Expand Down Expand Up @@ -180,3 +182,76 @@ pub fn insert_data(connection_string: &str, data: Vec<&str>) -> Result<(), Error

Ok(())
}

/// Create a new database from a CSV file.
///
/// # Arguments
///
/// * `connection_string` - A connection string to the PostgreSQL server.
/// * `db` - The name of the database to create.
/// * `file_path` - The path to the CSV file.
///
/// # Returns
///
/// A `Result` indicating whether the operation was successful.
///
/// # Example
///
/// ```ignore
/// use database::create_db_from_csv;
///
/// let connection_string = "postgresql://postgres:@localhost";
/// let db_name = "moma";
/// let file_path = "artist.csv";
/// let table_name = "artist";
///
/// let result = create_db_from_csv(connection_string, db_name, file_path, table_name);
/// assert!(result.is_ok());
/// ```
pub fn create_db_from_csv(
connection_string: &str,
db: &str,
file_path: &str,
table_name: &str,
) -> Result<(), Error> {
create_db(connection_string, db)?;

let file = File::open(file_path).unwrap();
let mut rdr = ReaderBuilder::new().has_headers(true).from_reader(file);

let headers = rdr.headers().unwrap().clone();

let columns: Vec<String> = headers
.iter()
.map(|h| format!("{} VARCHAR NULL", h.replace(" ", "_")))
.collect();

let columns_clean: Vec<String> = headers.iter().map(|h| h.replace(" ", "_")).collect();

let create_table_query = format!(
"CREATE TABLE IF NOT EXISTS {} (id SERIAL PRIMARY KEY,{})",
table_name,
columns.join(", ")
);

let connection_string_full = format!("{}/{}", connection_string, db);
let mut client = Client::connect(&connection_string_full, NoTls)?;
client.batch_execute(&create_table_query)?;

for result in rdr.records() {
let record = result.unwrap();
let values: Vec<String> = record
.iter()
.map(|v| format!("'{}'", v.replace("'", "''")))
.collect();
let insert_query = format!(
"INSERT INTO {} ({}) VALUES ({})",
table_name,
columns_clean.join(", "),
values.join(", ")
);
client.batch_execute(&insert_query)?;
}

Ok(())
}
37 changes: 36 additions & 1 deletion database/tests/tests_postgres.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use database::{add_tables, create_db, delete_db, insert_data};
use database::{add_tables, create_db, create_db_from_csv, delete_db, insert_data};
use postgres::{Client, Error, NoTls};
use std::collections::HashMap;

Expand Down Expand Up @@ -122,4 +122,39 @@ mod tests_postgres {

let _ = fixture_delete_db(&connection_string, &db_name);
}

/// Loads `artist.csv` into a fresh `moma` database and checks that the
/// `artist` table can be aggregated by nationality.
///
/// Marked `#[ignore]`; it connects to the PostgreSQL server named in
/// `connection_string`.
#[test]
#[ignore]
fn test_create_db_from_csv() {
    let connection_string = "postgresql://postgres:@localhost";
    let db_name = "moma";
    let file_path = "artist.csv";
    let table_name = "artist";

    // Start from a clean slate; the result is ignored because the database
    // may simply not exist yet.
    let _ = fixture_delete_db(&connection_string, &db_name);

    let result = create_db_from_csv(connection_string, db_name, file_path, table_name);
    assert!(result.is_ok(), "create_db_from_csv failed: {:?}", result);

    let connection_string_full = format!("{}/{}", connection_string, db_name);
    let mut client = Client::connect(&connection_string_full, NoTls).unwrap();

    let rows = client
        .query(
            "SELECT Nationality, COUNT(Nationality) AS Count
FROM artist GROUP BY Nationality ORDER BY Count DESC",
            &[],
        )
        .unwrap();

    // Without this check the per-row assertions below would pass vacuously
    // when nothing was inserted.
    assert!(
        !rows.is_empty(),
        "expected at least one nationality group in the artist table"
    );

    for row in rows {
        let (nationality, count): (Option<String>, Option<i64>) = (row.get(0), row.get(1));

        assert!(nationality.is_some());
        assert!(count.is_some());
        println!("{:?} {:?}", nationality, count);
    }

    client.close().unwrap();
    let _ = fixture_delete_db(&connection_string, &db_name);
}
}

0 comments on commit f5ae0e8

Please sign in to comment.