Skip to content

Commit

Permalink
Merge pull request #3 from richardellison/master
Browse files Browse the repository at this point in the history
Add index creation function and option for store_bikedata
  • Loading branch information
mpadge authored Mar 18, 2017
2 parents f9f93f0 + a86ca0b commit e96aaf9
Show file tree
Hide file tree
Showing 13 changed files with 265 additions and 11 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Authors@R: c(
person("Mark", "Padgham", email="mark.padgham@email.com", role=c("aut", "cre")),
person("Richard", "Ellison", role="aut"))
Description: Load data from public hire bicycle systems.
Depends:
Depends:
R (>= 3.3.0)
License: GPL-3 + file LICENSE
SystemRequirements: C++11
Expand All @@ -25,4 +25,4 @@ Suggests:
LinkingTo:
BH,
Rcpp
RoxygenNote: 6.0.1
RoxygenNote: 5.0.1
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(dl_bikedata)
export(get_datelimits)
export(store_bikedata)
export(tripmat)
export(createDBIndexes)
import(RSQLite)
importFrom(Rcpp,evalCpp)
importFrom(dplyr,"%>%")
Expand Down
16 changes: 16 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,19 @@ importDataToSpatialite <- function(datafiles, spdb, quiet) {
.Call('bikedata_importDataToSpatialite', PACKAGE = 'bikedata', datafiles, spdb, quiet)
}

#' Create indexes in database
#'
#' Builds the requested indexes in the database so that later queries run
#' faster. On the full dataset this may take some time.
#'
#' @param spdb A string giving the path to the spatialite database in which
#' the indexes are created.
#' @param tables A vector naming the table for each index. It should be the
#' same length as \code{cols}.
#' @param cols A vector naming the field for each index, matched by position
#' with \code{tables}.
#'
#' @return integer result code
createDBIndexes <- function(spdb, tables, cols) {
    .Call("bikedata_createDBIndexes", spdb, tables, cols, PACKAGE = "bikedata")
}

24 changes: 21 additions & 3 deletions R/dl_bikedata.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,22 @@ citibike_files <- function(){
#'
#' @param city City for which to download bike data
#' @param data_dir Directory to which to download the files
#' @param dates Character vector of dates for which to download data, with
#' each date formatted as YYYYMM.
#'
#' @note Only files that don't already exist in \code{data_dir} will be
#' downloaded, and this function may thus be used to update a directory of files
#' by downloading more recent files.
#'
#' @export
dl_bikedata <- function(city='nyc', data_dir = tempdir())
dl_bikedata <- function(city='nyc', data_dir = tempdir(), dates = NULL)
{
files <- file.path (data_dir, basename (citibike_files ()))
indx <- which (!file.exists (files))
if (is.null(dates) == TRUE) {
indx <- which (!file.exists (files))
} else {
indx <- which (!file.exists (files) & grepl(paste(dates,collapse="|"), files))
}
if (length (indx) > 0)
{
for (f in citibike_files () [indx])
Expand All @@ -95,12 +101,14 @@ dl_bikedata <- function(city='nyc', data_dir = tempdir())
#' @param spdb A string containing the path to the spatialite database to
#' use. It will be created automatically.
#' @param quiet If FALSE, progress is displayed on screen
#' @param create_index If TRUE, creates an index on the start and end station
#' IDs and start and stop times.
#'
#' @note This function can take quite a long time to execute (typically > 10
#' minutes), and generates a spatialite database file several gigabytes in size.
#'
#' @export
store_bikedata <- function (data_dir, spdb, quiet=FALSE)
store_bikedata <- function (data_dir, spdb, quiet=FALSE, create_index = TRUE)
{
if (file.exists (spdb))
stop ('File named ', spdb, ' already exists')
Expand All @@ -124,5 +132,15 @@ store_bikedata <- function (data_dir, spdb, quiet=FALSE)
if (!quiet)
message ('total trips read = ',
format (ntrips, big.mark=',', scientific=FALSE))
if (create_index == TRUE) {
if (!quiet) {
message ('Creating indexes')
}
createDBIndexes(spdb,
tables = rep("trips", times=8),
cols = c("start_station_id", "end_station_id", "start_time", "stop_time",
"cast(start_time as date)", "cast(start_time as time)",
"cast(stop_time as date)", "cast(stop_time as time)"))
}
invisible (file.remove (flist_csv))
}
8 changes: 4 additions & 4 deletions R/tripmat.R
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,13 @@ filter_tripmat_by_datetime <- function (db, ...)
"FROM trips WHERE ")
qry_dt <- NULL
if ('start_time' %in% names (x))
qry_dt <- c (qry_dt, paste0 ("time (start_time) >= '", x$start_time, "' "))
qry_dt <- c (qry_dt, paste0 ("cast(start_time as time) >= '", x$start_time, "' "))
if ('end_time' %in% names (x))
qry_dt <- c (qry_dt, paste0 ("time (stop_time) <= '", x$end_time, "' "))
qry_dt <- c (qry_dt, paste0 ("cast(stop_time as time) <= '", x$end_time, "' "))
if ('start_date' %in% names (x))
qry_dt <- c (qry_dt, paste0 ("date (start_time) >= '", x$start_date, "' "))
qry_dt <- c (qry_dt, paste0 ("cast(start_time as date) >= '", x$start_date, "' "))
if ('end_date' %in% names (x))
qry_dt <- c (qry_dt, paste0 ("date (stop_time) <= '", x$end_date, "' "))
qry_dt <- c (qry_dt, paste0 ("cast(stop_time as date) <= '", x$end_date, "' "))

qry_wd <- NULL
if ('weekday' %in% names (x))
Expand Down
1 change: 1 addition & 0 deletions man/bike_stations.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions man/createDBIndexes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/dl_bikedata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions man/filter_tripmat_by_datetime.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/importDataToSpatialite.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion man/store_bikedata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,16 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// createDBIndexes
// Declaration of the C++ implementation that the wrapper below forwards to.
int createDBIndexes(const char* spdb, Rcpp::CharacterVector tables, Rcpp::CharacterVector cols);
// Entry point looked up from R as 'bikedata_createDBIndexes': receives the
// three arguments as SEXPs, converts them, calls createDBIndexes and hands the
// wrapped result back to R.
RcppExport SEXP bikedata_createDBIndexes(SEXP spdbSEXP, SEXP tablesSEXP, SEXP colsSEXP) {
BEGIN_RCPP
// Holder for the value returned to R.
Rcpp::RObject rcpp_result_gen;
// RNG scope object kept alive for the duration of the call.
Rcpp::RNGScope rcpp_rngScope_gen;
// Convert each incoming SEXP to the C++ type createDBIndexes expects.
Rcpp::traits::input_parameter< const char* >::type spdb(spdbSEXP);
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type tables(tablesSEXP);
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type cols(colsSEXP);
// Run the implementation and wrap its int result as an R object.
rcpp_result_gen = Rcpp::wrap(createDBIndexes(spdb, tables, cols));
return rcpp_result_gen;
END_RCPP
}
149 changes: 149 additions & 0 deletions src/spatialitedb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,71 @@ char *strtokm(char *str, const char *delim)
return tok;
}

// Compare two dot-separated version strings (e.g. "3.8.11" against "3.9.0").
//
// Components are compared numerically from left to right. A version with
// fewer components is treated as if it were padded with zeros, so "3.9" is
// equal to "3.9.0" and lower than "3.9.0.1". An empty or non-numeric
// component counts as 0 (atoi semantics), which also makes an empty version
// string compare as "0" instead of crashing.
//
// First argument is compared to the second argument
// Return value:
// -1 = Argument one version lower than Argument two version
//  0 = Argument one version equal to Argument two version
//  1 = Argument one version higher than Argument two version
int compare_version_numbers (std::string vstro, std::string compvstro) {

    // Read positions in each string; each always points at the start of the
    // next component (or past the end once the string is exhausted).
    std::string::size_type pos = 0;
    std::string::size_type comppos = 0;

    // Keep going until BOTH strings are exhausted so that trailing components
    // of the longer version are never ignored.
    while (pos < vstro.size () || comppos < compvstro.size ()) {

        int part = 0;       // an exhausted version contributes 0
        int comppart = 0;

        if (pos < vstro.size ()) {
            std::string::size_type dot = vstro.find ('.', pos);
            if (dot == std::string::npos) {
                dot = vstro.size ();
            }
            part = atoi (vstro.substr (pos, dot - pos).c_str ());
            pos = dot + 1;
        }

        if (comppos < compvstro.size ()) {
            std::string::size_type dot = compvstro.find ('.', comppos);
            if (dot == std::string::npos) {
                dot = compvstro.size ();
            }
            comppart = atoi (compvstro.substr (comppos, dot - comppos).c_str ());
            comppos = dot + 1;
        }

        // The first differing component decides the result.
        if (part < comppart) {
            return -1;
        }
        if (part > comppart) {
            return 1;
        }
    }

    return 0;

}

void rm_dos_end (char *str)
{
char *p = strrchr (str, '\r');
Expand Down Expand Up @@ -273,3 +338,87 @@ int importDataToSpatialite (Rcpp::CharacterVector datafiles,

return(trip_id);
}


//' Create indexes in database
//'
//' Creates the specified indexes in the database to speed up queries. Note
//' that for the full dataset this may take some time.
//'
//' @param spdb A string containing the path to the spatialite database to
//' use.
//' @param tables A vector with the tables for which to create indexes. This
//' vector should be the same length as the cols vector.
//' @param cols A vector with the fields for which to create indexes.
//'
//' @return integer result code
// [[Rcpp::export]]
int createDBIndexes (const char* spdb,
Rcpp::CharacterVector tables,
Rcpp::CharacterVector cols)
{

sqlite3 *dbcon;
char *zErrMsg = 0;
const char *zStmtMsg;
int rc;
void* cache = spatialite_alloc_connection();

rc = sqlite3_open_v2(spdb, &dbcon, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL);
if (rc != SQLITE_OK)
throw std::runtime_error ("Can't establish sqlite3 connection");
spatialite_init_ex(dbcon, cache, 0);

char *idxsql = NULL;
sqlite3_stmt *versionstmt;
char *sqliteversion = (char *)"0.1";



if (rc == SQLITE_OK) {

rc = sqlite3_prepare_v2(dbcon, "SELECT sqlite_version()", -1, &versionstmt, 0);
if (rc != SQLITE_OK) {
throw std::runtime_error ("Unable to retrieve sqlite version");
}
rc = sqlite3_step(versionstmt);

if (rc == SQLITE_ROW) {
sqliteversion = (char *)sqlite3_column_text(versionstmt, 0);
}
rc = sqlite3_reset(versionstmt);

for (unsigned int i = 0; i < cols.length(); ++i) {

if (((std::string)cols[i]).find("(") == std::string::npos ||
compare_version_numbers(sqliteversion, "3.9.0") >= 0) {

std::string idxname = "idx_" + tables[i] + "_" + (std::string)cols[i];
boost::replace_all(idxname, "(", "_");
boost::replace_all(idxname, ")", "_");
boost::replace_all(idxname, " ", "_");

int sprrc = asprintf(&idxsql, "CREATE INDEX %s ON %s(%s)", idxname.c_str(), (char *)(tables[i]), (char *)(cols[i]));

rc = sqlite3_exec(dbcon, idxsql, NULL, NULL, &zErrMsg);
if (rc != SQLITE_OK) {
throw std::runtime_error ("Unable to execute index query: " + (std::string)idxsql);
}

}
else {
Rcpp::warning("Unable to create index on " + cols[i] + ", expression not supported in SQLite version < 3.9.0");
}

}

}

rc = sqlite3_close_v2(dbcon);
if (rc != SQLITE_OK) {
throw std::runtime_error ("Unable to close sqlite database");
}

return(rc);

}

0 comments on commit e96aaf9

Please sign in to comment.