diff --git a/R/euclidean_logical_joins.R b/R/euclidean_logical_joins.R index 82b54a8..1015bae 100644 --- a/R/euclidean_logical_joins.R +++ b/R/euclidean_logical_joins.R @@ -6,11 +6,11 @@ #' about the distance and not the similarity. Therefore, a lower value means a #' higher similarity. #' @param r Hyperparameter used to govern the sensitivity of the locality -#' sensitive hash. Corresponds to the width of the hash bucket in the LSH -#' algorithm. Increasing values of `r` mean more hash collisions and higher -#' sensitivity (fewer false-negatives) at the cost of lower specificity (more -#' false-positives and longer run time). For more information, see the -#' description in . +#' sensitive hash. Corresponds to the width of the hash bucket in the LSH +#' algorithm. Increasing values of `r` mean more hash collisions and higher +#' sensitivity (fewer false-negatives) at the cost of lower specificity (more +#' false-positives and longer run time). For more information, see the +#' description in \doi{10.1145/997817.997857}. #' #' @return A tibble fuzzily-joined on the basis of the variables in `by.` Tries #' to adhere to the same standards as the dplyr-joins, and uses the same diff --git a/R/string_group.R b/R/string_group.R index 5cd8708..be181fc 100644 --- a/R/string_group.R +++ b/R/string_group.R @@ -1,7 +1,7 @@ #' Fuzzy String Grouping Using Minhashing #' #' Performs fuzzy string grouping in which similar strings are assigned to the -#' same group. Uses the `fastgreedy.community` community detection algorithm +#' same group. Uses the `cluster_fast_greedy()` community detection algorithm #' from the `igraph` package to create the groups. Must have igraph installed #' in order to use this function. 
#' @@ -67,7 +67,7 @@ jaccard_string_group <- function(string, n_gram_width = 2, n_bands = 45, band_wi graph <- igraph::graph_from_edgelist(pairs) - fc <- igraph::fastgreedy.community(igraph::as.undirected(graph)) + fc <- igraph::cluster_fast_greedy(igraph::as.undirected(graph)) groups <- igraph::groups(fc) lookup_table <- vapply(groups, "[[", integer(1), 1) diff --git a/man/euclidean-joins.Rd b/man/euclidean-joins.Rd index ce7e8c9..a068b2c 100644 --- a/man/euclidean-joins.Rd +++ b/man/euclidean-joins.Rd @@ -95,7 +95,7 @@ sensitive hash. Corresponds to the width of the hash bucket in the LSH algorithm. Increasing values of \code{r} mean more hash collisions and higher sensitivity (fewer false-negatives) at the cost of lower specificity (more false-positives and longer run time). For more information, see the -description in \url{doi:10.1145/997817.997857}.} +description in \doi{10.1145/997817.997857}.} \item{progress}{Set to \code{TRUE} to print progress.} } diff --git a/man/jaccard_string_group.Rd b/man/jaccard_string_group.Rd index 9946482..032c44e 100644 --- a/man/jaccard_string_group.Rd +++ b/man/jaccard_string_group.Rd @@ -53,7 +53,7 @@ the same group, which is given a standardized name. } \description{ Performs fuzzy string grouping in which similar strings are assigned to the -same group. Uses the \code{fastgreedy.community} community detection algorithm +same group. Uses the \code{cluster_fast_greedy()} community detection algorithm from the \code{igraph} package to create the groups. Must have igraph installed in order to use this function. }