diff --git a/DESCRIPTION b/DESCRIPTION index 99a37aa..e7d6924 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: seriation Type: Package Title: Infrastructure for Ordering Objects Using Seriation -Version: 1.5.5.1 -Date: 2024-xx-xx +Version: 1.5.6 +Date: 2024-08-13 Authors@R: c( person("Michael", "Hahsler", role = c("aut", "cre", "cph"), email = "mhahsler@lyle.smu.edu", @@ -52,7 +52,7 @@ Suggests: dbscan, testthat, umap Encoding: UTF-8 -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Roxygen: list(markdown = TRUE) License: GPL-3 Copyright: The code in src/bbwrcg.f, src/arsa.f and src/bburcg.f diff --git a/NEWS.md b/NEWS.md index 344a071..453ef76 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# seriation 1.5.5.1 (xx/xx/2024) +# seriation 1.5.6 (08/13/2024) ## New Features - Added registered_by field to registries. @@ -6,6 +6,7 @@ ## Changes - We replaced the FORTRAN implementation for BEA with code from package TSP. - ME is now calculated using C code. +- optimal.c: updated memory allocation to R allocation. ## Bug Fixes - Added two missing package anchors to palette man page. diff --git a/README.Rmd b/README.Rmd index 18e141b..184fe1c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -21,7 +21,7 @@ visual assessment of cluster tendency plots (VAT and iVAT). Here are some quick guides on applications of seriation: -* [Introduction the R package seriation](https://cran.r-project.org/web/packages/seriation/vignettes/seriation.pdf) +* [Introduction the R package seriation](https://cran.r-project.org/package=seriation/vignettes/seriation.pdf) * [How to reorder heatmaps](https://mhahsler.github.io/seriation/heatmaps.html) * [How to reorder correlation matrices](https://mhahsler.github.io/seriation/correlation_matrix.html) * [How to evaluate clusters using dissimilarity plots](https://mhahsler.github.io/seriation/clustering.html) diff --git a/README.md b/README.md index cef9b78..430a414 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ assessment of cluster tendency plots (VAT and iVAT). Here are some quick guides on applications of seriation: - [Introduction the R package - seriation](https://cran.r-project.org/web/packages/seriation/vignettes/seriation.pdf) + seriation](https://cran.r-project.org/package=seriation/vignettes/seriation.pdf) - [How to reorder heatmaps](https://mhahsler.github.io/seriation/heatmaps.html) - [How to reorder correlation @@ -230,7 +230,8 @@ install.packages("seriation") ``` r install.packages("seriation", - repos = c("https://mhahsler.r-universe.dev". "https://cloud.r-project.org/")) + repos = c("https://mhahsler.r-universe.dev", + "https://cloud.r-project.org/")) ``` ## Usage @@ -298,10 +299,7 @@ measures. Note that some measures are merit measures while others represent cost. See the manual page for details. ``` r -rbind( - alphabetical = criterion(d), - seriated = criterion(d, order) -) +rbind(alphabetical = criterion(d), seriated = criterion(d, order)) ``` ## 2SUM AR_deviations AR_events BAR Gradient_raw Gradient_weighted diff --git a/src/optimal.c b/src/optimal.c index 79fe334..7dd850e 100644 --- a/src/optimal.c +++ b/src/optimal.c @@ -71,15 +71,15 @@ SEXP order_length(SEXP R_dist, SEXP R_order) { if (LENGTH(R_dist) != n * (n - 1) / 2) error("order_length: length of \"dist\" and \"order\" do not match"); - o = Calloc(n, int); + o = R_Calloc(n, int); for (k = 0; k < n; k++) /* offset to C indexing */ - o[k] = INTEGER(R_order)[k]-1; + o[k] = INTEGER(R_order)[k]-1; PROTECT(R_obj = NEW_NUMERIC(1)); REAL(R_obj)[0] = orderLength(REAL(R_dist), o, n); - Free(o); + R_Free(o); UNPROTECT(1); @@ -276,14 +276,14 @@ SEXP order_optimal(SEXP R_dist, SEXP R_merge) { /* copy similarities into lower triangle */ - x = Calloc(n*n, double); /* data + part order lengths + temporary */ + x = R_Calloc(n*n, double); /* data + part order lengths + temporary */ k = 0; for (i = 0; i < n-1; i++) for (j = i+1; j < n; j++) { z = REAL(R_dist)[k++]; if (!R_FINITE(z)) { - Free(x); + R_Free(x); error("order_optimal: \"dist\" invalid"); } else @@ -302,12 +302,12 @@ SEXP order_optimal(SEXP R_dist, SEXP R_merge) { GetRNGstate(); - l = Calloc(n, int); /* offset of leftmost leaf of left tree */ - r = Calloc(n, int); /* offset of leftmost leaf of right tree; + l = R_Calloc(n, int); /* offset of leftmost leaf of left tree */ + r = R_Calloc(n, int); /* offset of leftmost leaf of right tree; * reverse mapping of order */ - c = Calloc(n-1, int); /* number of leaves in a tree */ + c = R_Calloc(n-1, int); /* number of leaves in a tree */ - e = Calloc(n*n, int); /* inner endpoints */ + e = R_Calloc(n*n, int); /* inner endpoints */ /* for each tree count the number of leaves. */ @@ -395,13 +395,13 @@ SEXP order_optimal(SEXP R_dist, SEXP R_merge) { /* compute temporary sums for all endpoints */ if (!calcAllOrder(x, e, oll, olr, or, cll, clr, cr, n)) { - Free(x); Free(r); Free(l); Free(c); Free(e); + R_Free(x); R_Free(r); R_Free(l); R_Free(c); R_Free(e); error("order_optimal: non-finite values"); } if (olr != oll) if (!calcAllOrder(x, e, olr, oll, or, clr, cll, cr, n)) { - Free(x); Free(r); Free(l); Free(c); Free(e); + R_Free(x); R_Free(r); R_Free(l); R_Free(c); R_Free(e); error("order_optimal: non-finite values"); } @@ -421,13 +421,13 @@ SEXP order_optimal(SEXP R_dist, SEXP R_merge) { /* compute best orders for all endpoints */ if (!calcAllOrder(x, e, orl, orr, ol, crl, crr, cl, n)) { - Free(x); Free(r); Free(l); Free(c); Free(e); + R_Free(x); R_Free(r); R_Free(l); R_Free(c); R_Free(e); error("order_optimal: non-finite values"); } if (orr != orl) if (!calcAllOrder(x, e, orr, orl, ol, crr, crl, cl, n)) { - Free(x); Free(r); Free(l); Free(c); Free(e); + R_Free(x); R_Free(r); R_Free(l); R_Free(c); R_Free(e); error("order_optimal: non-finite values"); } @@ -467,24 +467,24 @@ SEXP order_optimal(SEXP R_dist, SEXP R_merge) { */ if (!calcEndOrder(x, e, oll, olr, cll, clr, n)) { - Free(x); Free(r); Free(l); Free(c); Free(e); + R_Free(x); R_Free(r); R_Free(l); R_Free(c); R_Free(e); error("order_optimal: non-finite values"); } if (olr != oll) if (!calcEndOrder(x, e, olr, oll, clr, cll, n)) { - Free(x); Free(r); Free(l); Free(c); Free(e); + R_Free(x); R_Free(r); R_Free(l); R_Free(c); R_Free(e); error("order_optimal: non-finite values"); } if (!calcEndOrder(x, e, orl, orr, crl, crr, n)) { - Free(x); Free(r); Free(l); Free(c); Free(e); + R_Free(x); R_Free(r); R_Free(l); R_Free(c); R_Free(e); error("order_optimal: non-finite values"); } if (orr != orl) if (!calcEndOrder(x, e, orr, orl, crr, crl, n)) { - Free(x); Free(r); Free(l); Free(c); Free(e); + R_Free(x); R_Free(r); R_Free(l); R_Free(c); R_Free(e); error("order_optimal: non-finite values"); } @@ -517,7 +517,7 @@ SEXP order_optimal(SEXP R_dist, SEXP R_merge) { } } if (!R_FINITE(z)) { - Free(x); Free(r); Free(l); Free(c); Free(e); + R_Free(x); R_Free(r); R_Free(l); R_Free(c); R_Free(e); error("order_optimal: non-finite values"); } } @@ -620,11 +620,11 @@ SEXP order_optimal(SEXP R_dist, SEXP R_merge) { } } - Free(x); - Free(l); - Free(r); - Free(c); - Free(e); + R_Free(x); + R_Free(l); + R_Free(r); + R_Free(c); + R_Free(e); PutRNGstate(); diff --git a/src/stress.c b/src/stress.c index e6c6287..de79f46 100644 --- a/src/stress.c +++ b/src/stress.c @@ -17,7 +17,6 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ - #include #include @@ -34,46 +33,51 @@ * NA if there was no legal edge at all. */ -double stressMoore(double *x, int *r, int *c, int nr, int nc, int nrx) { +double stressMoore(double *x, int *r, int *c, int nr, int nc, int nrx) +{ double d, v, z; R_xlen_t i, j, l, ll, k, kk; z = 0; l = r[0]; - for (i = 0; i < nr-1; i++) { - ll = r[i+1]; - k = c[0] * nrx; - for (j = 0; j < nc-1; j++) { - kk = c[j+1] * nrx; - v = x[l+k]; - if (!ISNAN(v)) { - d = v - x[ll+k]; + for (i = 0; i < nr - 1; i++) + { + ll = r[i + 1]; + k = c[0] * nrx; + for (j = 0; j < nc - 1; j++) + { + kk = c[j + 1] * nrx; + v = x[l + k]; + if (!ISNAN(v)) + { + d = v - x[ll + k]; if (!ISNAN(d)) z += d * d; - d = v - x[ll+kk]; + d = v - x[ll + kk]; if (!ISNAN(d)) z += d * d; - d = v - x[l+kk]; + d = v - x[l + kk]; if (!ISNAN(d)) z += d * d; } - d = x[ll+k] - x[l+kk]; + d = x[ll + k] - x[l + kk]; k = kk; if (!ISNAN(d)) z += d * d; } - d = x[l+k] - x[ll+k]; - l = ll; + d = x[l + k] - x[ll + k]; + l = ll; if (!ISNAN(d)) z += d * d; R_CheckUserInterrupt(); } k = c[0] * nrx; - for (j = 0; j < nc-1; j++) { - kk = c[j+1] * nrx; - d = x[l+k] - x[l+kk]; - k = kk; + for (j = 0; j < nc - 1; j++) + { + kk = c[j + 1] * nrx; + d = x[l + k] - x[l + kk]; + k = kk; if (!ISNAN(d)) z += d * d; } @@ -85,40 +89,45 @@ double stressMoore(double *x, int *r, int *c, int nr, int nc, int nrx) { * neighboring points on the diagonals are excluded. */ -double stressNeumann(double *x, int *r, int *c, int nr, int nc, int nrx) { +double stressNeumann(double *x, int *r, int *c, int nr, int nc, int nrx) +{ double d, v, z; R_xlen_t i, j, l, ll, k, kk; z = 0; l = r[0]; - for (i = 0; i < nr-1; i++) { - ll = r[i+1]; - k = c[0] * nrx; - for (j = 0; j < nc-1; j++) { - kk = c[j+1] * nrx; - v = x[l+k]; - if (!ISNAN(v)) { - d = v - x[ll+k]; + for (i = 0; i < nr - 1; i++) + { + ll = r[i + 1]; + k = c[0] * nrx; + for (j = 0; j < nc - 1; j++) + { + kk = c[j + 1] * nrx; + v = x[l + k]; + if (!ISNAN(v)) + { + d = v - x[ll + k]; if (!ISNAN(d)) z += d * d; - d = v - x[l+kk]; + d = v - x[l + kk]; if (!ISNAN(d)) z += d * d; } k = kk; } - d = x[l+k] - x[ll+k]; - l = ll; + d = x[l + k] - x[ll + k]; + l = ll; if (!ISNAN(d)) z += d * d; R_CheckUserInterrupt(); } k = c[0] * nrx; - for (j = 0; j < nc-1; j++) { - kk = c[j+1] * nrx; - d = x[l+k] - x[l+kk]; - k = kk; + for (j = 0; j < nc - 1; j++) + { + kk = c[j + 1] * nrx; + d = x[l + k] - x[l + kk]; + k = kk; if (!ISNAN(d)) z += d * d; } @@ -129,7 +138,8 @@ double stressNeumann(double *x, int *r, int *c, int nr, int nc, int nrx) { /* R wrapper to the stress functions */ -SEXP stress(SEXP R_x, SEXP R_r, SEXP R_c, SEXP R_type) { +SEXP stress(SEXP R_x, SEXP R_r, SEXP R_c, SEXP R_type) +{ int nrx, nr, nc; R_xlen_t k; @@ -145,7 +155,7 @@ SEXP stress(SEXP R_x, SEXP R_r, SEXP R_c, SEXP R_type) { (STRING_ELT), R_x)); */ - nrx = INTEGER(GET_DIM(R_x))[0]; /* number of rows */ + nrx = INTEGER(GET_DIM(R_x))[0]; /* number of rows */ nr = LENGTH(R_r); nc = LENGTH(R_c); @@ -160,23 +170,26 @@ SEXP stress(SEXP R_x, SEXP R_r, SEXP R_c, SEXP R_type) { /* copy and shift indexes */ for (k = 0; k < nr; k++) - r[k] = INTEGER(R_r)[k]-1; + r[k] = INTEGER(R_r)[k] - 1; for (k = 0; k < nc; k++) - c[k] = INTEGER(R_c)[k]-1; + c[k] = INTEGER(R_c)[k] - 1; PROTECT(R_obj = NEW_NUMERIC(1)); - switch (INTEGER(R_type)[0]) { + switch (INTEGER(R_type)[0]) + { case 1: - REAL(R_obj)[0] = stressMoore(REAL(R_x), r, c, nr, nc, nrx); + REAL(R_obj) + [0] = stressMoore(REAL(R_x), r, c, nr, nc, nrx); break; case 2: - REAL(R_obj)[0] = stressNeumann(REAL(R_x), r, c, nr, nc, nrx); + REAL(R_obj) + [0] = stressNeumann(REAL(R_x), r, c, nr, nc, nrx); break; default: Free(r); - Free(c); - error("stress: type not implemented"); + Free(c); + error("stress: type not implemented"); } Free(r); Free(c); @@ -208,21 +221,24 @@ SEXP stress(SEXP R_x, SEXP R_r, SEXP R_c, SEXP R_type) { */ void distMoore(double *x, int *r, int *c, int nr, int nc, int nrx, int ncx, - double *d, double *t) { + double *d, double *t) +{ double v, w, z; R_xlen_t i, ii, j, jj, k, kk, kkk, l; - for (k = 0; k < nr*(nr-1)/2; k++) /* initialize distances */ -d[k] = 0; + for (k = 0; k < nr * (nr - 1) / 2; k++) /* initialize distances */ + d[k] = 0; - for (i = 0; i < nr; i++) { - z = 0; + for (i = 0; i < nr; i++) + { + z = 0; ii = r[i] * ncx; kk = c[0] * nrx; - for (k = 0; k < nc-1; k++) { - kkk = c[k+1] * nrx; - w = x[ii+kk] - x[ii+kkk]; + for (k = 0; k < nc - 1; k++) + { + kkk = c[k + 1] * nrx; + w = x[ii + kk] - x[ii + kkk]; if (!ISNAN(w)) z += w * w; kk = kkk; @@ -231,29 +247,33 @@ d[k] = 0; R_CheckUserInterrupt(); } l = 0; - for (i = 0; i < nr-1; i++) { + for (i = 0; i < nr - 1; i++) + { ii = r[i] * ncx; - for (j = i+1; j < nr; j++) { - z = t[i] + t[j]; + for (j = i + 1; j < nr; j++) + { + z = t[i] + t[j]; jj = r[j] * ncx; kk = c[0] * nrx; - for (k = 0; k < nc-1; k++) { - kkk = c[k+1] * nrx; - v = x[ii+kk]; - if (!ISNAN(v)) { - w = v - x[jj+kk]; + for (k = 0; k < nc - 1; k++) + { + kkk = c[k + 1] * nrx; + v = x[ii + kk]; + if (!ISNAN(v)) + { + w = v - x[jj + kk]; if (!ISNAN(w)) z += w * w; - w = v - x[jj+kkk]; + w = v - x[jj + kkk]; if (!ISNAN(w)) z += w * w; } - w = x[jj+kk] - x[ii+kkk]; + w = x[jj + kk] - x[ii + kkk]; if (!ISNAN(w)) z += w * w; kk = kkk; } - w = x[ii+kk] - x[jj+kk]; + w = x[ii + kk] - x[jj + kk]; if (!ISNAN(w)) z += w * w; @@ -270,21 +290,24 @@ d[k] = 0; */ void distNeumann(double *x, int *r, int *c, int nr, int nc, int nrx, int ncx, - double *d, double *t) { + double *d, double *t) +{ double w, z; R_xlen_t i, ii, j, jj, k, kk, kkk, l; - for (k = 0; k < nr*(nr-1)/2; k++) /* initialize distances */ + for (k = 0; k < nr * (nr - 1) / 2; k++) /* initialize distances */ d[k] = 0; - for (i = 0; i < nr; i++) { - z = 0; + for (i = 0; i < nr; i++) + { + z = 0; ii = r[i] * ncx; kk = c[0] * nrx; - for (k = 0; k < nc-1; k++) { - kkk = c[k+1] * nrx; - w = x[ii+kk] - x[ii+kkk]; + for (k = 0; k < nc - 1; k++) + { + kkk = c[k + 1] * nrx; + w = x[ii + kk] - x[ii + kkk]; if (!ISNAN(w)) z += w * w; kk = kkk; @@ -293,19 +316,22 @@ void distNeumann(double *x, int *r, int *c, int nr, int nc, int nrx, int ncx, R_CheckUserInterrupt(); } l = 0; - for (i = 0; i < nr-1; i++) { + for (i = 0; i < nr - 1; i++) + { ii = r[i] * ncx; - for (j = i+1; j < nr; j++) { - z = t[i] + t[j]; + for (j = i + 1; j < nr; j++) + { + z = t[i] + t[j]; jj = r[j] * ncx; - for (k = 0; k < nc-1; k++) { + for (k = 0; k < nc - 1; k++) + { kk = c[k] * nrx; - w = x[ii+kk]- x[jj+kk]; + w = x[ii + kk] - x[jj + kk]; if (!ISNAN(w)) z += w * w; } kk = c[k] * nrx; - w = x[ii+kk] - x[jj+kk]; + w = x[ii + kk] - x[jj + kk]; if (!ISNAN(w)) z += w * w; @@ -318,7 +344,8 @@ void distNeumann(double *x, int *r, int *c, int nr, int nc, int nrx, int ncx, /* R wrapper */ -SEXP stress_dist(SEXP R_x, SEXP R_r, SEXP R_c, SEXP R_bycol, SEXP R_type) { +SEXP stress_dist(SEXP R_x, SEXP R_r, SEXP R_c, SEXP R_bycol, SEXP R_type) +{ int nrx, nr, nc; R_xlen_t k; @@ -326,44 +353,45 @@ SEXP stress_dist(SEXP R_x, SEXP R_r, SEXP R_c, SEXP R_bycol, SEXP R_type) { double *d, *t; - SEXP R_obj = R_NilValue; /* compiler hack */ + SEXP R_obj = R_NilValue; /* compiler hack */ + /* Translation form character to int index not needed + * R part makes sure it is int! + PROTECT(R_r = arraySubscript(0, R_r, GET_DIM(R_x), getAttrib, + (STRING_ELT), R_x)); + PROTECT(R_c = arraySubscript(1, R_c, GET_DIM(R_x), getAttrib, + (STRING_ELT), R_x)); + */ -/* Translation form character to int index not needed - * R part makes sure it is int! - PROTECT(R_r = arraySubscript(0, R_r, GET_DIM(R_x), getAttrib, - (STRING_ELT), R_x)); - PROTECT(R_c = arraySubscript(1, R_c, GET_DIM(R_x), getAttrib, - (STRING_ELT), R_x)); - */ - - nrx = INTEGER(GET_DIM(R_x))[0]; /* number of rows */ + nrx = INTEGER(GET_DIM(R_x))[0]; /* number of rows */ nr = LENGTH(R_r); nc = LENGTH(R_c); -/* remap R indexes to C indexes - * this sucks! - */ + /* remap R indexes to C indexes + * this sucks! + */ r = Calloc(nr, int); c = Calloc(nc, int); -/* copy and shift indexes */ + /* copy and shift indexes */ for (k = 0; k < nr; k++) - r[k] = INTEGER(R_r)[k]-1; + r[k] = INTEGER(R_r)[k] - 1; for (k = 0; k < nc; k++) - c[k] = INTEGER(R_c)[k]-1; + c[k] = INTEGER(R_c)[k] - 1; - switch(LOGICAL(R_bycol)[0]) { + switch (LOGICAL(R_bycol)[0]) + { case 0: - PROTECT(R_obj = NEW_NUMERIC(nr*(nr-1)/2)); + PROTECT(R_obj = NEW_NUMERIC(nr * (nr - 1) / 2)); d = REAL(R_obj); t = Calloc(nr, double); - switch(INTEGER(R_type)[0]) { + switch (INTEGER(R_type)[0]) + { case 1: distMoore(REAL(R_x), r, c, nr, nc, nrx, 1, d, t); break; @@ -372,19 +400,20 @@ SEXP stress_dist(SEXP R_x, SEXP R_r, SEXP R_c, SEXP R_bycol, SEXP R_type) { break; default: Free(r); - Free(c); - Free(t); - error("stress_dist: \"type\" not implemented"); + Free(c); + Free(t); + error("stress_dist: \"type\" not implemented"); } Free(t); break; case 1: - PROTECT(R_obj = NEW_NUMERIC(nc*(nc-1)/2)); + PROTECT(R_obj = NEW_NUMERIC(nc * (nc - 1) / 2)); d = REAL(R_obj); t = Calloc(nc, double); - switch(INTEGER(R_type)[0]) { + switch (INTEGER(R_type)[0]) + { case 1: distMoore(REAL(R_x), c, r, nc, nr, 1, nrx, d, t); break; @@ -393,16 +422,16 @@ SEXP stress_dist(SEXP R_x, SEXP R_r, SEXP R_c, SEXP R_bycol, SEXP R_type) { break; default: Free(r); - Free(c); - Free(t); - error("stress_dist: type not implemented"); + Free(c); + Free(t); + error("stress_dist: type not implemented"); } Free(t); break; default: Free(r); - Free(c); - error("stress_dist: \"bycol\" invalid"); + Free(c); + error("stress_dist: \"bycol\" invalid"); } Free(r); Free(c); @@ -412,4 +441,3 @@ SEXP stress_dist(SEXP R_x, SEXP R_r, SEXP R_c, SEXP R_bycol, SEXP R_type) { return R_obj; } - diff --git a/tests/testthat/test-zzz_seriate_extra.R b/tests/testthat/test-zzz_seriate_extra.R index 78f4771..8512dd9 100644 --- a/tests/testthat/test-zzz_seriate_extra.R +++ b/tests/testthat/test-zzz_seriate_extra.R @@ -34,17 +34,19 @@ if(seriation:::check_installed("dbscan", "check")) { expect_equal(length(o[[1]]), 4L) } -# this is very slow see we only for 10 iterations +# The following tests are too slow for CRAN and skipped skip_on_cran() +# this is very slow see we only do 10 iterations if(seriation:::check_installed("GA", "check")) { register_GA() o <- seriate(d, "GA", maxiter = 10, parallel = FALSE, verb = F) expect_equal(length(o[[1]]), 4L) } -# This produces too many messages -# Python (keras) leaves some files in temp and that upsets CRAN +# Notes: +# * This produces too many messages +# * Python (keras) leaves some files in temp and that upsets CRAN skip() # only do 10 epochs.