From 2e816105d1893ae62a32b158125c7746990ab201 Mon Sep 17 00:00:00 2001 From: Brian Hie Date: Thu, 29 Nov 2018 07:57:10 -0500 Subject: [PATCH] enable batched alignment by default --- scanorama/scanorama.py | 22 ++++++++++++---------- setup.py | 4 ++-- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/scanorama/scanorama.py b/scanorama/scanorama.py index ac7470d..5ef0ddc 100644 --- a/scanorama/scanorama.py +++ b/scanorama/scanorama.py @@ -23,19 +23,21 @@ # Default parameters. ALPHA = 0.10 APPROX = True +BATCH_SIZE = 5000 DIMRED = 100 HVG = None KNN = 20 N_ITER = 500 PERPLEXITY = 1200 +REALIGN = True SIGMA = 15 VERBOSE = 2 # Do batch correction on a list of data sets. def correct(datasets_full, genes_list, return_dimred=False, - batch_size=None, verbose=VERBOSE, ds_names=None, dimred=DIMRED, - approx=APPROX, sigma=SIGMA, alpha=ALPHA, knn=KNN, - return_dense=False, hvg=None, union=False, realign=False): + batch_size=BATCH_SIZE, verbose=VERBOSE, ds_names=None, + dimred=DIMRED, approx=APPROX, sigma=SIGMA, alpha=ALPHA, knn=KNN, + return_dense=False, hvg=None, union=False, realign=REALIGN): """Integrate and batch correct a list of data sets. Parameters @@ -47,7 +49,7 @@ def correct(datasets_full, genes_list, return_dimred=False, return_dimred: `bool`, optional (default: `False`) In addition to returning batch corrected matrices, also returns integrated low-dimesional embeddings. - batch_size: `int`, optional (default: `None`) + batch_size: `int`, optional (default: `5000`) The batch size used in the alignment vector computation. Useful when correcting very large (>100k samples) data sets. Set to large value that runs within available memory. @@ -108,10 +110,10 @@ def correct(datasets_full, genes_list, return_dimred=False, return datasets, genes # Integrate a list of data sets. 
-def integrate(datasets_full, genes_list, batch_size=None, verbose=VERBOSE, - ds_names=None, dimred=DIMRED, approx=APPROX, sigma=SIGMA, - alpha=ALPHA, knn=KNN, geosketch=False, geosketch_max=20000, - n_iter=1, union=False, hvg=None): +def integrate(datasets_full, genes_list, batch_size=BATCH_SIZE, + verbose=VERBOSE, ds_names=None, dimred=DIMRED, approx=APPROX, + sigma=SIGMA, alpha=ALPHA, knn=KNN, geosketch=False, + geosketch_max=20000, n_iter=1, union=False, hvg=None): """Integrate a list of data sets. Parameters @@ -120,7 +122,7 @@ def integrate(datasets_full, genes_list, batch_size=None, verbose=VERBOSE, Data sets to integrate and correct. genes_list: `list` of `list` of `string` List of genes for each data set. - batch_size: `int`, optional (default: `None`) + batch_size: `int`, optional (default: `5000`) The batch size used in the alignment vector computation. Useful when correcting very large (>100k samples) data sets. Set to large value that runs within available memory. @@ -758,7 +760,7 @@ def transform(curr_ds, curr_ref, ds_ind, ref_ind, sigma, cn=False, # values. def assemble(datasets, verbose=VERBOSE, view_match=False, knn=KNN, sigma=SIGMA, approx=APPROX, alpha=ALPHA, expr_datasets=None, - ds_names=None, batch_size=None, realign=False, geosketch=False, + ds_names=None, batch_size=None, realign=REALIGN, geosketch=False, geosketch_max=20000): if len(datasets) == 1: return datasets diff --git a/setup.py b/setup.py index 55713ea..ee1ad78 100644 --- a/setup.py +++ b/setup.py @@ -2,10 +2,10 @@ setup( name='scanorama', - version='0.6', + version='0.6.1', description='Panoramic stitching of heterogeneous single cell transcriptomic data', url='https://github.com/brianhie/scanorama', - download_url='https://github.com/brianhie/scanorama/archive/v0.6.tar.gz', + download_url='https://github.com/brianhie/scanorama/archive/v0.6.1.tar.gz', packages=find_packages(exclude=['bin', 'conf', 'data', 'target']), install_requires=[ 'annoy>=1.11.5',