Skip to content

Commit

Permalink
enable batched alignment by default
Browse files Browse the repository at this point in the history
  • Loading branch information
Brian Hie committed Nov 29, 2018
1 parent aece144 commit 2e81610
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 12 deletions.
22 changes: 12 additions & 10 deletions scanorama/scanorama.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,21 @@
# Default parameters.
# Module-level defaults; several are used as keyword-argument defaults in the
# signatures of correct(), integrate() and assemble() shown in this diff.
ALPHA = 0.10  # default `alpha` for correct(), integrate() and assemble()
APPROX = True  # default `approx` for correct(), integrate() and assemble()
BATCH_SIZE = 5000  # default `batch_size` for correct() and integrate()
DIMRED = 100  # default `dimred` for correct() and integrate()
HVG = None  # usage not visible in this excerpt
KNN = 20  # default `knn` for correct(), integrate() and assemble()
N_ITER = 500  # usage not visible in this excerpt
PERPLEXITY = 1200  # usage not visible in this excerpt
REALIGN = True  # default `realign` for correct() and assemble()
SIGMA = 15  # default `sigma` for correct(), integrate() and assemble()
VERBOSE = 2  # default `verbose` for correct(), integrate() and assemble()

# Do batch correction on a list of data sets.
def correct(datasets_full, genes_list, return_dimred=False,
batch_size=None, verbose=VERBOSE, ds_names=None, dimred=DIMRED,
approx=APPROX, sigma=SIGMA, alpha=ALPHA, knn=KNN,
return_dense=False, hvg=None, union=False, realign=False):
batch_size=BATCH_SIZE, verbose=VERBOSE, ds_names=None,
dimred=DIMRED, approx=APPROX, sigma=SIGMA, alpha=ALPHA, knn=KNN,
return_dense=False, hvg=None, union=False, realign=REALIGN):
"""Integrate and batch correct a list of data sets.
Parameters
Expand All @@ -47,7 +49,7 @@ def correct(datasets_full, genes_list, return_dimred=False,
return_dimred: `bool`, optional (default: `False`)
In addition to returning batch corrected matrices, also returns
integrated low-dimensional embeddings.
batch_size: `int`, optional (default: `None`)
batch_size: `int`, optional (default: `5000`)
The batch size used in the alignment vector computation. Useful when
correcting very large (>100k samples) data sets. Set to a large value
that runs within available memory.
Expand Down Expand Up @@ -108,10 +110,10 @@ def correct(datasets_full, genes_list, return_dimred=False,
return datasets, genes

# Integrate a list of data sets.
def integrate(datasets_full, genes_list, batch_size=None, verbose=VERBOSE,
ds_names=None, dimred=DIMRED, approx=APPROX, sigma=SIGMA,
alpha=ALPHA, knn=KNN, geosketch=False, geosketch_max=20000,
n_iter=1, union=False, hvg=None):
def integrate(datasets_full, genes_list, batch_size=BATCH_SIZE,
verbose=VERBOSE, ds_names=None, dimred=DIMRED, approx=APPROX,
sigma=SIGMA, alpha=ALPHA, knn=KNN, geosketch=False,
geosketch_max=20000, n_iter=1, union=False, hvg=None):
"""Integrate a list of data sets.
Parameters
Expand All @@ -120,7 +122,7 @@ def integrate(datasets_full, genes_list, batch_size=None, verbose=VERBOSE,
Data sets to integrate and correct.
genes_list: `list` of `list` of `string`
List of genes for each data set.
batch_size: `int`, optional (default: `None`)
batch_size: `int`, optional (default: `5000`)
The batch size used in the alignment vector computation. Useful when
correcting very large (>100k samples) data sets. Set to a large value
that runs within available memory.
Expand Down Expand Up @@ -758,7 +760,7 @@ def transform(curr_ds, curr_ref, ds_ind, ref_ind, sigma, cn=False,
# values.
def assemble(datasets, verbose=VERBOSE, view_match=False, knn=KNN,
sigma=SIGMA, approx=APPROX, alpha=ALPHA, expr_datasets=None,
ds_names=None, batch_size=None, realign=False, geosketch=False,
ds_names=None, batch_size=None, realign=REALIGN, geosketch=False,
geosketch_max=20000):
if len(datasets) == 1:
return datasets
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

setup(
name='scanorama',
version='0.6',
version='0.6.1',
description='Panoramic stitching of heterogeneous single cell transcriptomic data',
url='https://github.com/brianhie/scanorama',
download_url='https://github.com/brianhie/scanorama/archive/v0.6.tar.gz',
download_url='https://github.com/brianhie/scanorama/archive/v0.6.1.tar.gz',
packages=find_packages(exclude=['bin', 'conf', 'data', 'target']),
install_requires=[
'annoy>=1.11.5',
Expand Down

0 comments on commit 2e81610

Please sign in to comment.