diff --git a/pixell/bench.py b/pixell/bench.py
new file mode 100644
index 00000000..fb71054b
--- /dev/null
+++ b/pixell/bench.py
@@ -0,0 +1,85 @@
+import time as _time
+from contextlib import contextmanager
+from . import bunch
+
+"""bench: Simple timing of python code blocks.
+
+Example usage:
+
+	1. Manual printing
+
+	from pixell import bench
+	for i in range(nfile):
+		with bench.mark("all"):
+			with bench.mark("read"):
+				a = np.loadtxt(afiles[i])
+				b = np.loadtxt(bfiles[i])
+			with bench.mark("sum"):
+				a += b
+			with bench.mark("write"):
+				np.savetxt(ofiles[i], a)
+		print("Processed case %d in %7.4f s. read %7.4f sum %7.4f write %7.4f" % (i, bench.t.all, bench.t.read, bench.t.sum, bench.t.write))
+	print("Total %7.4f s. read %7.4f sum %7.4f write %7.4f" % (i, bench.t_tot.all, bench.t_tot.read, bench.t_tot.sum, bench.t_tot.write))
+
+	2. Quick-and-dirty printing
+
+	from pixell import bench
+	for i in range(nfile):
+		with bench.show("read"):
+			a = np.loadtxt(afiles[i])
+			b = np.loadtxt(bfiles[i])
+		with bench.show("sum"):
+			a += b
+		with bench.show("write"):
+			np.savetxt(ofiles[i], a)
+
+	bench.show is equivalent to bench.mark, just with an extra print.
+	This means that bench.show updates .ttot and .n just like bench.mark
+	does.
+
+The examples above collect statistics globally. You can create local
+benchmark objects with bench.Bench(). Example:
+
+	from pixell import bench
+	mybench = bench.Bench()
+	with mybench.mark("example"):
+		do_something()
+
+The overhead of bench.mark is around 3 µs.
+"""
+
+# Just wall times for now, but could be extended to measure
+# cpu time or leaked memory
+class Bench:
+	def __init__(self):
+		self.t_tot  = bunch.Bunch()
+		self.t      = bunch.Bunch()
+		self.n      = bunch.Bunch()
+	@contextmanager
+	def mark(self, name):
+		if name not in self.n:
+			self.t_tot[name] = 0
+			self.n[name]    = 0
+		t1 = _time.time()
+		try:
+			yield
+		finally:
+			t2 = _time.time()
+			self.n[name] += 1
+			self.t    [name] = t2-t1
+			self.t_tot[name] += t2-t1
+	@contextmanager
+	def show(self, name):
+		try:
+			with self.mark(name):
+				yield
+		finally:
+			print("%7.4f s (last) %7.4f s (mean) %4d (n) %s" % (self.t[name], self.t_tot[name]/self.n[name], self.n[name], name))
+
+# Global interface
+_default = Bench()
+mark  = _default.mark
+show  = _default.show
+t_tot = _default.t_tot
+t     = _default.t
+n     = _default.n
diff --git a/pixell/curvedsky.py b/pixell/curvedsky.py
index b6d886db..ef802671 100644
--- a/pixell/curvedsky.py
+++ b/pixell/curvedsky.py
@@ -425,6 +425,10 @@ def __init__(self, lmax=None, mmax=None, nalm=None, stride=1, layout="triangular
 		if nalm is not None:
 			assert self.nelem == nalm, "lmax must be explicitly specified when lmax != mmax"
 		self.mstart= mstart.astype(np.uint64, copy=False)
+	@property
+	def nl(self): return self.lmax+1
+	@property
+	def nm(self): return self.mmax+1
 	def lm2ind(self, l, m):
 		return (self.mstart[m].astype(int, copy=False)+l*self.stride).astype(int, copy=False)
 	def get_map(self):
diff --git a/pixell/enmap.py b/pixell/enmap.py
index 49d0513e..da280990 100644
--- a/pixell/enmap.py
+++ b/pixell/enmap.py
@@ -77,7 +77,7 @@ def posaxes(self, safe=True, corner=False, dtype=np.float64): return posaxes(sel
 	def pixmap(self): return pixmap(self.shape, self.wcs)
 	def laxes(self, oversample=1, method="auto"): return laxes(self.shape, self.wcs, oversample=oversample, method=method)
 	def lmap(self, oversample=1): return lmap(self.shape, self.wcs, oversample=oversample)
-	def lform(self): return lform(self)
+	def lform(self, method="auto"): return lform(self, method=method)
 	def modlmap(self, oversample=1, min=0): return modlmap(self.shape, self.wcs, oversample=oversample, min=min)
 	def modrmap(self, ref="center", safe=True, corner=False): return modrmap(self.shape, self.wcs, ref=ref, safe=safe, corner=corner)
 	def lbin(self, bsize=None, brel=1.0, return_nhit=False, return_bins=False, lop=None): return lbin(self, bsize=bsize, brel=brel, return_nhit=return_nhit, return_bins=return_bins, lop=lop)
@@ -99,12 +99,12 @@ def npix(self): return np.prod(self.shape[-2:])
 	@property
 	def geometry(self): return self.shape, self.wcs
 	def resample(self, oshape, off=(0,0), method="fft", border="wrap", corner=False, order=3): return resample(self, oshape, off=off, method=method, border=border, corner=corner, order=order)
-	def project(self, shape, wcs, order=3, border="constant", cval=0, safe=True): return project(self, shape, wcs, order, border=border, cval=cval, safe=safe)
+	def project(self, shape, wcs, mode="spline", order=3, border="constant", cval=0, safe=True): return project(self, shape, wcs, order, mode=mode, border=border, cval=cval, safe=safe)
 	def extract(self, shape, wcs, omap=None, wrap="auto", op=lambda a,b:b, cval=0, iwcs=None, reverse=False): return extract(self, shape, wcs, omap=omap, wrap=wrap, op=op, cval=cval, iwcs=iwcs, reverse=reverse)
 	def extract_pixbox(self, pixbox, omap=None, wrap="auto", op=lambda a,b:b, cval=0, iwcs=None, reverse=False): return extract_pixbox(self, pixbox, omap=omap, wrap=wrap, op=op, cval=cval, iwcs=iwcs, reverse=reverse)
 	def insert(self, imap, wrap="auto", op=lambda a,b:b, cval=0, iwcs=None): return insert(self, imap, wrap=wrap, op=op, cval=cval, iwcs=iwcs)
 	def insert_at(self, pix, imap, wrap="auto", op=lambda a,b:b, cval=0, iwcs=None): return insert_at(self, pix, imap, wrap=wrap, op=op, cval=cval, iwcs=iwcs)
-	def at(self, pos, order=3, border="constant", cval=0.0, unit="coord", safe=True): return at(self, pos, order, border=border, cval=0, unit=unit, safe=safe)
+	def at(self, pos, mode="spline", order=3, border="constant", cval=0.0, unit="coord", safe=True, ip=None): return at(self, pos, mode=mode, order=order, border=border, cval=0, unit=unit, safe=safe, ip=ip)
 	def argmax(self, unit="coord"): return argmax(self, unit=unit)
 	def autocrop(self, method="plain", value="auto", margin=0, factors=None, return_info=False): return autocrop(self, method, value, margin, factors, return_info)
 	def apod(self, width, profile="cos", fill="zero"): return apod(self, width, profile=profile, fill=fill)
@@ -546,11 +546,51 @@ def contains(shape, wcs, pos, unit="coord"):
 	else:               pix = pos
 	return np.all((pix>=0)&(pix.T<shape[-2:]).T,0)
 
-def project(map, shape, wcs, order=3, border="constant", cval=0.0, force=False, safe=True, bsize=1000, ip=None):
-	"""Project the map into a new map given by the specified
-	shape and wcs, interpolating as necessary.
-	This uses local interpolation, and will lose information
-	when downgrading compared to averaging down."""
+def project(map, shape, wcs, mode="spline", order=3, border="constant",
+		cval=0.0, force=False, safe=True, bsize=1000, context=50, ip=None):
+	"""Project map to a new geometry.
+
+	This function is not suited for going down in resolution, because
+	only interpolation is done, not averaging. This means that if
+	the output geometry has lower resolution than the input, then
+	information will be lost because noise will not average down the
+	way it optimally would.
+
+	* map: enmap.ndmap of shape [...,ny,nx]
+	* shape, wcs: The geometry to project to
+	* mode: The interpolation mode. Same meaning as in utils.interpol.
+	  Valid values are "nearest", "linear", "cubic", "spline" and "fourier".
+	  "nearest" and "linear" are local interpolations, where one does
+	  not need to worry about edge effects and ringing. "cubic" and
+	  especially "fourier" are sensitive to the boundary conditions,
+	  and maps may need to be apodized first.  Only "fourier"
+	  interpolation preserves map power on all scales. The other
+	  types lose a bit of power at high multipoles.
+	  fourier > cubic > linear > nearest for high-l fidelity.
+	  "spline" is a generalization of "nearest", "linear" and "cubic",
+	  depending on the "order" argument: 0, 1 and 3.
+	* order: Controls the "spline" mode. See above.
+	* border: The boundary condition to assume for spline interpolation.
+	  Ignored for Fourier-interpolation, which always assumes periodic
+	  boundary conditions. Defaults to "constant", where areas outside
+	  the map are assumed to have the constant value "cval".
+	* cval: See "border".
+	* force: Force interpolation, even when the input and output pixels
+	  are directly compatible, so no interpolation is necessary. Normally
+	  the faster enmap.extract is used in this case.
+	* safe: If True (default) make extra effort to resolve 2pi sky
+	  wrapping degeneracies in the coordinate conversion.
+	* bsize: The interpolation is done in blocks in the y axis to save
+	  memory. This argument controls how many rows are processed at once.
+	* context: How much to pad each y block by. Used to avoid ringing due
+	  to discontinuities at block boundaries. Defaults to 50.
+	* ip: An interpolator object as returned by utils.interpolator(). If
+	  provided, this interpolator is used directly, and the interpolation
+	  arguments (mode, order, border, cval) are ignored. If the
+	  interpolator does not count as "prefiltered" (meaning that each use of
+	  the interpolator could incurr a potentially large cost regardless of
+	  how few points are interpolated), then the whole map is processed in
+	  one go, ignoring bsize"""
 	# Skip expensive operation if map is compatible
 	if not force:
 		if wcsutils.equal(map.wcs, wcs) and tuple(shape[-2:]) == tuple(shape[-2:]):
@@ -560,15 +600,28 @@ def project(map, shape, wcs, order=3, border="constant", cval=0.0, force=False,
 	omap = zeros(map.shape[:-2]+shape[-2:], wcs, map.dtype)
 	# Save memory by looping over rows. This won't work for non-"prefiltered" interpolators
 	if ip and not ip.prefiltered: bsize=100000000
+	# Avoid unneccessary padding for local cases
+	if   ip or (mode == "spline" and order == 0): context = 0
+	elif        mode == "spline" and order == 1 : context = 1
+	# It would have been nice to be able to use padtiles here, but
+	# the input and output tilings are very different
 	for i1 in range(0, shape[-2], bsize):
 		i2     = min(i1+bsize, shape[-2])
 		somap  = omap[...,i1:i2,:]
 		pix    = map.sky2pix(somap.posmap(), safe=safe)
-		y1     = max(np.min(pix[0]).astype(int)-3,0)
-		y2     = min(np.max(pix[0]).astype(int)+3,map.shape[-2])
-		if y2-y1 <= 0: continue
-		pix[0] -= y1
-		somap[:] = utils.interpol(map[...,y1:y2,:], pix, order=order, border=border, cval=cval, ip=ip)
+		if ip:
+			# Can't subdivide interpolator
+			band = map
+		else:
+			y1     = np.min(pix[0]).astype(int)-context
+			y2     = np.max(pix[0]).astype(int)+context+1
+			pix[0]-= y1
+			band   = map.extract_pixbox([[y1,0],[y2,map.shape[-1]]])
+			# Apodize if necessary
+			if context > 1:
+				band = apod(band, width=(context,0), fill="crossfade")
+		# And do the interpolation
+		somap[:] = utils.interpol(band, pix, mode=mode, order=order, border=border, cval=cval, ip=ip)
 	return omap
 
 def pixbox_of(iwcs,oshape,owcs):
@@ -721,9 +774,9 @@ def neighborhood_pixboxes(shape, wcs, poss, r):
 	res[...,1,:] += 1
 	return res
 
-def at(map, pos, order=3, border="constant", cval=0.0, unit="coord", safe=True, ip=None):
+def at(map, pos, mode="spline", order=3, border="constant", cval=0.0, unit="coord", safe=True, ip=None):
 	if unit != "pix": pos = sky2pix(map.shape, map.wcs, pos, safe=safe)
-	return utils.interpol(map, pos, order=order, border=border, cval=cval, ip=ip)
+	return utils.interpol(map, pos, mode=mode, order=order, border=border, cval=cval, ip=ip)
 
 def argmax(map, unit="coord"):
 	"""Return the coordinates of the maximum value in the specified map.
@@ -1962,21 +2015,17 @@ def crop_geometry(shape, wcs, box=None, pixbox=None, oshape=None):
 	# pixel i would be included for i-0.5, but not for i-0.6. We should
 	# thefore use rounding boundaries, we just have to make sure it's
 	# numerically stable
-
-
-	print("box", box/utils.degree)
-	print("mid", np.mean(box,0)/utils.degree)
-	print("pixbox", pixbox)
-	pixbox = utils.nint(pixbox)
-	print("pixbox2", pixbox)
-
-
+	#print("box", box/utils.degree)
+	#print("mid", np.mean(box,0)/utils.degree)
+	#print("pixbox", pixbox)
+	#pixbox = utils.nint(pixbox)
+	#print("pixbox2", pixbox)
 	# Handle 1d case
 	if pixbox.ndim == 1:
 		if oshape is None: raise ValueError("crop_geometry needs an explicit output shape when given a 1d box (i.e. a single point instead of a bounding box")
 		shp    = np.array(oshape[-2:])
 		pixbox = np.array([pixbox-shp//2,pixbox-shp//2+shp])
-	print("pixbox3", pixbox)
+	#print("pixbox3", pixbox)
 	# Can now proceed assuming 2d
 	oshape = tuple(shape[:-2]) + tuple(np.abs(pixbox[1]-pixbox[0]))
 	owcs   = wcs.deepcopy()
@@ -2144,7 +2193,7 @@ def shrink_mask(mask, r):
 	"""Shrink the True part of boolean mask "mask" by a distance of r radians"""
 	return mask.distance_transform(rmax=r) >= r
 
-def pad(emap, pix, return_slice=False, wrap=False):
+def pad(emap, pix, return_slice=False, wrap=False, value=0):
 	"""Pad enmap "emap", creating a larger map with zeros filled in on the sides.
 	How much to pad is controlled via pix, which har format [{from,to},{y,x}],
 	[{y,x}] or just a single number to apply on all sides. E.g. pix=5 would pad
@@ -2159,7 +2208,7 @@ def pad(emap, pix, return_slice=False, wrap=False):
 	w = emap.wcs.deepcopy()
 	w.wcs.crpix += pix[0,::-1]
 	# Construct a slice between the new and old map
-	res = zeros(emap.shape[:-2]+tuple([s+sum(p) for s,p in zip(emap.shape[-2:],pix.T)]),wcs=w, dtype=emap.dtype)
+	res = full(emap.shape[:-2]+tuple([s+sum(p) for s,p in zip(emap.shape[-2:],pix.T)]),wcs=w, val=value, dtype=emap.dtype)
 	mslice = (Ellipsis,slice(pix[0,0],res.shape[-2]-pix[1,0]),slice(pix[0,1],res.shape[-1]-pix[1,1]))
 	res[mslice] = emap
 	if wrap:
@@ -2263,36 +2312,72 @@ def _widen(map,n):
 def laplace(m):
 	return -ifft(fft(m)*np.sum(m.lmap()**2,0)).real
 
-def apod(m, width, profile="cos", fill="zero"):
-	"""Apodize the provided map. Currently only cosine apodization is
-	implemented.
-
-    Args:
-        imap: (...,Ny,Nx) or (Ny,Nx) ndarray to be apodized
-        width: The width in pixels of the apodization on each edge.
-        profile: The shape of the apodization. Only "cos" is supported.
-	"""
-	width = np.minimum(np.zeros(2)+width,m.shape[-2:]).astype(np.int32)
-	if profile == "cos":
-		a = [0.5*(1-np.cos(np.linspace(0,np.pi,w))) for w in width]
-	else:
-		raise ValueError("Unknown apodization profile %s" % profile)
-	res = m.copy()
+#def apod(m, width, profile="cos", fill="zero"):
+#	"""Apodize the provided map. Currently only cosine apodization is
+#	implemented.
+#
+#    Args:
+#        imap: (...,Ny,Nx) or (Ny,Nx) ndarray to be apodized
+#        width: The width in pixels of the apodization on each edge.
+#        profile: The shape of the apodization. Only "cos" is supported.
+#	"""
+#	width = np.minimum(np.zeros(2)+width,m.shape[-2:]).astype(np.int32)
+#	if profile == "cos":
+#		a = [0.5*(1-np.cos(np.linspace(0,np.pi,w))) for w in width]
+#	else:
+#		raise ValueError("Unknown apodization profile %s" % profile)
+#	res = m.copy()
+#	if fill == "mean":
+#		offset = np.asarray(np.mean(res,(-2,-1)))[...,None,None]
+#		res -= offset
+#	elif fill == "median":
+#		offset = np.asarray(np.median(res,(-2,-1)))[...,None,None]
+#		res -= offset
+#	if width[0] > 0:
+#		res[...,:width[0],:] *= a[0][:,None]
+#		res[...,-width[0]:,:] *= a[0][::-1,None]
+#	if width[1] > 0:
+#		res[...,:,:width[1]] *= a[1][None,:]
+#		res[...,:,-width[1]:]  *= a[1][None,::-1]
+#	if fill == "mean" or fill == "median":
+#		res += offset
+#	return res
+
+def apod(map, width, profile="cos", fill="zero", inplace=False):
+	width = (np.zeros(2,int)+width).astype(int)
+	if not inplace: map = map.copy()
 	if fill == "mean":
-		offset = np.asarray(np.mean(res,(-2,-1)))[...,None,None]
-		res -= offset
+		offset = np.mean(map,(-2,-1))[...,None,None]
+		map   -= offset
 	elif fill == "median":
-		offset = np.asarray(np.median(res,(-2,-1)))[...,None,None]
-		res -= offset
-	if width[0] > 0:
-		res[...,:width[0],:] *= a[0][:,None]
-		res[...,-width[0]:,:] *= a[0][::-1,None]
-	if width[1] > 0:
-		res[...,:,:width[1]] *= a[1][None,:]
-		res[...,:,-width[1]:]  *= a[1][None,::-1]
-	if fill == "mean" or fill == "median":
-		res += offset
-	return res
+		offset = np.median(map,(-2,-1))[...,None,None]
+		map   -= offset
+	# Process the axes one by one
+	for i, w in enumerate(width):
+		if w <= 0: continue
+		if fill == "crossfade":
+			x = np.arange(1,w+1,dtype=map.dtype)/(2*w+1)
+		else:
+			x = np.arange(1,w+1,dtype=map.dtype)/(w+1)
+		if   profile == "lin": prof = apod_profile_lin(x)
+		elif profile == "cos": prof = apod_profile_cos(x)
+		else: raise ValueError("Unknown apodization profile '%s'" % str(profile))
+		# Apply the apodization
+		slice1 = (Ellipsis,)+(slice(None),)*i    +(slice(0,w),)+(slice(None),)*(1-i)
+		slice2 = (Ellipsis,)+(slice(None),)*i    +(slice(-w,None),)+(slice(None),)*(1-i)
+		broad  = (None,)*i+(slice(None),)+(None,)*(1-i)
+		m1 = map[slice1].copy()
+		m2 = map[slice2].copy()
+		if fill == "crossfade":
+			map[slice1] = m1*(1-prof)[::-1][broad]+m2*prof[::-1][broad]
+			map[slice2] = m2*(1-prof)[broad]+m1*prof[broad]
+		elif fill in ["mean", "median", "zero"]:
+			map[slice1] *= prof[broad]
+			map[slice2] *= prof[::-1][broad]
+	# Add in offsets if necessary
+	if fill in ["mean", "median"]:
+		map += offset
+	return map
 
 def apod_profile_lin(x): return x
 def apod_profile_cos(x): return 0.5*(1-np.cos(np.pi*x))
@@ -2310,7 +2395,7 @@ def apod_mask(mask, width=1*utils.degree, edge=True, profile=apod_profile_cos):
 	r = mask.distance_transform(rmax=width)
 	return profile(r/width)
 
-def lform(map):
+def lform(map, method="auto"):
 	"""Given an enmap, return a new enmap that has been fftshifted (unless shift=False),
 	and which has had the wcs replaced by one describing fourier space. This is mostly
 	useful for plotting or writing 2d power spectra.
@@ -2319,12 +2404,12 @@ def lform(map):
 	are assumed to need conversion between degrees and radians, sky2pix etc. get confused
 	when applied to lform-maps."""
 	omap = fftshift(map)
-	omap.wcs = lwcs(map.shape, map.wcs)
+	omap.wcs = lwcs(map.shape, map.wcs, method=method)
 	return omap
 
-def lwcs(shape, wcs):
+def lwcs(shape, wcs, method="auto"):
 	"""Build world coordinate system for l-space"""
-	lres   = 2*np.pi/extent(shape, wcs, signed=True)
+	lres   = 2*np.pi/extent(shape, wcs, signed=True, method=method)
 	ny, nx = shape[-2:]
 	owcs   = wcsutils.explicit(crpix=[nx//2+1,ny//2+1], crval=[0,0], cdelt=lres[::-1])
 	return owcs
diff --git a/pixell/fft.py b/pixell/fft.py
index 88c69ad2..94e3466a 100644
--- a/pixell/fft.py
+++ b/pixell/fft.py
@@ -426,7 +426,7 @@ def transfer(dest, source, norm, op):
 	return out
 
 def interpol_nufft(a, inds, out=None, axes=None, normalize=True,
-		periodicity=None, epsilon=None, nthread=None, nofft=False):
+		periodicity=None, epsilon=None, nthread=None, nofft=False, complex=False):
 	"""Given some array a[{pre},{dims}] interpolate it at the given
 	inds[len(dims),{post}], resulting in an output with shape [{pre},{post}].
 	The signal is assumed to be periodic with the size of a unless this is overridden
@@ -442,9 +442,9 @@ def interpol_nufft(a, inds, out=None, axes=None, normalize=True,
 	# with this is that a full fourier-array needs to be allocated. I can save
 	# some memory by instead doing the fft per field, at the cost of it being
 	# a bit hacky
-	op = None if nofft else lambda a, h: fft.fft(a, nthread=h.nthread, axes=h.axes)
+	op = None if nofft else lambda a, h: fft(a, nthread=h.nthread, axes=h.axall)
 	return u2nu(a, inds, out=out, axes=axes, periodicity=periodicity,
-		epsilon=epsilon, nthread=nthread, normalize=normalize, complex=False, op=op)
+		epsilon=epsilon, nthread=nthread, normalize=normalize, complex=complex, op=op)
 
 def u2nu(fa, inds, out=None, axes=None, periodicity=None, epsilon=None, nthread=None,
 			normalize=False, forward=False, complex=True, op=None):
@@ -587,17 +587,73 @@ def A(x): return wzip(PT(P(wunzip(x))))
 # TODO: Add proper docstrings here. Can I avoid lots of repetition?
 
 def nufft(a, inds, out=None, oshape=None, axes=None, periodicity=None, epsilon=None, nthread=None, normalize=False, flip=False):
+	"""Alias of iu2nu(..., forward=flip). This involves inverting a system with conjugate gradients"""
 	return iu2nu(a, inds, out=out, oshape=oshape, axes=axes, periodicity=periodicity, epsilon=epsilon, nthread=nthread, normalize=normalize, forward=flip)
 
 def inufft(fa, inds, out=None, axes=None, periodicity=None, epsilon=None, nthread=None, normalize=False, flip=False, complex=True, op=None):
+	"""Alias of u2nu(..., forward=flip)"""
 	return u2nu(fa, inds, out=out, axes=axes, periodicity=periodicity, epsilon=epsilon, nthread=nthread, normalize=normalize, forward=flip, complex=complex, op=op)
 
 def nufft_adjoint(a, inds, out=None, oshape=None, axes=None, periodicity=None, epsilon=None, nthread=None, normalize=False, flip=False):
+	"""Alias of nu2u(..., forward=not flip)"""
 	return nu2u(a, inds, out=out, oshape=oshape, axes=axes, periodicity=periodicity, epsilon=epsilon, nthread=nthread, normalize=normalize, forward=not flip)
 
 def inufft_adjoint(fa, inds, out=None, axes=None, periodicity=None, epsilon=None, nthread=None, normalize=False, flip=False, complex=True):
+	"""Alias of inu2u(..., forward=not flip). This involves inverting a system with conjugate gradients"""
 	return inu2u(fa, inds, out=out, axes=axes, periodicity=periodicity, epsilon=epsilon, nthread=nthread, normalize=normalize, forward=not flip)
 
+# Incremental nufft. This is useful for low-latency interpolation, or interpolation
+# where you don't know how many points there will be
+
+# Must figure out how much can be factorized out here. Sadly it wasn't
+# practicaly to use _nufft_helper here. Will wait with factorization until
+# I implement nu2u_plan
+class u2nu_plan:
+	def __init__(self, fa, axes, periodicity=None, epsilon=None, nthread=None,
+			normalize=False, forward=False, complex=True, op=None):
+		# Will set up one plan per fft-field
+		fa           = np.asarray(fa)
+		self.axes    = utils.astuple(axes)
+		self.shape   = fa.shape
+		self.gshape  = tuple([self.shape[ax] for ax in self.axes])
+		self.paxes   = tuple(utils.complement_inds(self.axes, fa.ndim))
+		self.pshape  = tuple([self.shape[ax] for ax in self.paxes])
+		if op is None: op = lambda fa: fa
+		if periodicity is None: periodicity = self.gshape
+		else: periodicity = np.zeros(len(self.axes),int)+periodicity
+		self.nthread = nthread or nthread_fft
+		self.plans = []
+		for I in utils.nditer(self.shape, axes=self.paxes):
+			faI        = op(fa[I])
+			self.ctype = faI.dtype
+			# Target accuracy
+			if epsilon is None:
+				epsilon = 1e-5 if self.ctype == np.complex64 else 1e-12
+			plan       = ducc0.nufft.experimental.incremental_u2nu(
+				grid=faI, epsilon=epsilon, nthreads=self.nthread, forward=forward,
+				periodicity=periodicity, fft_order=True)
+			self.plans.append(plan)
+		self.epsilon = epsilon
+		self.forward = forward
+		self.dtype   = utils.real_dtype(self.ctype)
+		self.ndim    = len(self.axes)
+		self.complex = complex
+		self.norm    = np.prod([fa.shape[ax] for ax in axes])
+		self.normalize = normalize
+	def eval(self, inds, out=None):
+		inds  = np.asarray(inds, dtype=self.dtype)
+		iflat = inds.reshape(self.ndim,-1).T
+		if out is None:
+			out = np.zeros(self.pshape+inds.shape[1:], self.ctype if self.complex else self.dtype)
+		oflat = out.reshape(len(self.plans),iflat.shape[0])
+		for i, plan in enumerate(self.plans):
+			vals = self.plans[i].get_points(coord=iflat)
+			if not self.complex: vals = vals.real
+			oflat[i] = vals
+			del vals
+		if self.normalize:
+			out /= self.norm
+		return out
 
 ########### Helper functions ##############
 
diff --git a/pixell/reproject.py b/pixell/reproject.py
index 12493896..e6a1f015 100644
--- a/pixell/reproject.py
+++ b/pixell/reproject.py
@@ -92,7 +92,7 @@ def thumbnails(imap, coords, r=5*utils.arcmin, res=None, proj=None, apod=2*utils
 		# I apologize for the syntax. There should be a better way of doing this
 		ipos = coordinates.transform("cel", ["cel",[[0,0,coords[si,1],coords[si,0]],False]], opos[::-1], pol=pol)
 		ipos, rest = ipos[1::-1], ipos[2:]
-		omaps[si] = ithumb.at(ipos, order=order)
+		omaps[si] = ithumb.at(ipos, mode="spline", order=order)
 		# Apply the polarization rotation. The sign is flipped because we computed the
 		# rotation from the output to the input
 		if pol: omaps[si] = enmap.rotate_pol(omaps[si], -rest[0])
@@ -232,7 +232,7 @@ def map2healpix(imap, nside=None, lmax=None, out=None, rot=None, spin=[0,2], met
 				# Not sure why the [::-1] is necessary here. Maybe psi,theta,phi vs. phi,theta,psi?
 				pos = coordinates.transform_euler(inv_euler(rot2euler(rot))[::-1], pos, pol=pol)
 			# The actual interpolation happens here
-			vals  = imap_pre.at(pos[1::-1], order=order, prefilter=False, mode=boundary)
+			vals  = imap_pre.at(pos[1::-1], order=order, prefilter=False, mode="spline", border=boundary)
 			if rot is not None and imap.ndim > 2:
 				# Update the polarization to account for the new coordinate system
 				for s, c1, c2 in enmap.spin_helper(spin, imap.shape[-3]):
diff --git a/pixell/utils.py b/pixell/utils.py
index fd72f2d5..2ae3577b 100644
--- a/pixell/utils.py
+++ b/pixell/utils.py
@@ -610,17 +610,41 @@ def __call__(self, inds, out=None):
 		return out
 
 class FourierInterpolator:
-	prefiltered = False
-	def __init__(self, arr, npre=0, epsilon=None):
+	def __init__(self, arr, npre=0, epsilon=None, precompute="plan"):
+		"""When plan=True, uses incremental u2nu. This requires
+		constructing a plan per field, overhead 10x arr. When
+		plan=False, precomputes just the """
 		from . import fft
-		self.npre = npre % arr.ndim
-		self.arr  = np.asanyarray(arr)
-		self.epsilon = epsilon
-		self.farr = fft.fft(arr, axes=tuple(range(self.npre,arr.ndim)))
+		self.npre        = npre % arr.ndim
+		self.arr         = np.asanyarray(arr)
+		self.epsilon     = epsilon
+		self.complex     = np.iscomplexobj(arr)
+		self.precompute  = precompute
+		axes = tuple(range(-arr.ndim+npre,0,1))
+		self.prefiltered = False
+		if precompute == "plan":
+			# Memory overhead: 10x arr size
+			self.plan = fft.u2nu_plan(arr, axes=axes, op=lambda arr: fft.fft(arr, axes=axes),
+					normalize=True, epsilon=self.epsilon, complex=self.complex)
+			self.prefiltered = True
+		elif precompute == "fft":
+			# Memory overhead: 2x arr size, +10x field size when calling
+			self.farr = fft.fft(arr, axes=axes)
+		elif precompute == "none":
+			self.arr  = arr
+		else:
+			raise ValueError("Invalid value of precompute: '%s'. Valid values are plan, fft or none" % str(precompute))
 	def __call__(self, inds, out=None):
 		from . import fft
 		inds, out = _ip_prepare(self, inds, out=out)
-		out = fft.interpol_nufft(self.farr, inds, out=out, nofft=True, epsilon=self.epsilon)
+		if self.precompute == "plan":
+			out = self.plan.eval(inds, out=out)
+		elif self.precompute == "fft":
+			out = fft.interpol_nufft(self.farr, inds, out=out, nofft=True,
+				epsilon=self.epsilon, complex=self.complex)
+		else:
+			out = fft.interpol_nufft(self.arr, inds, out=out,
+				epsilon=self.epsilon, complex=self.complex)
 		return out
 
 def _ip_get_mode(mode, order):
@@ -3443,6 +3467,11 @@ def replace(istr, ipat, repl):
 	if ostr == istr: raise KeyError("Pattern not found")
 	return ostr
 
+def regreplace(istr, ipat, repl, count=0, flags=0):
+	ostr, n = re.subn(ipat, repl, istr, count=count, flags=flags)
+	if n == 0: raise KeyError("Pattern not found")
+	return ostr
+
 # I used to do stuff like a[~np.isfinite(a)] = 0, but this should be
 # lower overhad and faster
 def remove_nan(a):
@@ -3580,11 +3609,14 @@ def distpow(dist, N):
 def airy(x):
 	"""Dimensionless real-space representation of Airy beam, normalized to peak at 1.
 	To get the airy beam an angular distance r from the center for a telescope with
-	aperture diameter D at wavelength λ, use airy(sin(r)*(2*pi*D/λ)).
+	aperture diameter D at wavelength λ, use airy(sin(r)/2*(2*pi*D/λ)).
+	This beam has an FWHM in terms of x of 3.2326799. So for small beams,
+	FWHM = 3.2326799 λ/(D*pi) radians. This works out to quite a bit smaller than
+	our beam, though. E.g. 1.17 arcmin where I expected 1.4 arcmin.
 	"""
 	# Avoid division by zero at low radius
 	with nowarn():
-		return np.where(x<1e-6, 1-x**2, (scipy.special.j1(2*x)/x)**2)
+		return np.where(x<1e-6, 1-0.25*x**2, (2*scipy.special.j1(x)/x)**2)
 
 def lairy(x):
 	"""This is the harmonic space representation of an Airy beam.
@@ -3598,6 +3630,8 @@ def lairy(x):
 	x = np.clip(x,0,1)
 	return (np.arccos(x)-x*(1-x**2)**0.5)/(np.pi/2)
 
+def airy_lmax(D, λ): return 2*np.pi*D/λ
+
 def airy_area(D, λ):
 	"""Area (steradians) of airy beam for an aperture of size D and wavelength λ.
 	This is simply (2λ/D)²/π"""
@@ -3647,36 +3681,31 @@ def _disk_overlap_curved_tiny(d, R):
 	First order accuracy in d"""
 	return 2*np.pi*(1-np.cos(R)) - 4*np.sin(R)*np.sin(d/2)
 
-# Hm, the first bin can be exceptional, so use the 2nd instead.
-# Would be easier if we could # demand that dl_2 = dl_3. Would
-# then remove b_2 from the equation system, and say
-# b_2 = b_3-(b_4-b_3) = 2b_3-b_4. This would give
-# l_1 = 0.5*(b_1+b_2) = 0.5*(b_1+2b_3-b_4)
-# l_2 = 0.5*(b_2+b_3) = 0.5*(3b_3-b_4)
-#
-# l = 0.5*[1 2 -1 0 ...]
-#         [0 3 -1 0 ...]
-#         [0 1  1 0 ...]
-#         [............]
-def infer_bin_edges(l):
-	"""Given bin centers l[n], returns bin edges b[n+1] such
-	that l = 0.5*(b[1:]+b[:-1]) under the assumption
-	b[2] = (b[1]+b[3])/2. This is equivalent to assuming that
-	the 2nd and 3rd bins have the same size. The problem is
-	underspecified, so an assumption like this is needed, but
-	it could be generalized exactly what it is.
-	"""
+def infer_bin_edges(centers, ref=1):
+	"""Given a list of bin centers[n], returns the corresponding
+	bin edges[n+1] such that centers=0.5*(edges[:-1]+edges[1:]).
+	Since the system is underdetermined, an extra assumption is
+	needed. This function assumes that the two consecutive bins
+	starting at index "ref" have equal width. The default, 1,
+	means that the 2nd and 3rd bins are assumed to have equal
+	width. This was chosen because the first bin often doesn't
+	follow the same pattern as the others."""
 	from scipy import sparse
-	n = len(l)
-	P = 0.5*sparse.csr_array(
+	# Equation system
+	# [c1]                  [0.5 0.5 0   0 ...]
+	# [c2]               =  [0   0.5 0.5 0 ...]
+	# [..]                  [      ......     ]
+	# [c(ref+1)-cref]       [... -1 1 ........]
+	n = len(centers)
+	P = sparse.csr_array(
 		(
-			np.concatenate([[1,2,-1,3,-1],np.ones(2*(n-2))]),
+			np.concatenate([np.full(2*n, 0.5), [-1,1]]),
 			(
-				np.concatenate([[0,0,0,1,1],np.arange(2,n),np.arange(2,n)]),
-				np.concatenate([[0,1,2,1,2],np.arange(1,n-1),np.arange(2,n)]),
+				np.concatenate([np.arange(0,n),np.arange(0,n),[n,n]]),
+				np.concatenate([np.arange(0,n),np.arange(1,n+1),[ref,ref+1]])
 			)
-		), shape=(n,n)
+		), shape=(n+1,n+1)
 	)
-	b  = sparse.linalg.spsolve(P.T.dot(P), P.T.dot(l))
-	b  = np.concatenate([b[:1],[2*b[1]-b[2]],b[1:]])
-	return b
+	rhs   = np.concatenate([centers,[centers[ref+1]-centers[ref]]])
+	edges = sparse.linalg.spsolve(P.T.dot(P), P.T.dot(rhs))
+	return edges