@@ -5,68 +5,170 @@ Parallel processing wrapper for rasterio
5
5
6
6
|Build Status |
7
7
8
+ Install
9
+ -------
10
+
11
+ From pypi:
12
+
13
+ ``pip install rio-mucho --pre ``
14
+
15
+ From github (usually for a branch / dev):
16
+
17
+ ``pip install git+ssh://git@github.com/mapbox/rio-mucho.git@<branch>``
18
+
19
+ Development:
20
+
21
+ ::
22
+
23
+ git clone git@github.com:mapbox/rio-mucho.git
24
+ cd rio-mucho
25
+ pip install -e .
26
+
8
27
Usage
9
28
-----
10
29
11
- 1. Define a function to be applied to each window chunk. This should
12
- have input arguments of:
30
+ .. code :: python
31
+
32
+ with riomucho.RioMucho([{inputs}], {output}, {run function},
33
+ windows = {windows},
34
+ global_args = {global arguments},
35
+ meta = {meta to write}) as rios:
36
+
37
+ rios.run({processes})
38
+
39
+ Arguments
40
+ ~~~~~~~~~
41
+
42
+ ``inputs ``
43
+ ^^^^^^^^^^
44
+
45
+ A list of file paths to open and read.
46
+
47
+ ``output ``
48
+ ^^^^^^^^^^
49
+
50
+ What file to write to.
51
+
52
+ ``run_function ``
53
+ ^^^^^^^^^^^^^^^^
54
+
55
+ A function to be applied to each window chunk. This should have input
56
+ arguments of:
57
+
58
+ 1. A data input, which can be one of:
59
+
60
+ - A list of numpy arrays of shape (x,y,z), one for each file as
61
+ specified in input file list ``mode="simple_read" [default] ``
62
+ - A numpy array of shape ({*n* input files x *n* band count}, {window
63
+ rows}, {window cols}) ``mode="array_read"``
64
+ - A list of open sources for reading ``mode="manual_read" ``
13
65
14
- - A list of numpy arrays (one for each file as specified in input file
15
- list) of shape ``({bands}, {window rows}, {window cols}) ``
16
- - A ``rasterio `` window tuple
17
- - A ``rasterio `` window index (``ij ``)
18
- - A global arguments object that you can use to pass in global
66
+ 2. A ``rasterio `` window tuple
67
+ 3. A ``rasterio `` window index (``ij ``)
68
+ 4. A global arguments object that you can use to pass in global
19
69
arguments
20
70
71
+ This should return:
72
+
73
+ 1. An output array of ({count}, {window rows}, {window cols}) shape, and
74
+ of the correct data type for writing
75
+
21
76
.. code :: python
22
77
23
- def basic_run (data , window , ij , g_args ):
24
- return data[0 ]
78
+ def basic_run ({data}, {window}, {ij}, {global args}):
79
+ # # do something
80
+ return {out}
81
+
82
+ Keyword arguments
83
+ ~~~~~~~~~~~~~~~~~
84
+
85
+ ``windows={windows} ``
86
+ ^^^^^^^^^^^^^^^^^^^^^
87
+
88
+ A list of ``rasterio `` (window, ij) tuples to operate on.
89
+ ``[Default = src[0].block_windows()] ``
25
90
26
- 2. Alternatively, for more flexibility, you can use a "manual read"
27
- where you read each raster in this function. This is useful if you
28
- want to read / write different window sizes (eg for pansharpening, or
29
- buffered window reading). Here, instead of a list of arrays, the
30
- function is passed an array of rasters open for reading.
91
+ `` global_args={global arguments} ``
92
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
93
+
94
+ Since this is working in parallel, pass here any other objects / values that you
95
+ want to be accessible in the `` run_function ``. `` [Default = {}] ``
31
96
32
97
.. code :: python
33
98
34
- def basic_run (open_files , window , ij , g_args ):
35
- return numpy.array([f.read(window = window)[0 ] for f in open_files]) / g_args[' divide' ]
99
+ global_args = {
100
+ ' divide_value' : 2
101
+ }
36
102
37
- For both of these, an array of identical shape to the destination window
38
- should be returned.
103
+ `` meta={keyword args} ``
104
+ ^^^^^^^^^^^^^^^^^^^^^^^
39
105
40
- 3. To run, make some windows, get or make some keyword args for writing,
41
- and pass these and the above function into ``riomucho ``: \`\`\` python
42
- import riomucho, rasterio, numpy
106
+ The meta to pass to the output. ``[Default = srcs[0].meta]``
43
107
44
- get windows from an input
45
- =========================
108
+ Example
109
+ -------
46
110
47
- with rasterio.open('/tmp/test\_ 1.tif') as src: windows = [[window, ij]
48
- for ij, window in src.block\_ windows()] kwargs = src.meta # since we are
49
- only writing to 2 bands kwargs.update(count=2)
111
+ .. code :: python
50
112
51
- global \_ args = { 'divide': 2 }
113
+ import riomucho, rasterio, numpy
52
114
53
- processes = 4
115
+ def basic_run (data , window , ij , g_args ):
116
+ # # do something
117
+ out = numpy.array(
118
+ [d[0] / g_args['divide'] for d in data]
119
+ )
120
+ return out
54
121
55
- run it
56
- ======
122
+ # get windows from an input
123
+ with rasterio.open(' /tmp/test_1.tif' ) as src:
124
+ # # grabbing the windows as an example. Default behavior is identical.
125
+ windows = [[window, ij] for ij, window in src.block_windows()]
126
+ meta = src.meta
127
+ # since we are only writing to 2 bands
128
+ meta.update(count = 2 )
57
129
58
- with riomucho.RioMucho(['input1.tif','input2, input2.tif'],
59
- 'output.tif', basic \_ run, windows=windows, global \_ args=global \_ args,
60
- kwargs=kwargs) as rm:
130
+ global_args = {
131
+ ' divide ' : 2
132
+ }
61
133
62
- ::
134
+ processes = 4
135
+
136
+ # run it
137
+ with riomucho.RioMucho([' input1.tif' ,' input2.tif' ], ' output.tif' , basic_run,
138
+ windows = windows,
139
+ global_args = global_args,
140
+ meta = meta) as rm:
63
141
64
- rm.run(processes)
142
+ rm.run(processes)
143
+
144
+ Utility functions
145
+ -----------------
146
+
147
+ riomucho.utils.array\_stack([array, array, array,...])
148
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
149
+
150
+ Given a list of ({depth}, {rows}, {cols}) numpy arrays, stack into a
151
+ single ({list length \* each image depth}, {rows}, {cols}) array. This
152
+ is useful for handling variation between ``rgb `` inputs of a single
153
+ file, or separate files for each.
154
+
155
+ One RGB file
156
+ ^^^^^^^^^^^^
157
+
158
+ .. code :: python
159
+
160
+ files = [' rgb.tif' ]
161
+ open_files = [rasterio.open(f) for f in files]
162
+ rgb = riomucho.utils.array_stack([src.read() for src in open_files])
163
+
164
+ Separate RGB files
165
+ ^^^^^^^^^^^^^^^^^^
166
+
167
+ .. code :: python
65
168
66
- \`\`\` - If no windows are specified, rio-mucho uses the block windows
67
- of the first input raster - If no kwargs are specified, rio-mucho uses
68
- the kwargs of the first input dataset to write to output - If no global
69
- args are specified, an empty object is passed.
169
+ files = [' r.tif' , ' g.tif' , ' b.tif' ]
170
+ open_files = [rasterio.open(f) for f in files]
171
+ rgb = riomucho.utils.array_stack([src.read() for src in open_files])
70
172
71
173
.. |Build Status | image :: https://travis-ci.org/mapbox/rio-mucho.svg?branch=master
72
174
:target: https://travis-ci.org/mapbox/rio-mucho
0 commit comments