1 """
2 Module for reading multi-file netCDF Datasets, making variables
3 spanning multiple files appear as if they were in one file.
4
5 Datasets must be in C{NETCDF4_CLASSIC, NETCDF3_CLASSIC or NETCDF3_64BIT}
6 format (C{NETCDF4} Datasets won't work).
7
8 Adapted from U{pycdf <http://pysclint.sourceforge.net/pycdf>} by Andre Gosselin.
9
10 Example usage:
11
12 >>> import MFDataset, netCDF4_classic, glob, numpy
13 >>> # create a series of netCDF files with a variable sharing
14 >>> # the same unlimited dimension.
15 >>> for nfile in range(10):
16 >>> f = netCDF4_classic.Dataset('mftest'+repr(nfile)+'.nc','w')
17 >>> f.createDimension('x',None)
18 >>> x = f.createVariable('x','i',('x',))
19 >>> x[0:10] = numpy.arange(nfile*10,10*(nfile+1))
20 >>> f.close()
21 >>> # now read all those files in at once, in one Dataset.
22 >>> files = glob.glob('mftest*.nc')
23 >>> f = MFDataset.Dataset(files)
24 >>> print f.variables['x'][:]
25 [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
26 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
27 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
28 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99]
29 """

import netCDF4_classic
import numpy

__version__ = "0.5"

class Dataset(netCDF4_classic.Dataset):
    """
    Class for reading a multi-file netCDF dataset.
    """

    def __init__(self, files):
        """
        Open a Dataset spanning multiple files, making it look as if it were a
        single file. Variables in the list of files that share the same
        unlimited dimension are aggregated.

        Adapted from U{pycdf <http://pysclint.sourceforge.net/pycdf>} by Andre Gosselin.

        @param files: sequence of netCDF files; the first one will become the
        "master" file, defining all the record variables (variables with an
        unlimited dimension) which may span subsequent files. Attribute access
        returns attributes only from the "master" file. The files are always
        opened in read-only mode.
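
        For example (a minimal sketch with hypothetical file and variable
        names, assuming the files define the same record variables along a
        shared unlimited dimension):

        >>> f = MFDataset.Dataset(['day_01.nc', 'day_02.nc', 'day_03.nc'])
        >>> data = f.variables['temp'][:]  # reads across all three files
        >>> f.close()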
54 """
55
56
57
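        # Open the first file in the list as the "master" dataset.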
        master = files[0]
        cdfm = netCDF4_classic.Dataset(master)

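        # Copy the master's global attributes onto this instance.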
        for name, value in cdfm.__dict__.items():
            self.__dict__[name] = value

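        # Make sure the master defines an unlimited dimension.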
        unlimDimId = None
        for dimname, dim in cdfm.dimensions.items():
            if dim.isunlimited():
                unlimDimId = dim
                unlimDimName = dimname
        if unlimDimId is None:
            raise IOError("master dataset %s does not have an unlimited dimension" % master)

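        # Get info on all record variables defined in the master, and make
        # sure the master defines at least one record variable.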
        masterRecVar = {}
        for vName, v in cdfm.variables.items():
            dims = v.dimensions
            shape = v.shape
            type = v.dtype
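            # Only variables whose first dimension is the unlimited dimension
            # are record variables.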
            if (len(dims) > 0 and unlimDimName == dims[0]):
                masterRecVar[vName] = (dims, shape, type)
        if len(masterRecVar) == 0:
            raise IOError("master dataset %s does not have any record variable" % master)
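        # cdf       : list of Dataset instances, one per file
        # cdfVLen   : length of the unlimited dimension in each file
        # cdfRecVar : record variable name -> list of Variable instances,
        #             one per file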
        cdf = [cdfm]
        self._cdf = cdf
        cdfVLen = [len(unlimDimId)]
        cdfRecVar = {}
        for v in masterRecVar.keys():
            cdfRecVar[v] = [cdfm.variables[v]]

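        # Open each remaining file in read-only mode and make sure it defines
        # the same record variables as the master, with matching dimensions,
        # shape and type.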
        for f in files[1:]:
            part = netCDF4_classic.Dataset(f)
            varInfo = part.variables
            for v in masterRecVar.keys():
                # Make sure the record variable is defined in this file.
                if v not in varInfo.keys():
                    raise IOError("record variable %s not defined in %s" % (v, f))
                # Make sure it is a record variable in this file as well.
                vInst = part.variables[v]
                if not part.dimensions[vInst.dimensions[0]].isunlimited():
                    raise IOError("variable %s is not a record var inside %s" % (v, f))

                masterDims, masterShape, masterType = masterRecVar[v][:3]
                extDims = varInfo[v].dimensions
                extShape = varInfo[v].shape
                extType = varInfo[v].dtype

                # Check that the dimension names match.
                if masterDims != extDims:
                    raise IOError("variable %s : dimensions mismatch between "
                                  "master %s (%s) and extension %s (%s)" %
                                  (v, master, masterDims, f, extDims))

                # Check that the ranks match, and that the shapes match along
                # every dimension except the unlimited (first) one.
                if len(masterShape) != len(extShape):
                    raise IOError("variable %s : rank mismatch between "
                                  "master %s (%s) and extension %s (%s)" %
                                  (v, master, len(masterShape), f, len(extShape)))
                if masterShape[1:] != extShape[1:]:
                    raise IOError("variable %s : shape mismatch between "
                                  "master %s (%s) and extension %s (%s)" %
                                  (v, master, masterShape, f, extShape))

                # Check that the data types match.
                if masterType != extType:
                    raise IOError("variable %s : data type mismatch between "
                                  "master %s (%s) and extension %s (%s)" %
                                  (v, master, masterType, f, extType))

                # Everything checks out; remember this file's instance of the
                # record variable.
                cdfRecVar[v].append(vInst)

            cdf.append(part)
            cdfVLen.append(len(part.dimensions[unlimDimName]))

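        # Attach the aggregation bookkeeping to the instance: the list of
        # files, the unlimited-dimension length of each file, and their total.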
        self._files = files
        self._cdfVLen = cdfVLen
        self._cdfTLen = reduce(lambda x, y: x + y, cdfVLen)
        self._cdfRecVar = cdfRecVar

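        # Replace the unlimited dimension and each record variable with
        # proxy objects that span the whole set of files.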
        self._dims = cdfm.dimensions
        for dimname, dim in self._dims.items():
            if dim.isunlimited():
                self._dims[dimname] = _Dimension(dimname, dim, self._cdfVLen, self._cdfTLen)
        self._vars = cdfm.variables
        for varname, var in self._vars.items():
            if varname in self._cdfRecVar.keys():
                self._vars[varname] = _Variable(self, varname, var, unlimDimName)
        self._file_format = []
        for dset in self._cdf:
            self._file_format.append(dset.file_format)

    def __setattr__(self, name, value):
        """override base class attribute creation"""
        self.__dict__[name] = value

    def __getattribute__(self, name):
        if name in ['variables','dimensions','file_format']:
            if name == 'dimensions': return self._dims
            if name == 'variables': return self._vars
            if name == 'file_format': return self._file_format
        else:
            return netCDF4_classic.Dataset.__getattribute__(self, name)

    def ncattrs(self):
        return self._cdf[0].__dict__.keys()

    def close(self):
        for dset in self._cdf:
            dset.close()

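# _Dimension is a private proxy for the aggregated unlimited dimension: its
# length is the total record count across all files.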
class _Dimension(object):
    def __init__(self, dimname, dim, dimlens, dimtotlen):
        self.dimlens = dimlens
        self.dimtotlen = dimtotlen
    def __len__(self):
        return self.dimtotlen
    def isunlimited(self):
        return True

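# _Variable is a private proxy for a record variable: slicing it reads from
# the per-file variables and concatenates along the unlimited dimension.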
class _Variable(object):
    def __init__(self, dset, varname, var, recdimname):
        self.dimensions = var.dimensions
        self._dset = dset
        self._mastervar = var
        self._recVar = dset._cdfRecVar[varname]
        self._recdimname = recdimname
        self._recLen = dset._cdfVLen
        self.dtype = var.dtype
        # Copy the attributes of the master variable onto the proxy.
        for name, value in var.__dict__.items():
            self.__dict__[name] = value
    def ncattrs(self):
        return self._mastervar.__dict__.keys()
    def __getattr__(self, name):
        if name == 'shape': return self._shape()
        return self.__dict__[name]
    def _shape(self):
        recdimlen = len(self._dset.dimensions[self._recdimname])
        return (recdimlen,) + self._mastervar.shape[1:]
    def __getitem__(self, elem):
        """Get records from a concatenated set of variables."""
        nv = len(self._recLen)

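        # Compute the start, count and stride along each dimension implied by
        # the requested index expression.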
        start, count, stride = netCDF4_classic._buildStartCountStride(elem, self.shape, self.dimensions, self._dset)

        count = [abs(cnt) for cnt in count]
        if (numpy.array(stride) < 0).any():
            raise IndexError('negative strides not allowed when slicing MFVariable Variable instance')

        # Start, stop and step along the record (unlimited) dimension.
        sta = start[0]
        step = stride[0]
        stop = sta + count[0] * step
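        # Build a map from global record index to (index within its file,
        # file number): record i of the whole set is record idx[i] of file vid[i].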
        idx = []
        vid = []
        for n in range(nv):
            k = self._recLen[n]
            idx.extend(range(k))
            vid.extend([n] * k)

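        # Select the (index, file) pairs of the records requested along the
        # unlimited dimension.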
        lst = zip(idx, vid)[sta:stop:step]

        # The slice along the remaining dimensions is the same for every file.
        newSlice = [slice(None, None, None)]
        for n in range(1, len(start)):
            newSlice.append(slice(start[n],
                start[n] + count[n] * stride[n], stride[n]))

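        # Read the requested records from each file in turn, then concatenate
        # the pieces along the record dimension.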
        lstArr = []
        for n in range(nv):
            # Local indices of the requested records that fall in file n.
            idx = [i for i, numv in lst if numv == n]
            if idx:
                # Slice along the record dimension for this file, then read.
                newSlice[0] = slice(idx[0], idx[-1] + 1, step)
                lstArr.append(netCDF4_classic.Variable.__getitem__(self._recVar[n], tuple(newSlice)))

        if lstArr:
            lstArr = numpy.concatenate(lstArr)
        return lstArr