Package MFDataset :: Module MFDataset
[hide private]
[frames | no frames]

Source Code for Module MFDataset.MFDataset

  1  """ 
  2  Module for reading multi-file netCDF Datasets, making variables 
  3  spanning multiple files appear as if they were in one file. 
  4   
  5  Datasets must be in C{NETCDF4_CLASSIC, NETCDF3_CLASSIC or NETCDF3_64BIT} 
  6  format (C{NETCDF4} Datasets won't work). 
  7   
  8  Adapted from U{pycdf <http://pysclint.sourceforge.net/pycdf>} by Andre Gosselin. 
  9   
 10  Example usage: 
 11   
 12  >>> import MFDataset, netCDF4_classic, glob, numpy 
 13  >>> # create a series of netCDF files with a variable sharing 
 14  >>> # the same unlimited dimension. 
 15  >>> for nfile in range(10): 
 16  ...     f = netCDF4_classic.Dataset('mftest'+repr(nfile)+'.nc','w') 
 17  ...     f.createDimension('x',None) 
 18  ...     x = f.createVariable('x','i',('x',)) 
 19  ...     x[0:10] = numpy.arange(nfile*10,10*(nfile+1)) 
 20  ...     f.close() 
 21  >>> # now read all those files in at once, in one Dataset. 
 22  >>> files = glob.glob('mftest*.nc') 
 23  >>> f = MFDataset.Dataset(files) 
 24  >>> print f.variables['x'][:] 
 25  [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 
 26   25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 
 27   50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 
 28   75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99] 
 29  """ 
 30   
 31  import netCDF4_classic 
 32  import numpy 
 33   
 34  __version__ = "0.5" 
 35   
class Dataset(netCDF4_classic.Dataset):
    """
    Class for reading a multi-file netCDF dataset.

    The first file of the set is the "master". Its record variables
    (variables whose first dimension is the unlimited dimension) are exposed
    through L{_Variable} wrappers that read across every file of the set, and
    its unlimited dimension is exposed through a L{_Dimension} wrapper whose
    length is the sum of the lengths found in each file.
    """

    def __init__(self, files):
        """
        Open a Dataset spanning multiple files, making it look as if it was a
        single file. Variables in the list of files that share the same
        unlimited dimension are aggregated.

        Adapted from U{pycdf <http://pysclint.sourceforge.net/pycdf>} by Andre Gosselin.

        @param files: sequence of netCDF files; the first one will become the
        "master" file, defining all the record variables (variables with an
        unlimited dimension) which may span subsequent files. Attribute access
        returns attributes only from "master" file. The files are always opened
        in read-only mode.

        @raise IOError: if the master file has no unlimited dimension or no
        record variable, or if one of the other files does not define a
        master record variable with the same dimensions, rank, shape
        (ignoring the record dimension) and data type.
        """
        master = files[0]

        # Open the master as a plain netCDF4_classic.Dataset so that querying
        # it never goes through the attribute overrides of this subclass.
        cdfm = netCDF4_classic.Dataset(master)
        # Copy attributes from master.
        for name, value in cdfm.__dict__.items():
            self.__dict__[name] = value

        # Make sure the master defines an unlimited dimension.
        unlimDimId = None
        unlimDimName = None
        for dimname, dim in cdfm.dimensions.items():
            if dim.isunlimited():
                unlimDimId = dim
                unlimDimName = dimname
        if unlimDimId is None:
            raise IOError("master dataset %s does not have an unlimited dimension" % master)

        # Get info on all record variables defined in the master.
        # Make sure the master defines at least one record variable.
        masterRecVar = {}
        for vName, v in cdfm.variables.items():
            dims = v.dimensions
            # Be careful: we may deal with a scalar (dimensionless) variable.
            # Unlimited dimension always occupies index 0.
            if len(dims) > 0 and unlimDimName == dims[0]:
                masterRecVar[vName] = (dims, v.shape, v.dtype)
        if len(masterRecVar) == 0:
            raise IOError("master dataset %s does not have any record variable" % master)

        # Create the following:
        #   cdf         list of Dataset instances
        #   cdfVLen     list of unlimited dimension lengths, one per Dataset
        #   cdfRecVar   dictionary indexed by the record var names; each key
        #               holds a list of the corresponding Variable instances,
        #               one for each file of the file set
        cdf = [cdfm]
        # Stored on the instance right away; the very same list object is
        # extended below, so later appends are visible through self._cdf.
        self._cdf = cdf
        cdfVLen = [len(unlimDimId)]
        cdfRecVar = {}
        for v in masterRecVar.keys():
            cdfRecVar[v] = [cdfm.variables[v]]

        # Open each remaining file in read-only mode.
        # Make sure each file defines the same record variables as the master
        # and that the variables are defined in the same way (name, shape and
        # type).
        for f in files[1:]:
            part = netCDF4_classic.Dataset(f)
            varInfo = part.variables
            for v in masterRecVar.keys():
                # Make sure master rec var is also defined here.
                if v not in varInfo.keys():
                    raise IOError("record variable %s not defined in %s" % (v, f))

                # Make sure it is a record var. A scalar variable has no
                # dimensions at all, so it cannot be one either.
                vInst = varInfo[v]
                extDims = vInst.dimensions
                if len(extDims) == 0 or not part.dimensions[extDims[0]].isunlimited():
                    raise IOError("variable %s is not a record var inside %s" % (v, f))

                masterDims, masterShape, masterType = masterRecVar[v][:3]
                extShape = vInst.shape
                extType = vInst.dtype

                # Check that dimension names are identical.
                if masterDims != extDims:
                    raise IOError("variable %s : dimensions mismatch between "
                                  "master %s (%s) and extension %s (%s)" %
                                  (v, master, masterDims, f, extDims))

                # Check that the ranks are identical, and the dimension
                # lengths are identical (except for that of the unlimited
                # dimension, which of course may vary).
                if len(masterShape) != len(extShape):
                    raise IOError("variable %s : rank mismatch between "
                                  "master %s (%s) and extension %s (%s)" %
                                  (v, master, len(masterShape), f, len(extShape)))
                if masterShape[1:] != extShape[1:]:
                    raise IOError("variable %s : shape mismatch between "
                                  "master %s (%s) and extension %s (%s)" %
                                  (v, master, masterShape, f, extShape))

                # Check that the data types are identical.
                if masterType != extType:
                    raise IOError("variable %s : data type mismatch between "
                                  "master %s (%s) and extension %s (%s)" %
                                  (v, master, masterType, f, extType))

                # Everything ok.
                cdfRecVar[v].append(vInst)

            cdf.append(part)
            cdfVLen.append(len(part.dimensions[unlimDimName]))

        # Attach attributes to the MFDataset.Dataset instance.
        # A local __setattr__() method is required for them.
        self._files = files            # list of file names in the set
        self._cdfVLen = cdfVLen        # list of unlimited lengths
        self._cdfTLen = sum(cdfVLen)   # total length
        self._cdfRecVar = cdfRecVar    # dictionary of Variable instances for
                                       # all the record variables

        # Replace the master's unlimited dimension by a wrapper reporting the
        # aggregated length (this updates the master's dimensions dict itself).
        self._dims = cdfm.dimensions
        for dimname, dim in self._dims.items():
            if dim.isunlimited():
                self._dims[dimname] = _Dimension(dimname, dim, self._cdfVLen, self._cdfTLen)

        # Replace each record variable by a wrapper spanning all the files.
        self._vars = cdfm.variables
        for varname, var in self._vars.items():
            if varname in self._cdfRecVar.keys():
                self._vars[varname] = _Variable(self, varname, var, unlimDimName)

        # One file_format entry per file of the set, in file order.
        self._file_format = []
        for dset in self._cdf:
            self._file_format.append(dset.file_format)

    def __setattr__(self, name, value):
        """override base class attribute creation"""
        self.__dict__[name] = value

    def __getattribute__(self, name):
        """
        Serve C{dimensions}, C{variables} and C{file_format} from the
        aggregated structures built in C{__init__}; delegate every other
        attribute to the base class.
        """
        if name == 'dimensions':
            return self._dims
        if name == 'variables':
            return self._vars
        if name == 'file_format':
            return self._file_format
        return netCDF4_classic.Dataset.__getattribute__(self, name)

    def ncattrs(self):
        """Return the names stored in the master dataset's instance __dict__."""
        return self._cdf[0].__dict__.keys()

    def close(self):
        """Close every file of the set."""
        for dset in self._cdf:
            dset.close()
191
class _Dimension(object):
    """
    Stand-in for the unlimited dimension of a multi-file dataset.

    Its length is the total length supplied at construction time, and it
    always reports itself as unlimited.
    """

    def __init__(self, dimname, dim, dimlens, dimtotlen):
        """
        @param dimname: accepted for the caller's convenience; not stored.
        @param dim: accepted for the caller's convenience; not stored.
        @param dimlens: per-file lengths of the dimension, kept as C{dimlens}.
        @param dimtotlen: aggregated length, kept as C{dimtotlen} and
        returned by C{len()}.
        """
        self.dimlens, self.dimtotlen = dimlens, dimtotlen

    def isunlimited(self):
        """Always True: this wrapper only ever replaces an unlimited dimension."""
        return True

    def __len__(self):
        """Return the aggregated length given to the constructor."""
        return self.dimtotlen
200
class _Variable(object):
    """
    Record variable spanning several files, presented as a single variable.

    Slicing an instance reads the requested records from each underlying
    per-file variable and concatenates them along the first (record)
    dimension.
    """

    def __init__(self, dset, varname, var, recdimname):
        """
        @param dset: the multi-file dataset owning this variable; its
        C{_cdfRecVar} and C{_cdfVLen} attributes are read here, and its
        C{dimensions} mapping is read whenever the shape is requested.
        @param varname: name of the variable, used as key into
        C{dset._cdfRecVar}.
        @param var: the variable instance from the master file.
        @param recdimname: name of the record dimension.
        """
        self.dimensions = var.dimensions
        self._dset = dset
        self._mastervar = var
        self._recVar = dset._cdfRecVar[varname]   # one variable per file
        self._recdimname = recdimname
        self._recLen = dset._cdfVLen              # records held by each file
        self.dtype = var.dtype
        # Copy attributes from master.
        for name, value in var.__dict__.items():
            self.__dict__[name] = value

    def typecode(self):
        """Return the data type of the master variable."""
        return self.dtype

    def ncattrs(self):
        """Return the names stored in the master variable's instance __dict__."""
        return self._mastervar.__dict__.keys()

    def __getattr__(self, name):
        """
        Compute C{shape} on demand; look anything else up in the instance
        dictionary.

        @raise AttributeError: if the attribute does not exist, as the
        attribute protocol requires (so that C{getattr(obj, name, default)}
        falls back to its default instead of failing with a KeyError).
        """
        if name == 'shape':
            return self._shape()
        try:
            return self.__dict__[name]
        except KeyError:
            raise AttributeError("'%s' object has no attribute '%s'"
                                 % (type(self).__name__, name))

    def _shape(self):
        """
        Return the aggregated shape: the current length of the record
        dimension in the owning dataset followed by the master variable's
        remaining dimension lengths.
        """
        recdimlen = len(self._dset.dimensions[self._recdimname])
        return (recdimlen,) + self._mastervar.shape[1:]

    def __getitem__(self, elem):
        """
        Get records from a concatenated set of variables.

        @param elem: slicing expression, as accepted by
        C{netCDF4_classic._buildStartCountStride}.
        @return: the selected records of all files concatenated with
        C{numpy.concatenate}, or an empty list when no record is selected.
        @raise IndexError: if any stride of the slicing expression is negative.
        """
        # Number of variables making up the MFVariable.Variable.
        nv = len(self._recLen)
        # Parse the slicing expression, needed to properly handle
        # a possible ellipsis.
        start, count, stride = netCDF4_classic._buildStartCountStride(
            elem, self.shape, self.dimensions, self._dset)
        # Make sure count=-1 becomes count=1.
        count = [abs(cnt) for cnt in count]
        if (numpy.array(stride) < 0).any():
            raise IndexError('negative strides not allowed when slicing MFVariable Variable instance')
        # Start, stop and step along 1st dimension, eg the unlimited
        # dimension.
        sta = start[0]
        step = stride[0]
        stop = sta + count[0] * step

        # Build two parallel lists describing every record of the
        # concatenated variable set, in order:
        #   idx   the record index inside its own variable, from 0 to n
        #   vid   the index of the Variable instance holding that record
        idx = []
        vid = []
        for n in range(nv):
            k = self._recLen[n]   # number of records in this variable
            idx.extend(range(k))
            vid.extend([n] * k)

        # Pair the two lists up and slice the result along the first
        # dimension. The list() call keeps this working where zip() returns
        # an iterator rather than a list.
        lst = list(zip(idx, vid))[sta:stop:step]

        # Rebuild the slicing expression for dimensions 1 and up.
        newSlice = [slice(None, None, None)]
        for n in range(1, len(start)):   # skip dimension 0
            newSlice.append(slice(start[n],
                                  start[n] + count[n] * stride[n], stride[n]))

        # Apply the slicing expression to each var in turn, extracting records
        # in a list of arrays.
        lstArr = []
        for n in range(nv):
            # Get the list of selected record indices for variable 'n'.
            selected = [i for i, numv in lst if numv == n]
            if selected:
                # Rebuild slicing expression for dimension 0.
                newSlice[0] = slice(selected[0], selected[-1] + 1, step)
                # Extract records from the var, and append them to a list
                # of arrays.
                lstArr.append(netCDF4_classic.Variable.__getitem__(self._recVar[n], tuple(newSlice)))

        # Return the extracted records as a unified array.
        if lstArr:
            lstArr = numpy.concatenate(lstArr)
        return lstArr
279