Merge pull request #1147 from Unidata/issue1145
allow createDimension to accept Dimension instances (issue #1145)
jswhit committed Jan 18, 2022
2 parents c1a95d6 + 0703c6b commit a91ea54
Showing 6 changed files with 93 additions and 55 deletions.
4 changes: 4 additions & 0 deletions Changelog
@@ -4,6 +4,10 @@
if one does not already exist (similar to python open builtin). Issue #1144.
Added a mode='x' option (as in python open) which is the same as mode='w' with
clobber=False.
* allow createVariable to accept either Dimension instances or Dimension
names in "dimensions" tuple kwarg (issue #1145).
* remove all vestiges of python 2 in _netCDF4.pyx and set cython language_level
directive to 3 in setup.py.

version 1.5.8 (tag v1.5.8rel)
==============================
22 changes: 13 additions & 9 deletions docs/index.html
@@ -21,7 +21,7 @@

<h2>Contents</h2>
<ul>
<li><a href="#version-160">Version 1.6.0</a></li>
</ul></li>
<li><a href="#introduction">Introduction</a>
<ul>
@@ -456,7 +456,7 @@ <h2>API Documentation</h2>
<h1 class="modulename">
netCDF4 </h1>

<div class="docstring"><h2 id="version-160">Version 1.6.0</h2>

<h1 id="introduction">Introduction</h1>

@@ -1595,7 +1595,7 @@ <h2 id="in-memory-diskless-datasets">In-memory (diskless) Datasets</h2>
the parallel IO example, which is in <code>examples/mpi_example.py</code>.
Unit tests are in the <code>test</code> directory.</p>

<p><strong>contact</strong>: Jeffrey Whitaker <a href="mailto:jeffrey.s.whitaker@noaa.gov">jeffrey.s.whitaker@noaa.gov</a></p>

<p><strong>copyright</strong>: 2008 by Jeffrey Whitaker.</p>

@@ -1725,8 +1725,10 @@ <h2 id="in-memory-diskless-datasets">In-memory (diskless) Datasets</h2>

<p><strong><code>mode</code></strong>: access mode. <code>r</code> means read-only; no data can be
modified. <code>w</code> means write; a new file is created, an existing file with
the same name is deleted. <code>x</code> means write, but fail if a file
with the same name already exists. <code>a</code> and <code>r+</code> mean append;
an existing file is opened for reading and writing, and if the
file does not exist, it is created.
Appending <code>s</code> to modes <code>r</code>, <code>w</code>, <code>r+</code> or <code>a</code> will enable unbuffered shared
access to <code>NETCDF3_CLASSIC</code>, <code>NETCDF3_64BIT_OFFSET</code> or
<code>NETCDF3_64BIT_DATA</code> formatted files.
@@ -1737,7 +1739,8 @@ <h2 id="in-memory-diskless-datasets">In-memory (diskless) Datasets</h2>

<p><strong><code>clobber</code></strong>: if <code>True</code> (default), opening a file with <code>mode='w'</code>
will clobber an existing file with the same name. if <code>False</code>, an
exception will be raised if a file with the same name already exists.
<code>mode='x'</code> is identical to <code>mode='w'</code> with <code>clobber=False</code>.</p>

<p><strong><code>format</code></strong>: underlying file format (one of <code>'NETCDF4',
'NETCDF4_CLASSIC', 'NETCDF3_CLASSIC'</code>, <code>'NETCDF3_64BIT_OFFSET'</code> or
@@ -2062,8 +2065,9 @@ <h2 id="in-memory-diskless-datasets">In-memory (diskless) Datasets</h2>
<p>Data from netCDF variables is presented to python as numpy arrays with
the corresponding data type.</p>

<p><code><a href="#Dataset.dimensions">dimensions</a></code> must be a tuple containing <code><a href="#Dimension">Dimension</a></code> instances and/or
dimension names (strings) that have been defined
previously using <code><a href="#Dataset.createDimension">Dataset.createDimension</a></code>. The default value
is an empty tuple, which means the variable is a scalar.</p>

<p>If the optional keyword <code>zlib</code> is <code>True</code>, the data will be compressed in
@@ -2813,7 +2817,7 @@ <h2 id="in-memory-diskless-datasets">In-memory (diskless) Datasets</h2>
(for a variable-length string array). Numpy string and unicode datatypes with
length greater than one are aliases for <code>str</code>.</p>

<p><strong><code><a href="#Variable.dimensions">dimensions</a></code></strong>: a tuple containing the variable's Dimension instances
(defined previously with <code>createDimension</code>). Default is an empty tuple
which means the variable is a scalar (and therefore has no dimensions).</p>

3 changes: 3 additions & 0 deletions setup.py
@@ -616,6 +616,9 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs):
library_dirs=lib_dirs,
include_dirs=inc_dirs + ['include'],
runtime_library_dirs=runtime_lib_dirs)]
# set language_level directive to 3
for e in ext_modules:
e.cython_directives = {'language_level': "3"} #
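For context, the same <code>language_level</code> directive can also be passed through <code>cythonize</code> when the extension is built with Cython directly; a hedged alternative sketch (the <code>.pyx</code> path matches this repo's layout, but this is not the approach the commit takes):

```python
# Build-time sketch only: requires Cython at build time, not used by
# this commit, which sets e.cython_directives on each Extension instead.
from Cython.Build import cythonize

ext_modules = cythonize(
    "src/netCDF4/_netCDF4.pyx",
    compiler_directives={"language_level": "3"},
)
```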
else:
ext_modules = None

69 changes: 34 additions & 35 deletions src/netCDF4/_netCDF4.pyx
@@ -1909,6 +1909,7 @@ cdef _get_vars(group):
grp = grp.parent
free(dimids)
# create new variable instance.
dimensions = tuple(_find_dim(group,d) for d in dimensions)
if endianness == '>':
variables[name] = Variable(group, name, datatype, dimensions, id=varid, endian='big')
elif endianness == '<':
@@ -2408,9 +2409,9 @@ version 4.1.2 or higher of the netcdf C lib, and rebuild netcdf4-python."""
raise ValueError(msg)

def __repr__(self):
return self.__unicode__()
return self.__str__()

def __unicode__(self):
def __str__(self):
ncdump = [repr(type(self))]
dimnames = tuple(_tostr(dimname)+'(%s)'%len(self.dimensions[dimname])\
for dimname in self.dimensions.keys())
@@ -2653,8 +2654,9 @@ length greater than one are aliases for `str`.
Data from netCDF variables is presented to python as numpy arrays with
the corresponding data type.
`dimensions` must be a tuple containing `Dimension` instances and/or
dimension names (strings) that have been defined
previously using `Dataset.createDimension`. The default value
is an empty tuple, which means the variable is a scalar.
If the optional keyword `zlib` is `True`, the data will be compressed in
@@ -2757,6 +2759,18 @@ is the number of variable dimensions."""
group = self
else:
group = self.createGroup(dirname)
# if dimensions is a single string or Dimension instance,
# convert to a tuple.
# This prevents a common error that occurs when
# dimensions = 'lat' instead of ('lat',)
if isinstance(dimensions, (str, bytes, Dimension)):
dimensions = dimensions,
# convert elements of dimensions tuple to Dimension
# instances if they are strings.
# _find_dim looks for the dimension in this group and, if not
# found there, in its parent (and so on, back to the root).
dimensions =\
tuple(_find_dim(group,d) if isinstance(d,(str,bytes)) else d for d in dimensions)
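The conversion above can be sketched in plain Python; `Dimension` here is a bare stand-in for netCDF4's class, and `find_dim` a stand-in for `_find_dim`, so names and signatures are illustrative only:

```python
class Dimension:
    """Minimal stand-in for netCDF4.Dimension: just a name."""
    def __init__(self, name):
        self.name = name

def normalize_dims(dimensions, find_dim):
    # A bare string or Dimension becomes a one-element tuple,
    # then any names left in the tuple are resolved to instances.
    if isinstance(dimensions, (str, bytes, Dimension)):
        dimensions = (dimensions,)
    return tuple(find_dim(d) if isinstance(d, (str, bytes)) else d
                 for d in dimensions)

dims = {"lat": Dimension("lat"), "lon": Dimension("lon")}
# a mix of a name and an instance, as the new createVariable allows
mixed = normalize_dims(("lat", dims["lon"]), dims.__getitem__)
print([d.name for d in mixed])  # ['lat', 'lon']
```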
# create variable.
group.variables[varname] = Variable(group, varname, datatype,
dimensions=dimensions, zlib=zlib, complevel=complevel, shuffle=shuffle,
@@ -3454,9 +3468,9 @@ Read-only class variables:
raise AttributeError("size cannot be altered")

def __repr__(self):
return self.__unicode__()
return self.__str__()

def __unicode__(self):
def __str__(self):
if not dir(self._grp):
return 'Dimension object no longer valid'
if self.isunlimited():
@@ -3616,7 +3630,7 @@ behavior is similar to Fortran or Matlab, but different than numpy.
(for a variable-length string array). Numpy string and unicode datatypes with
length greater than one are aliases for `str`.
**`dimensions`**: a tuple containing the variable's Dimension instances
(defined previously with `createDimension`). Default is an empty tuple
which means the variable is a scalar (and therefore has no dimensions).
@@ -3674,7 +3688,7 @@ behavior is similar to Fortran or Matlab, but different than numpy.
is replaced with this value. If fill_value is set to `False`, then
the variable is not pre-filled. The default netCDF fill values can be found
in the dictionary `netCDF4.default_fillvals`.
**`chunk_cache`**: If specified, sets the chunk cache size for this variable.
Persists as long as Dataset is open. Use `set_var_chunk_cache` to
change it when Dataset is re-opened.
@@ -3696,11 +3710,6 @@ behavior is similar to Fortran or Matlab, but different than numpy.
# if complevel is set to zero, set zlib to False.
if not complevel:
zlib = False
# if dimensions is a string, convert to a tuple
# this prevents a common error that occurs when
# dimensions = 'lat' instead of ('lat',)
if type(dimensions) == str or type(dimensions) == bytes or type(dimensions) == unicode:
dimensions = dimensions,
self._grpid = grp._grpid
# make a weakref to group to avoid circular ref (issue 218)
# keep strong reference the default behaviour (issue 251)
@@ -3784,17 +3793,9 @@ behavior is similar to Fortran or Matlab, but different than numpy.
ndims = len(dimensions)
# find dimension ids.
if ndims:
dims = []
dimids = <int *>malloc(sizeof(int) * ndims)
for n from 0 <= n < ndims:
dimname = dimensions[n]
# look for dimension in this group, and if not
# found there, look in parent (and it's parent, etc, back to root).
dim = _find_dim(grp, dimname)
if dim is None:
raise KeyError("dimension %s not defined in group %s or any group in it's family tree" % (dimname, grp.path))
dimids[n] = dim._dimid
dims.append(dim)
dimids[n] = dimensions[n]._dimid
# go into define mode if it's a netCDF 3 compatible
# file format. Be careful to exit define mode before
# any exceptions are raised.
@@ -3862,8 +3863,8 @@ behavior is similar to Fortran or Matlab, but different than numpy.
raise ValueError('chunksizes must be a sequence with the same length as dimensions')
chunksizesp = <size_t *>malloc(sizeof(size_t) * ndims)
for n from 0 <= n < ndims:
if not dims[n].isunlimited() and \
chunksizes[n] > dims[n].size:
if not dimensions[n].isunlimited() and \
chunksizes[n] > dimensions[n].size:
msg = 'chunksize cannot exceed dimension size'
raise ValueError(msg)
chunksizesp[n] = chunksizes[n]
@@ -3923,9 +3924,7 @@ behavior is similar to Fortran or Matlab, but different than numpy.
if grp.data_model != 'NETCDF4': grp._enddef()
# count how many unlimited dimensions there are.
self._nunlimdim = 0
for dimname in dimensions:
# look in current group, and parents for dim.
dim = _find_dim(self._grp, dimname)
for dim in dimensions:
if dim.isunlimited(): self._nunlimdim = self._nunlimdim + 1
# set ndim attribute (number of dimensions).
with nogil:
@@ -3964,9 +3963,9 @@ behavior is similar to Fortran or Matlab, but different than numpy.
return self[...]

def __repr__(self):
return self.__unicode__()
return self.__str__()

def __unicode__(self):
def __str__(self):
cdef int ierr, no_fill
if not dir(self._grp):
return 'Variable object no longer valid'
@@ -5559,9 +5558,9 @@ the user.
self.name = dtype_name

def __repr__(self):
return self.__unicode__()
return self.__str__()

def __unicode__(self):
def __str__(self):
return "%r: name = '%s', numpy dtype = %s" %\
(type(self), self.name, self.dtype)

@@ -5841,9 +5840,9 @@ the user.
self.name = dtype_name

def __repr__(self):
return self.__unicode__()
return self.__str__()

def __unicode__(self):
def __str__(self):
if self.dtype == str:
return '%r: string type' % (type(self),)
else:
@@ -5951,9 +5950,9 @@ the user.
self.enum_dict = enum_dict

def __repr__(self):
return self.__unicode__()
return self.__str__()

def __unicode__(self):
def __str__(self):
return "%r: name = '%s', numpy dtype = %s, fields/values =%s" %\
(type(self), self.name, self.dtype, self.enum_dict)

5 changes: 4 additions & 1 deletion src/netCDF4/utils.py
@@ -45,7 +45,10 @@ def _find_dim(grp, dimname):
group = group.parent
except:
raise ValueError("cannot find dimension %s in this group or parent groups" % dimname)
return dim
if dim is None:
raise KeyError("dimension %s not defined in group %s or any group in its family tree" % (dimname, grp.path))
else:
return dim
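The walk toward the root that `_find_dim` performs can be sketched with minimal stand-in groups (the `Group` class and string "dimension" values here are hypothetical, for illustration only):

```python
class Group:
    """Stand-in for a netCDF4 group: local dimensions plus a parent link."""
    def __init__(self, dimensions, parent=None):
        self.dimensions = dimensions
        self.parent = parent

def find_dim(grp, dimname):
    # Check this group first, then each parent up to the root.
    group = grp
    while group is not None:
        if dimname in group.dimensions:
            return group.dimensions[dimname]
        group = group.parent
    raise KeyError("dimension %s not defined in this group "
                   "or any group in its family tree" % dimname)

root = Group({"time": "time-dim"})
child = Group({"lat": "lat-dim"}, parent=root)
print(find_dim(child, "time"))  # inherited from the parent group
```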

def _walk_grps(topgrp):
"""Iterate through all (sub-) groups of topgrp, similar to os.walktree.
45 changes: 35 additions & 10 deletions test/tst_dims.py
@@ -19,7 +19,11 @@
TIME_LEN = None
TIME_LENG = None
GROUP_NAME='forecasts'
VAR_NAME='temp'
VAR_NAME1='temp1'
VAR_NAME2='temp2'
VAR_NAME3='temp3'
VAR_NAME4='temp4'
VAR_NAME5='temp5'
VAR_TYPE='f8'


@@ -28,19 +32,27 @@ class DimensionsTestCase(unittest.TestCase):
def setUp(self):
self.file = FILE_NAME
f = netCDF4.Dataset(self.file, 'w')
f.createDimension(LAT_NAME,LAT_LEN)
f.createDimension(LON_NAME,LON_LEN)
f.createDimension(LEVEL_NAME,LEVEL_LEN)
f.createDimension(TIME_NAME,TIME_LEN)
f.createVariable(VAR_NAME,VAR_TYPE,(LEVEL_NAME, LAT_NAME, LON_NAME, TIME_NAME))
lat_dim=f.createDimension(LAT_NAME,LAT_LEN)
lon_dim=f.createDimension(LON_NAME,LON_LEN)
lev_dim=f.createDimension(LEVEL_NAME,LEVEL_LEN)
time_dim=f.createDimension(TIME_NAME,TIME_LEN)
# specify dimensions with names
fv1 = f.createVariable(VAR_NAME1,VAR_TYPE,(LEVEL_NAME, LAT_NAME, LON_NAME, TIME_NAME))
# specify dimensions with instances
fv2 = f.createVariable(VAR_NAME2,VAR_TYPE,(lev_dim,lat_dim,lon_dim,time_dim))
# specify dimensions using a mix of names and instances
fv3 = f.createVariable(VAR_NAME3,VAR_TYPE,(lev_dim, LAT_NAME, lon_dim, TIME_NAME))
# a single Dimension instance or name (not in a tuple)
fv4 = f.createVariable(VAR_NAME4,VAR_TYPE,time_dim)
fv5 = f.createVariable(VAR_NAME5,VAR_TYPE,TIME_NAME)
g = f.createGroup(GROUP_NAME)
g.createDimension(LAT_NAME,LAT_LENG)
g.createDimension(LON_NAME,LON_LENG)
# should get dimensions from parent group.
# (did not work prior to alpha 18)
#g.createDimension(LEVEL_NAME,LEVEL_LENG)
#g.createDimension(TIME_NAME,TIME_LENG)
g.createVariable(VAR_NAME,VAR_TYPE,(LEVEL_NAME, LAT_NAME, LON_NAME, TIME_NAME))
gv = g.createVariable(VAR_NAME1,VAR_TYPE,(LEVEL_NAME, LAT_NAME, LON_NAME, TIME_NAME))
f.close()

def tearDown(self):
@@ -51,7 +63,11 @@ def runTest(self):
"""testing dimensions"""
# check dimensions in root group.
f = netCDF4.Dataset(self.file, 'r+')
v = f.variables[VAR_NAME]
v1 = f.variables[VAR_NAME1]
v2 = f.variables[VAR_NAME2]
v3 = f.variables[VAR_NAME3]
v4 = f.variables[VAR_NAME4]
v5 = f.variables[VAR_NAME5]
isunlim = [dim.isunlimited() for dim in f.dimensions.values()]
dimlens = [len(dim) for dim in f.dimensions.values()]
names_check = [LAT_NAME, LON_NAME, LEVEL_NAME, TIME_NAME]
@@ -65,6 +81,15 @@
# check that dimension names are correct.
for name in f.dimensions.keys():
self.assertTrue(name in names_check)
for name in v1.dimensions:
self.assertTrue(name in names_check)
for name in v2.dimensions:
self.assertTrue(name in names_check)
for name in v3.dimensions:
self.assertTrue(name in names_check)
self.assertTrue(v4.dimensions[0] == TIME_NAME)
self.assertTrue(v5.dimensions[0] == TIME_NAME)
# check that dimension lengths are correct.
for name,dim in f.dimensions.items():
self.assertTrue(len(dim) == lensdict[name])
@@ -75,15 +100,15 @@
# make sure length of dimensions change correctly.
nadd1 = 2
nadd2 = 4
v[0:nadd1,:,:,0:nadd2] = uniform(size=(nadd1,LAT_LEN,LON_LEN,nadd2))
v1[0:nadd1,:,:,0:nadd2] = uniform(size=(nadd1,LAT_LEN,LON_LEN,nadd2))
lensdict[LEVEL_NAME]=nadd1
lensdict[TIME_NAME]=nadd2
# check that dimension lengths are correct.
for name,dim in f.dimensions.items():
self.assertTrue(len(dim) == lensdict[name])
# check dimensions in subgroup.
g = f.groups[GROUP_NAME]
vg = g.variables[VAR_NAME]
vg = g.variables[VAR_NAME1]
isunlim = [dim.isunlimited() for dim in g.dimensions.values()]
dimlens = [len(dim) for dim in g.dimensions.values()]
names_check = [LAT_NAME, LON_NAME, LEVEL_NAME, TIME_NAME]
