
Commit 5b8fe06

fix bug related to coordinate names and values in catchstats
1 parent 23d942b commit 5b8fe06

File tree

1 file changed (+81 -37 lines)


src/lisfloodutilities/catchstats/catchstats.py

Lines changed: 81 additions & 37 deletions
@@ -21,6 +21,21 @@
 # from tqdm.auto import tqdm
 
 
+def check_coordinates(ds: Union[xr.Dataset, xr.DataArray]) -> Union[xr.Dataset, xr.DataArray]:
+    """Makes sure that the geographical coordinates are named 'lat' and 'lon' and rounds the values to 9 decimals to avoid conflicts.
+    """
+
+    # check names of the coordinates
+    ds = ds.rename(
+        {k: v for k, v in {'x': 'lon', 'y': 'lat'}.items() if k in ds.sizes})
+
+    # round it to avoid issues when comparing with other datasets
+    ds['lon'] = ds['lon'].round(9)
+    ds['lat'] = ds['lat'].round(9)
+
+    return ds
+
+
 def read_inputmaps(inputmaps: Union[str, Path]) -> xr.Dataset:
     """It reads the input maps in NetCDF format from the input directory
 
@@ -38,27 +53,33 @@ def read_inputmaps(inputmaps: Union[str, Path]) -> xr.Dataset:
     if not inputmaps.is_dir():
         print(f'ERROR: {inputmaps} is missing or not a directory!')
         sys.exit(1)
-
+
     filepaths = list(inputmaps.glob('*.nc'))
     if not filepaths:
         print(f'ERROR: No NetCDF files found in "{inputmaps}"')
         sys.exit(2)
 
     print(f'{len(filepaths)} input NetCDF files found in "{inputmaps}"')
-
+
     try:
         # for dynamic maps
-        ds = xr.open_mfdataset(filepaths, chunks='auto', parallel=True, engine='netcdf4')
+        ds = xr.open_mfdataset(filepaths, chunks='auto',
+                               parallel=True, engine='netcdf4')
         # chunks is set to auto for general purpose processing
         # it could be optimized depending on input NetCDF
     except:
         # for static maps
-        ds = xr.Dataset({file.stem.split('_')[0]: xr.open_dataset(file, engine='netcdf4')['Band1'] for file in filepaths})
+        ds = xr.Dataset({file.stem.split('_')[0]: xr.open_dataset(
+            file, engine='netcdf4')['Band1'] for file in filepaths})
     if 'wgs_1984' in ds:
         ds = ds.drop_vars('wgs_1984')
 
+    # check coordinates
+    ds = check_coordinates(ds)
+
     return ds
 
+
 def read_masks(mask: Union[str, Path]) -> Dict[int, xr.DataArray]:
     """It loads the catchment masks in NetCDF formal from the input directory
 
@@ -83,29 +104,31 @@ def read_masks(mask: Union[str, Path]) -> Dict[int, xr.DataArray]:
     if not maskpaths:
         print(f'ERROR: No NetCDF files found in "{mask}"')
         sys.exit(2)
-
+
     print(f'{len(maskpaths)} mask NetCDF files found in "{mask}"')
 
     # load masks
     masks = {}
-    for maskpath in maskpaths:
+    for maskpath in maskpaths:
         ID = int(maskpath.stem)
         try:
             try:
                 aoi = xr.open_dataset(maskpath, engine='netcdf4')['Band1']
             except:
                 aoi = xr.open_dataarray(maskpath, engine='netcdf4')
             aoi = xr.where(aoi.notnull(), 1, aoi)
+            aoi = check_coordinates(aoi)
             masks[ID] = aoi
         except Exception as e:
             print(f'ERROR: The mask {maskpath} could not be read: {e}')
             continue
 
     return masks
 
+
 def read_pixarea(pixarea: Union[str, Path]) -> xr.DataArray:
     """It reads the LISFLOOD pixel area static map
-
+
     Parameters:
     -----------
     pixarea: string or Path
@@ -120,25 +143,29 @@ def read_pixarea(pixarea: Union[str, Path]) -> xr.DataArray:
     if not pixarea.is_file():
         print(f'ERROR: {pixarea} is not a file!')
         sys.exit(1)
-
+
     try:
         weight = xr.open_dataset(pixarea, engine='netcdf4')['Band1']
     except Exception as e:
         print(f'ERROR: The weighing map "{pixarea}" could not be loaded: {e}')
         sys.exit(2)
 
+    # check coordinates
+    weight = check_coordinates(weight)
+
     return weight
 
+
 def catchment_statistics(maps: Union[xr.DataArray, xr.Dataset],
                          masks: Dict[int, xr.DataArray],
-                         statistic: Union[str, List[str]],
+                         statistic: Union[str, List[str]],
                          weight: Optional[xr.DataArray] = None,
                          output: Optional[Union[str, Path]] = None,
                          overwrite: bool = False
                          ) -> Optional[xr.Dataset]:
     """
     Given a set of input maps and catchment masks, it computes catchment statistics.
-
+
     Parameters:
     -----------
     maps: xarray.DataArray or xarray.Dataset
@@ -153,7 +180,7 @@ def catchment_statistics(maps: Union[xr.DataArray, xr.Dataset],
         directory where the resulting NetCDF files will be saved. If not provided, the results are put out as a xr.Dataset
     overwrite: boolean
         whether to overwrite or skip catchments whose output NetCDF file already exists. By default is False, so the catchment will be skipped
-
+
     Returns:
     --------
     A xr.Dataset of all catchment statistics or a NetCDF file for each catchment in the "masks" dictionary
@@ -167,19 +194,21 @@ def catchment_statistics(maps: Union[xr.DataArray, xr.Dataset],
     # check statistic
     if isinstance(statistic, str):
         statistic = [statistic]
-    possible_stats = ['mean', 'sum', 'std', 'var', 'min', 'max', 'median', 'count']
-    assert all(stat in possible_stats for stat in statistic), "All values in 'statistic' should be one of these: {0}".format(', '.join(possible_stats))
+    possible_stats = ['mean', 'sum', 'std',
+                      'var', 'min', 'max', 'median', 'count']
+    assert all(stat in possible_stats for stat in statistic), "All values in 'statistic' should be one of these: {0}".format(
+        ', '.join(possible_stats))
     stats_dict = {var: statistic for var in maps}
-
+
     # output directory
     if output is None:
         results = []
     else:
         output = Path(output)
         output.mkdir(parents=True, exist_ok=True)
-
+
     # define coordinates and variables of the resulting Dataset
-    dims = dict(maps.dims)
+    dims = dict(maps.sizes)
     dimnames = [dim.lower() for dim in dims]
     if 'lat' in dimnames and 'lon' in dimnames:
         x_dim, y_dim = 'lon', 'lat'
@@ -188,34 +217,39 @@ def catchment_statistics(maps: Union[xr.DataArray, xr.Dataset],
     del dims[x_dim]
     del dims[y_dim]
     coords = {dim: maps[dim] for dim in dims}
-    variables = [f'{var}_{stat}' for var, stats in stats_dict.items() for stat in stats]
-
+    variables = [f'{var}_{stat}' for var, stats in stats_dict.items()
+                 for stat in stats]
+
     # compute statistics for each catchemnt
     # for ID in tqdm(masks.keys(), desc='processing catchments'):
-    for ID in masks.keys():
+    for ID in masks.keys():
 
         if output is not None:
             fileout = output / f'{ID:04}.nc'
             if fileout.exists() and not overwrite:
-                print(f'Output file {fileout} already exists. Moving forward to the next catchment')
+                print(
+                    f'Output file {fileout} already exists. Moving forward to the next catchment')
                 continue
-
+
         # create empty Dataset
         coords.update({'id': [ID]})
-        maps_aoi = xr.Dataset({var: xr.DataArray(coords=coords, dims=coords.keys()) for var in variables})
-
+        maps_aoi = xr.Dataset(
+            {var: xr.DataArray(coords=coords, dims=coords.keys()) for var in variables})
+
         # apply mask to the dataset
         aoi = masks[ID]
-        masked_maps = maps.sel({x_dim: aoi[x_dim], y_dim: aoi[y_dim]}).where(aoi == 1)
+        masked_maps = maps.sel(
+            {x_dim: aoi[x_dim], y_dim: aoi[y_dim]}).where(aoi == 1)
         masked_maps = masked_maps.compute()
 
         # apply weighting
         if weight is not None:
-            masked_weight = weight.sel({x_dim: aoi[x_dim], y_dim: aoi[y_dim]}).where(aoi == 1)
-            weighted_maps = masked_maps.weighted(masked_weight.fillna(0))
+            masked_weight = weight.sel(
+                {x_dim: aoi[x_dim], y_dim: aoi[y_dim]}).where(aoi == 1)
+            weighted_maps = masked_maps.weighted(masked_weight.fillna(0))
 
         # compute statistics
-        for var, stats in stats_dict.items():
+        for var, stats in stats_dict.items():
             for stat in stats:
                 if (stat in ['mean', 'sum', 'std', 'var']) and (weight is not None):
                     x = getattr(weighted_maps, stat)(dim=[x_dim, y_dim])[var]
@@ -236,7 +270,8 @@ def catchment_statistics(maps: Union[xr.DataArray, xr.Dataset],
     if output is None:
         results = xr.concat(results, dim='id')
         return results
-
+
+
 def main(argv=sys.argv):
     prog = os.path.basename(argv[0])
     parser = argparse.ArgumentParser(
@@ -247,26 +282,35 @@ def main(argv=sys.argv):
         """,
         prog=prog,
     )
-    parser.add_argument("-i", "--input", required=True, help="Directory containing the input NetCDF files")
-    parser.add_argument("-m", "--mask", required=True, help="Directory containing the mask NetCDF files")
-    parser.add_argument("-s", "--statistic", nargs='+', required=True, help='List of statistics to be computed. Possible values: mean, sum, std, var, min, max, median, count')
-    parser.add_argument("-o", "--output", required=True, help="Directory where the output NetCDF files will be saved")
-    parser.add_argument("-a", "--area", required=False, default=None, help="NetCDF file of pixel area used to weigh the statistics")
-    parser.add_argument("-w", "--overwrite", action="store_true", default=False, help="Overwrite existing output files")
-
+    parser.add_argument("-i", "--input", required=True,
+                        help="Directory containing the input NetCDF files")
+    parser.add_argument("-m", "--mask", required=True,
+                        help="Directory containing the mask NetCDF files")
+    parser.add_argument("-s", "--statistic", nargs='+', required=True,
+                        help='List of statistics to be computed. Possible values: mean, sum, std, var, min, max, median, count')
+    parser.add_argument("-o", "--output", required=True,
+                        help="Directory where the output NetCDF files will be saved")
+    parser.add_argument("-a", "--area", required=False, default=None,
+                        help="NetCDF file of pixel area used to weigh the statistics")
+    parser.add_argument("-w", "--overwrite", action="store_true",
+                        default=False, help="Overwrite existing output files")
+
     args = parser.parse_args()
 
     try:
         maps = read_inputmaps(args.input)
         masks = read_masks(args.mask)
        weight = read_pixarea(args.area) if args.area is not None else None
-        catchment_statistics(maps, masks, args.statistic, weight=weight, output=args.output, overwrite=args.overwrite)
+        catchment_statistics(maps, masks, args.statistic, weight=weight,
+                             output=args.output, overwrite=args.overwrite)
     except Exception as e:
         print(f'ERROR: {e}')
         sys.exit(1)
-
+
+
 def main_script():
     sys.exit(main())
 
+
 if __name__ == "__main__":
     main_script()
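
For context, the sketch below (not part of the commit; the coordinate values, grid size and helper usage are made up for illustration) reproduces the kind of mismatch the new check_coordinates helper guards against: a mask stored with 'x'/'y' coordinate names and tiny floating-point offsets will not align with input maps that use 'lat'/'lon', so the .sel() lookup in catchment_statistics fails or drops pixels. Renaming and rounding both objects the same way restores the alignment.

import numpy as np
import xarray as xr


def check_coordinates(ds):
    # mirrors the logic of the helper added in this commit:
    # rename 'x'/'y' to 'lon'/'lat' and round the coordinate values to 9 decimals
    ds = ds.rename({k: v for k, v in {'x': 'lon', 'y': 'lat'}.items() if k in ds.sizes})
    ds['lon'] = ds['lon'].round(9)
    ds['lat'] = ds['lat'].round(9)
    return ds


# input map on a clean 'lat'/'lon' grid (hypothetical values)
lon = np.array([10.05, 10.15, 10.25])
lat = np.array([45.05, 45.15])
maps = xr.DataArray(np.arange(6.0).reshape(2, 3),
                    coords={'lat': lat, 'lon': lon}, dims=('lat', 'lon'))

# catchment mask written by another tool: 'x'/'y' names plus floating-point noise
aoi = xr.DataArray(np.ones((2, 3)),
                   coords={'y': lat + 1e-12, 'x': lon + 1e-12}, dims=('y', 'x'))

# without normalisation, maps.sel({'lon': aoi['x'], 'lat': aoi['y']}) raises a
# KeyError because the noisy coordinate labels are not found in the index
maps, aoi = check_coordinates(maps), check_coordinates(aoi)

masked = maps.sel({'lon': aoi['lon'], 'lat': aoi['lat']}).where(aoi == 1)
print(float(masked.mean()))  # 2.5 once the grids line up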
