Loading src/cso/cso_file.py +75 −43 Original line number Diff line number Diff line # Changes # # CHANGES # # 2022-09, Arjo Segers # Write files with zlib-compression, option to disable this. Loading Loading @@ -39,6 +40,14 @@ # 2025-04, Arjo Segers # Enable zlib compression only for numerical data. # Avoid warnings from packing in case of all-nan values. # Open a file rather than loading it. # Extended sort options for listing files. # Support creation of listing file objects without filename. # Support selection of multiple records from listing file. # # 2025-04, Arjo Segers # Changed imports for python packaging. # Change expansion of datetime values from csv files for recent pandas version. # ######################################################################## Loading Loading @@ -189,7 +198,7 @@ def Pack_DataArray(da, dtype="i2"): # only floats ... if da.dtype in [numpy.float32, numpy.float64]: # should have some values .. # any values defined? if numpy.any( ~ numpy.isnan(da.values) ) : # value range, ignore nan's: vmin = numpy.nanmin(da.values) Loading Loading @@ -276,11 +285,13 @@ class CSO_File(object): raise Exception # endif # access dataset: with xarray.open_dataset(self.filename) as self.ds: # load entire file: self.ds.load() # endwith # xarray # open file: try: self.ds = xarray.open_dataset(self.filename) except: logging.error(f"could not open (corrupted?) file: {self.filename}") raise #endtry else: # dummy: Loading Loading @@ -1291,14 +1302,25 @@ class CSO_Listing(object): # head for index column: self.index_label = "filename" # read? if filename is not None: # store filename: self.filename = filename # directory name: if self.filename is not None: # check .. if not os.path.isfile(filename): logging.error("listing file not found: %s" % filename) raise Exception # endif # base directory: self.dirname = os.path.dirname(self.filename) # could be empty .. if len(self.dirname) == 0 : self.dirname = os.curdir # endif # info ... logging.info(f"{indent} read listing {filename} ...") # read: Loading @@ -1314,7 +1336,11 @@ class CSO_Listing(object): self.df["end_time"] = pandas.to_datetime(self.df["end_time"]) else: # new table: # not defined yet, assume current location: self.dirname = os.curdir # new empty table: self.df = pandas.DataFrame(columns=["start_time", "end_time"]) # endif Loading Loading @@ -1343,7 +1369,7 @@ class CSO_Listing(object): # save, also write the index column: self.df.to_csv(filename, sep=self.sep, columns=columns, index_label=self.index_label) # enddef Close # enddef Save # * Loading Loading @@ -1465,7 +1491,7 @@ class CSO_Listing(object): # check .. if fname not in self.df.index: logging.error('file "%s" is not a record in table: %s' % (fname, filename)) logging.error(f"file '{fname}' is not a record in table: {self.filename}") raise Exception # endif Loading Loading @@ -1532,9 +1558,9 @@ class CSO_Listing(object): # * def Select(self, tr=None, method="overlap", expr=None, blacklist=[], indent="", **kwargs): def Select(self, tr=None, method="overlap", expr=None, blacklist=[], verbose=True, indent="", **kwargs): """ Return :py:class:`CSO_Listing` objects with selection of records. Return :py:class:`CSO_Listing` object with selection of records. Optional arguments: Loading Loading @@ -1594,7 +1620,7 @@ class CSO_Listing(object): for key, value in kwargs.items(): # check .. if key not in df.keys(): logging.error(f"key '{key}' not defined in listing") logging.error(f"key '{key}' not defined in listing: {self.filename}") raise Exception # endif # select: Loading @@ -1606,6 +1632,9 @@ class CSO_Listing(object): # evaluate selection expression? if expr is not None: # replace templates: # %{orbit} == '12345' # to: # xrec['orbit'] == '12345' for key in self.df.keys(): expr = expr.replace("%{" + key + "}", "xrec['" + key + "']") # endfor Loading @@ -1632,21 +1661,19 @@ class CSO_Listing(object): if eval(selection): selected.append(indx) filestatus[indx] = "selected" rec = xrec # endif # endfor # records # exactly one? then leave: if len(selected) == 1: # any selected? if len(selected) > 0: # leave: break elif len(selected) > 1: logging.error(f"found more than one record matching selection: {selection}") for fname in selected: logging.error(f" {fname}") # endfor raise Exception # endif # number found #endif # endfor # selection criteria # show selection? if verbose : # info ... logging.info(f"{indent}available records(s):") # loop: Loading @@ -1656,6 +1683,7 @@ class CSO_Listing(object): line = line + " [" + filestatus[fname] + "]" logging.info(f"{indent} {line}") # endfor #endif # verbose # no match? if len(selected) == 0: Loading @@ -1670,8 +1698,8 @@ class CSO_Listing(object): # create empty dataframe as result: df = pandas.DataFrame(columns=df.columns) else: # extract selected record: df = df.loc[[selected[0]]] # extract selected record(s): df = df.loc[selected] # endif # endif Loading Loading @@ -1721,13 +1749,17 @@ class CSO_Listing(object): # * def Sort(self, by="filename"): def Sort(self, by=None): """ Sort listing table by filename or other key. Sort listing table by index (default, this is the "filename") or by a named column. """ # sort inplace: # sort index or values: if by is None: self.df.sort_index(inplace=True) else : self.df.sort_values(by, inplace=True) # endif # endef Sort Loading Loading
src/cso/cso_file.py +75 −43 Original line number Diff line number Diff line # Changes # # CHANGES # # 2022-09, Arjo Segers # Write files with zlib-compression, option to disable this. Loading Loading @@ -39,6 +40,14 @@ # 2025-04, Arjo Segers # Enable zlib compression only for numerical data. # Avoid warnings from packing in case of all-nan values. # Open a file rather than loading it. # Extended sort options for listing files. # Support creation of listing file objects without filename. # Support selection of multiple records from listing file. # # 2025-04, Arjo Segers # Changed imports for python packaging. # Change expansion of datetime values from csv files for recent pandas version. # ######################################################################## Loading Loading @@ -189,7 +198,7 @@ def Pack_DataArray(da, dtype="i2"): # only floats ... if da.dtype in [numpy.float32, numpy.float64]: # should have some values .. # any values defined? if numpy.any( ~ numpy.isnan(da.values) ) : # value range, ignore nan's: vmin = numpy.nanmin(da.values) Loading Loading @@ -276,11 +285,13 @@ class CSO_File(object): raise Exception # endif # access dataset: with xarray.open_dataset(self.filename) as self.ds: # load entire file: self.ds.load() # endwith # xarray # open file: try: self.ds = xarray.open_dataset(self.filename) except: logging.error(f"could not open (corrupted?) file: {self.filename}") raise #endtry else: # dummy: Loading Loading @@ -1291,14 +1302,25 @@ class CSO_Listing(object): # head for index column: self.index_label = "filename" # read? if filename is not None: # store filename: self.filename = filename # directory name: if self.filename is not None: # check .. if not os.path.isfile(filename): logging.error("listing file not found: %s" % filename) raise Exception # endif # base directory: self.dirname = os.path.dirname(self.filename) # could be empty .. if len(self.dirname) == 0 : self.dirname = os.curdir # endif # info ... logging.info(f"{indent} read listing {filename} ...") # read: Loading @@ -1314,7 +1336,11 @@ class CSO_Listing(object): self.df["end_time"] = pandas.to_datetime(self.df["end_time"]) else: # new table: # not defined yet, assume current location: self.dirname = os.curdir # new empty table: self.df = pandas.DataFrame(columns=["start_time", "end_time"]) # endif Loading Loading @@ -1343,7 +1369,7 @@ class CSO_Listing(object): # save, also write the index column: self.df.to_csv(filename, sep=self.sep, columns=columns, index_label=self.index_label) # enddef Close # enddef Save # * Loading Loading @@ -1465,7 +1491,7 @@ class CSO_Listing(object): # check .. if fname not in self.df.index: logging.error('file "%s" is not a record in table: %s' % (fname, filename)) logging.error(f"file '{fname}' is not a record in table: {self.filename}") raise Exception # endif Loading Loading @@ -1532,9 +1558,9 @@ class CSO_Listing(object): # * def Select(self, tr=None, method="overlap", expr=None, blacklist=[], indent="", **kwargs): def Select(self, tr=None, method="overlap", expr=None, blacklist=[], verbose=True, indent="", **kwargs): """ Return :py:class:`CSO_Listing` objects with selection of records. Return :py:class:`CSO_Listing` object with selection of records. Optional arguments: Loading Loading @@ -1594,7 +1620,7 @@ class CSO_Listing(object): for key, value in kwargs.items(): # check .. if key not in df.keys(): logging.error(f"key '{key}' not defined in listing") logging.error(f"key '{key}' not defined in listing: {self.filename}") raise Exception # endif # select: Loading @@ -1606,6 +1632,9 @@ class CSO_Listing(object): # evaluate selection expression? if expr is not None: # replace templates: # %{orbit} == '12345' # to: # xrec['orbit'] == '12345' for key in self.df.keys(): expr = expr.replace("%{" + key + "}", "xrec['" + key + "']") # endfor Loading @@ -1632,21 +1661,19 @@ class CSO_Listing(object): if eval(selection): selected.append(indx) filestatus[indx] = "selected" rec = xrec # endif # endfor # records # exactly one? then leave: if len(selected) == 1: # any selected? if len(selected) > 0: # leave: break elif len(selected) > 1: logging.error(f"found more than one record matching selection: {selection}") for fname in selected: logging.error(f" {fname}") # endfor raise Exception # endif # number found #endif # endfor # selection criteria # show selection? if verbose : # info ... logging.info(f"{indent}available records(s):") # loop: Loading @@ -1656,6 +1683,7 @@ class CSO_Listing(object): line = line + " [" + filestatus[fname] + "]" logging.info(f"{indent} {line}") # endfor #endif # verbose # no match? if len(selected) == 0: Loading @@ -1670,8 +1698,8 @@ class CSO_Listing(object): # create empty dataframe as result: df = pandas.DataFrame(columns=df.columns) else: # extract selected record: df = df.loc[[selected[0]]] # extract selected record(s): df = df.loc[selected] # endif # endif Loading Loading @@ -1721,13 +1749,17 @@ class CSO_Listing(object): # * def Sort(self, by="filename"): def Sort(self, by=None): """ Sort listing table by filename or other key. Sort listing table by index (default, this is the "filename") or by a named column. """ # sort inplace: # sort index or values: if by is None: self.df.sort_index(inplace=True) else : self.df.sort_values(by, inplace=True) # endif # endef Sort Loading