Loading py/cso_s5p.py +94 −49 Original line number Diff line number Diff line Loading @@ -22,7 +22,8 @@ # Fixed bug in definition of listing file dates from rcfile settings. # # 2023-11, Arjo Segers # Improved check on undefined 'href' fields in inquiry listing. # Introduced error files to inform about input files that could not be downloaded # or are corrupted, next to message files that inform about zero selected pixels. # # Loading Loading @@ -2091,12 +2092,31 @@ class CSO_S5p_Convert(utopya.UtopyaRc): ! skip some input files: <rcbase>.blacklist : S5P_PAL__L2__NO2____20190806T022006_20190806T040136_09388_01_020301_20211110T020511.nc By default the conversion will stop if a file is corrupted or could not be downloaded. To let the conversion firs process all files, an option is present to create a so-called *error file*. An *error file* has the same name as the target file of the conversion, but with extension ``.err`` instead of ``.nc``. The *error file* contains a text that describes what is wrong with the source file, for example that it cannot be opened. Enable the creation of error files with the following flag:: ! enable error files for missing or corrupted input files? <rcbase>.create-error-files : True If this flag is enabled, and an error file is found instead of the target file, the conversion will simply skip this target and will not try to download the source file again. If an input file should be converted, it is read into a :py:class:`.S5p_File` object. The :py:meth:`SelectPixels <S5p_File.SelectPixels>` method is called to select pixels based on critera defined in the settings; see its documentation for how to configure the pixel selection. This method als returns a history line to desribe the selection, which will be added as This method also returns a history line to desribe the selection, which will be added as attribute to the output file. If no pixels are selected, for example because an orbit is outside the target domain, an informative message is written to a so-called *message file*. A *message file* has the same name as the target file of the conversion, but with extension ``.msg`` instead of ``.nc``. If this file is present, the conversion will simply skip this target and will for example not try to download the source file again. The output file is created as an :py:class:`.CSO_S5p_File` object. It's :py:meth:`AddSelection <.CSO_S5p_File.AddSelection>` method is called with the input object as argument, and this will copy the selected pixels for variables specified in the settings. Loading Loading @@ -2157,7 +2177,6 @@ class CSO_S5p_Convert(utopya.UtopyaRc): import datetime import fnmatch import pandas import numpy # tools: import cso_file Loading Loading @@ -2262,6 +2281,9 @@ class CSO_S5p_Convert(utopya.UtopyaRc): # select some specific files? whitelist = self.GetSetting("whitelist", default="").split() # write error files? with_error_files = self.GetSetting("create-error-files",totype="bool",default=False) # path to store download: input_dir__template = self.GetSetting("input.dir") # cleanup? Loading Loading @@ -2388,12 +2410,32 @@ class CSO_S5p_Convert(utopya.UtopyaRc): os.makedirs(dname) # endif # message file: # split filename at extension: fname, ext = os.path.splitext(output_filename) # error file: output_errfile = fname + ".err" # message file: output_msgfile = fname + ".msg" # messsage present? if os.path.isfile(output_msgfile): # error file present? if with_error_files and os.path.isfile(output_errfile): # info .. logging.info(" error file present:") # read: with open(output_errfile, "r") as f: lines = f.readlines() # endwith # info.. for line in lines: logging.info(" %s" % line.strip()) # endfor # info .. logging.info(" do not try to create again ...:") # do not create .. create = False # # ~ message file present? elif os.path.isfile(output_msgfile): # info .. logging.info(" message file present:") # read: Loading @@ -2402,24 +2444,27 @@ class CSO_S5p_Convert(utopya.UtopyaRc): # endwith # info.. for line in lines: logging.info(" %s" % line) logging.info(" %s" % line.strip()) # endfor # info .. logging.info(" do not try to create again ...:") # do not create .. create = False # # ~ not present yet, create file? elif not os.path.isfile(output_filename): # info ... logging.info(" create new file ...") # always create: create = True # # ~ renew? elif renew: # info ... logging.info(" renew file ...") # always create: create = True # # ~ already a version present: elif os.path.isfile(output_filename): # there might be duplicated processings, with different processing times; Loading Loading @@ -2471,17 +2516,6 @@ class CSO_S5p_Convert(utopya.UtopyaRc): if not os.path.isfile(input_file): # info .. logging.info(" not present yet, download ...") # check .. if "href" not in rec.keys(): logging.error(f"cannot download, no 'href' column in inquiry ...") logging.error(f"check inquiry table: {filename}") raise Exception # endif if pandas.isna(rec["href"]): logging.error(f"cannot download, empty 'href' element in inquiry ...") logging.error(f"check inquiry table: {filename}") raise Exception # endif # download ... cso_dataspace.CSO_DataSpace_DownloadFile(rec["href"], input_file) # store name: Loading @@ -2490,14 +2524,22 @@ class CSO_S5p_Convert(utopya.UtopyaRc): # download might have failed .. if not os.path.isfile(input_file): # write error file or raise error? if with_error_files: # info .. logging.warning(indent + " message input file, write message file ...") # write message file: with open(output_msgfile, "w") as f: f.write("missing file: %s" % input_file) logging.warning(indent + " missing input file, write error file ...") # write error file: with open(output_errfile, "w") as f: f.write("missing file: %s\n" % input_file) # endwith # next: continue else : # info .. logging.error(f"missing input file") logging.error(f" {input_file}") raise Exception # endif # endif # info ... Loading @@ -2506,13 +2548,13 @@ class CSO_S5p_Convert(utopya.UtopyaRc): try: sfile = S5p_File(input_file) except: # write error file or raise error or error? if with_error_files : # info .. logging.warning( indent + " could not open input file, write message file ..." ) # write message file: with open(output_msgfile, "w") as f: f.write("could not open file: %s" % input_file) logging.warning(f"{indent} could not open, write error file ...") # write error file: with open(output_errfile, "w") as f: f.write("could not open file: %s\n" % input_file) # endwith # cleanup? if downloads_cleanup and (input_file in downloads): Loading @@ -2521,6 +2563,11 @@ class CSO_S5p_Convert(utopya.UtopyaRc): # endif # next: continue else: # info .. logging.error(f"could not open input file: {input_file}") raise Exception #endif # endtry # apply selections, return bool mask and list of history lines: Loading @@ -2539,7 +2586,7 @@ class CSO_S5p_Convert(utopya.UtopyaRc): logging.warning(indent + " no pixels selected, write message file ...") # write message file: with open(output_msgfile, "w") as f: f.write("no pixels selected in: %s" % input_file) f.write("no pixels selected in: %s\n" % input_file) # endwith else: Loading @@ -2549,13 +2596,11 @@ class CSO_S5p_Convert(utopya.UtopyaRc): # init: csf = CSO_S5p_File() # add: csf.AddSelection( sfile, selected, self.rcf, self.rcbase, indent=indent + " " ) csf.AddSelection(sfile, selected, self.rcf, self.rcbase, indent=indent + " " ) # update history: history.append( "added %i pixels from %s" % (nselected, os.path.basename(input_file)) ) history.append( "added %i pixels from %s" % (nselected, os.path.basename(input_file)) ) # update attributes: for key in ["orbit", "processing", "processor_version", "collection"]: attrs[key] = rec[key] Loading Loading @@ -2851,7 +2896,7 @@ class CSO_S5p_Listing(utopya.UtopyaRc): class CSO_S5p_Download_Listing(utopya.UtopyaRc): """ Create *listing* file for files download from S5P data portals. Create *listing* file for files downloaded from S5P data portals. A *listing* file contains the names of the converted orbit files, the time range of pixels in the file, and other information extracted from the filenames: Loading Loading
py/cso_s5p.py +94 −49 Original line number Diff line number Diff line Loading @@ -22,7 +22,8 @@ # Fixed bug in definition of listing file dates from rcfile settings. # # 2023-11, Arjo Segers # Improved check on undefined 'href' fields in inquiry listing. # Introduced error files to inform about input files that could not be downloaded # or are corrupted, next to message files that inform about zero selected pixels. # # Loading Loading @@ -2091,12 +2092,31 @@ class CSO_S5p_Convert(utopya.UtopyaRc): ! skip some input files: <rcbase>.blacklist : S5P_PAL__L2__NO2____20190806T022006_20190806T040136_09388_01_020301_20211110T020511.nc By default the conversion will stop if a file is corrupted or could not be downloaded. To let the conversion firs process all files, an option is present to create a so-called *error file*. An *error file* has the same name as the target file of the conversion, but with extension ``.err`` instead of ``.nc``. The *error file* contains a text that describes what is wrong with the source file, for example that it cannot be opened. Enable the creation of error files with the following flag:: ! enable error files for missing or corrupted input files? <rcbase>.create-error-files : True If this flag is enabled, and an error file is found instead of the target file, the conversion will simply skip this target and will not try to download the source file again. If an input file should be converted, it is read into a :py:class:`.S5p_File` object. The :py:meth:`SelectPixels <S5p_File.SelectPixels>` method is called to select pixels based on critera defined in the settings; see its documentation for how to configure the pixel selection. This method als returns a history line to desribe the selection, which will be added as This method also returns a history line to desribe the selection, which will be added as attribute to the output file. If no pixels are selected, for example because an orbit is outside the target domain, an informative message is written to a so-called *message file*. A *message file* has the same name as the target file of the conversion, but with extension ``.msg`` instead of ``.nc``. If this file is present, the conversion will simply skip this target and will for example not try to download the source file again. The output file is created as an :py:class:`.CSO_S5p_File` object. It's :py:meth:`AddSelection <.CSO_S5p_File.AddSelection>` method is called with the input object as argument, and this will copy the selected pixels for variables specified in the settings. Loading Loading @@ -2157,7 +2177,6 @@ class CSO_S5p_Convert(utopya.UtopyaRc): import datetime import fnmatch import pandas import numpy # tools: import cso_file Loading Loading @@ -2262,6 +2281,9 @@ class CSO_S5p_Convert(utopya.UtopyaRc): # select some specific files? whitelist = self.GetSetting("whitelist", default="").split() # write error files? with_error_files = self.GetSetting("create-error-files",totype="bool",default=False) # path to store download: input_dir__template = self.GetSetting("input.dir") # cleanup? Loading Loading @@ -2388,12 +2410,32 @@ class CSO_S5p_Convert(utopya.UtopyaRc): os.makedirs(dname) # endif # message file: # split filename at extension: fname, ext = os.path.splitext(output_filename) # error file: output_errfile = fname + ".err" # message file: output_msgfile = fname + ".msg" # messsage present? if os.path.isfile(output_msgfile): # error file present? if with_error_files and os.path.isfile(output_errfile): # info .. logging.info(" error file present:") # read: with open(output_errfile, "r") as f: lines = f.readlines() # endwith # info.. for line in lines: logging.info(" %s" % line.strip()) # endfor # info .. logging.info(" do not try to create again ...:") # do not create .. create = False # # ~ message file present? elif os.path.isfile(output_msgfile): # info .. logging.info(" message file present:") # read: Loading @@ -2402,24 +2444,27 @@ class CSO_S5p_Convert(utopya.UtopyaRc): # endwith # info.. for line in lines: logging.info(" %s" % line) logging.info(" %s" % line.strip()) # endfor # info .. logging.info(" do not try to create again ...:") # do not create .. create = False # # ~ not present yet, create file? elif not os.path.isfile(output_filename): # info ... logging.info(" create new file ...") # always create: create = True # # ~ renew? elif renew: # info ... logging.info(" renew file ...") # always create: create = True # # ~ already a version present: elif os.path.isfile(output_filename): # there might be duplicated processings, with different processing times; Loading Loading @@ -2471,17 +2516,6 @@ class CSO_S5p_Convert(utopya.UtopyaRc): if not os.path.isfile(input_file): # info .. logging.info(" not present yet, download ...") # check .. if "href" not in rec.keys(): logging.error(f"cannot download, no 'href' column in inquiry ...") logging.error(f"check inquiry table: {filename}") raise Exception # endif if pandas.isna(rec["href"]): logging.error(f"cannot download, empty 'href' element in inquiry ...") logging.error(f"check inquiry table: {filename}") raise Exception # endif # download ... cso_dataspace.CSO_DataSpace_DownloadFile(rec["href"], input_file) # store name: Loading @@ -2490,14 +2524,22 @@ class CSO_S5p_Convert(utopya.UtopyaRc): # download might have failed .. if not os.path.isfile(input_file): # write error file or raise error? if with_error_files: # info .. logging.warning(indent + " message input file, write message file ...") # write message file: with open(output_msgfile, "w") as f: f.write("missing file: %s" % input_file) logging.warning(indent + " missing input file, write error file ...") # write error file: with open(output_errfile, "w") as f: f.write("missing file: %s\n" % input_file) # endwith # next: continue else : # info .. logging.error(f"missing input file") logging.error(f" {input_file}") raise Exception # endif # endif # info ... Loading @@ -2506,13 +2548,13 @@ class CSO_S5p_Convert(utopya.UtopyaRc): try: sfile = S5p_File(input_file) except: # write error file or raise error or error? if with_error_files : # info .. logging.warning( indent + " could not open input file, write message file ..." ) # write message file: with open(output_msgfile, "w") as f: f.write("could not open file: %s" % input_file) logging.warning(f"{indent} could not open, write error file ...") # write error file: with open(output_errfile, "w") as f: f.write("could not open file: %s\n" % input_file) # endwith # cleanup? if downloads_cleanup and (input_file in downloads): Loading @@ -2521,6 +2563,11 @@ class CSO_S5p_Convert(utopya.UtopyaRc): # endif # next: continue else: # info .. logging.error(f"could not open input file: {input_file}") raise Exception #endif # endtry # apply selections, return bool mask and list of history lines: Loading @@ -2539,7 +2586,7 @@ class CSO_S5p_Convert(utopya.UtopyaRc): logging.warning(indent + " no pixels selected, write message file ...") # write message file: with open(output_msgfile, "w") as f: f.write("no pixels selected in: %s" % input_file) f.write("no pixels selected in: %s\n" % input_file) # endwith else: Loading @@ -2549,13 +2596,11 @@ class CSO_S5p_Convert(utopya.UtopyaRc): # init: csf = CSO_S5p_File() # add: csf.AddSelection( sfile, selected, self.rcf, self.rcbase, indent=indent + " " ) csf.AddSelection(sfile, selected, self.rcf, self.rcbase, indent=indent + " " ) # update history: history.append( "added %i pixels from %s" % (nselected, os.path.basename(input_file)) ) history.append( "added %i pixels from %s" % (nselected, os.path.basename(input_file)) ) # update attributes: for key in ["orbit", "processing", "processor_version", "collection"]: attrs[key] = rec[key] Loading Loading @@ -2851,7 +2896,7 @@ class CSO_S5p_Listing(utopya.UtopyaRc): class CSO_S5p_Download_Listing(utopya.UtopyaRc): """ Create *listing* file for files download from S5P data portals. Create *listing* file for files downloaded from S5P data portals. A *listing* file contains the names of the converted orbit files, the time range of pixels in the file, and other information extracted from the filenames: Loading