Loading src/cso/cso_earthaccess.py +215 −7 Original line number Diff line number Diff line Loading @@ -7,6 +7,12 @@ # 2025-02, Arjo Segers # Use 'shutil.move' instead of 'os.rename' for move over filesystem. # # 2025-02, Arjo Segers # Added 'CSO_EarthAccess_Download_Listing' class. # # 2025-04, Arjo Segers # Changed imports for python packaging. # ######################################################################## ### Loading Loading @@ -34,6 +40,7 @@ The classes and are defined according to the following hierchy: * :py:class:`.CSO_EarthAccess_Inquire` * :py:class:`.CSO_EarthAccess_Download` * :py:class:`.CSO_EarthAccess_Download_Listing` Classes Loading Loading @@ -66,7 +73,7 @@ import utopya class CSO_EarthAccess_Inquire(utopya.UtopyaRc): """ Create *listing* table (csv file) with on each line the location and information on Create *listing* table (csv file) with on each line the location of and information on a data file available via the `EarthAccess <https://earthaccess.readthedocs.io/>`_ package. As example, a file with VIIRS AOD could be available as:: Loading Loading @@ -115,9 +122,9 @@ class CSO_EarthAccess_Inquire(utopya.UtopyaRc): `VIIRS Aerosol <https://ladsweb.modaps.eosdis.nasa.gov/missions-and-measurements/science-domain/aerosol/#viirs>`_ page:: <rcbase>.dataset : AERDB_L2_VIIRS_SNPP <rcbase>.dataset : AERDB_L2_VIIRS_NOAA20 <rcbase>.dataset : AERDT_L2_VIIRS_SNPP <rcbase>.dataset : AERDT_L2_VIIRS_NOAA20 !<rcbase>.dataset : AERDB_L2_VIIRS_NOAA20 !<rcbase>.dataset : AERDT_L2_VIIRS_SNPP !<rcbase>.dataset : AERDT_L2_VIIRS_NOAA20 Eventually specify a target area, only orbits with some pixels within the defined box will be downloaded:: Loading @@ -125,7 +132,7 @@ class CSO_EarthAccess_Inquire(utopya.UtopyaRc): <rcbase>.area : !<rcbase>.area : -30,30,35,76 Name of output csv file:: Name of the output csv file:: ! 
class CSO_EarthAccess_Download_Listing(utopya.UtopyaRc):

    """
    Create *listing* file for files downloaded from VIIRS data portals.

    A *listing* file contains the names of the downloaded orbit files
    (relative to the location of the listing file), the time range assigned
    to each file, and other information extracted from the filenames or
    file attributes::

        filename                                                       ;start_time         ;end_time           ;orbit
        2023/268/AERDB_L2_VIIRS_SNPP.A2023268.0112.002.2023268134001.nc;2023-09-25 00:00:00;2023-09-26 00:00:00;61711
        2023/268/AERDB_L2_VIIRS_SNPP.A2023268.0248.002.2023268152045.nc;2023-09-25 00:00:00;2023-09-26 00:00:00;61712
        2023/268/AERDB_L2_VIIRS_SNPP.A2023268.0254.002.2023268154044.nc;2023-09-25 00:00:00;2023-09-26 00:00:00;61712
        2023/268/AERDB_L2_VIIRS_SNPP.A2023268.0430.002.2023268170054.nc;2023-09-25 00:00:00;2023-09-26 00:00:00;61713
        :

    This file could be used to scan for available files.

    In the settings, define the name of the file to be created::

        ! create listing of downloaded files;
        ! eventually include time templates %Y-%m-%d etc:
        <rcbase>.file      :  /Scratch/Copernicus/VIIRS/listing.csv

    Optionally define a creation mode for the (parent) directories::

        ! directory creation mode:
        <rcbase>.dmode     :  0o775

    An existing listing file is not replaced,
    unless the following flag is set::

        ! renew table?
        <rcbase>.renew     :  True

    Specify the filename filter used to search for data files::

        <rcbase>.pattern   :  AER*.nc

    """

    def __init__(self, rcfile, rcbase="", env={}, indent=""):
        """
        Scan the directory tree below the listing file for data files
        and write the listing table.

        Arguments:

        * ``rcfile``, ``rcbase``, ``env`` : passed unchanged to the
          :py:class:`utopya.UtopyaRc` initialization;
        * ``indent`` : prefix for all log messages.

        A :py:class:`ValueError` is raised for a matching file whose name
        does not have the expected layout.
        """

        # modules:
        import os
        import datetime
        import fnmatch
        import collections

        # tools:
        import cso_file

        # info ...
        logging.info(f"{indent}")
        logging.info(f"{indent}** create listing file")
        logging.info(f"{indent}")

        # init base object:
        utopya.UtopyaRc.__init__(self, rcfile=rcfile, rcbase=rcbase, env=env)

        # directory creation mode:
        dmode = self.GetSetting("dmode", totype="int", default=None)
        # renew output?
        renew = self.GetSetting("renew", totype="bool")

        # table file to be written, time templates are filled with current time:
        lst_file = self.GetSetting("file")
        lst_file = datetime.datetime.now().strftime(lst_file)

        # create?
        if (not os.path.isfile(lst_file)) or renew:
            # info ..
            logging.info(f"{indent}create %s ..." % lst_file)

            # pattern for data files:
            fpattern = self.GetSetting("pattern")
            # info ..
            logging.info(f"{indent} scan for datafiles: {fpattern}")

            # path to listing file, data files are searched relative to this;
            # an empty dirname means the current directory:
            bdir = os.path.dirname(lst_file) or "."
            # info ...
            logging.info(f"{indent} scan base directory: %s ..." % bdir)

            # create directory if necessary:
            cso_file.CheckDir(lst_file, dmode=dmode)

            # initialize for (re)creation:
            listing = cso_file.CSO_Listing(indent=f"{indent} ")

            # recursively search for files; 'os.walk' visits every directory
            # exactly once, so no bookkeeping of visited roots is needed:
            for root, _dirs, files in os.walk(bdir):
                # progress is only reported for directories that hold files:
                if not files:
                    continue
                # endif

                # subdir relative to listing file:
                subdir = os.path.relpath(root, start=bdir)
                # info ...
                logging.info(f"{indent} {subdir} ...")

                # loop over files:
                for fname in files:
                    # not a data file?
                    if not fnmatch.fnmatch(fname, fpattern):
                        continue
                    # endif

                    # time range (acquisition day) from filename:
                    t1, t2 = self._GetTimeRange(fname)

                    # open for extra info, ensure that file is closed
                    # also if the attribute could not be read:
                    sfile = cso_file.CSO_File(os.path.join(root, fname))
                    try:
                        orbit = sfile.GetAttr("OrbitNumber")
                    finally:
                        sfile.Close()
                    # endtry

                    # fill data record:
                    data = collections.OrderedDict()
                    data["start_time"] = t1
                    data["end_time"] = t2
                    data["orbit"] = orbit

                    # update record, key is the path relative to the listing file:
                    listing.UpdateRecord(
                        os.path.join(subdir, fname), data, indent=f"{indent} "
                    )
                # endfor # filenames
            # endfor # walk over subdirs/files

            # adhoc: store orbit numbers as integers;
            # NOTE(review): presumably fails if no data file matched and the
            # table has no "orbit" column yet - confirm against CSO_Listing.
            listing.df = listing.df.astype({"orbit": int})
            # sort on orbit number:
            listing.Sort(by="orbit")
            # save:
            listing.Save(lst_file, dmode=dmode, indent=f"{indent} ")

        else:
            # info ..
            logging.info(f"{indent}keep %s ..." % lst_file)
        # endif

        # info ...
        logging.info(f"{indent}")
        logging.info(f"{indent}** end listing")
        logging.info(f"{indent}")

    # enddef __init__

    @staticmethod
    def _GetTimeRange(fname):
        """
        Return ``(start_time, end_time)`` for a data file, where the start
        is the acquisition day encoded in the filename and the end is one
        day later.

        Expected filenames consist of 6 dot-separated parts, with the second
        part holding "A" followed by year and day-of-year::

            AERDB_L2_VIIRS_SNPP.A2022001.0342.002.2023076013614.nc

        A :py:class:`ValueError` is raised (after logging the error) if the
        filename has a different number of parts or the date is invalid.
        """

        # modules:
        import datetime

        # split in parts:
        parts = fname.split(".")
        # check ..
        if len(parts) != 6:
            msg = f"unsupported filename: {fname}"
            logging.error(msg)
            raise ValueError(msg)
        # endif

        # second is year-julday, strip the "A" of acquisition:
        try:
            t1 = datetime.datetime.strptime(parts[1][1:], "%Y%j")
        except ValueError as err:
            msg = f"could not extract date from '{parts[1]}'"
            logging.error(msg)
            raise ValueError(msg) from err
        # endtry

        # time range covers the full day:
        return t1, t1 + datetime.timedelta(days=1)

    # enddef _GetTimeRange


# endclass CSO_EarthAccess_Download_Listing
src/cso/cso_earthaccess.py +215 −7 Original line number Diff line number Diff line Loading @@ -7,6 +7,12 @@ # 2025-02, Arjo Segers # Use 'shutil.move' instead of 'os.rename' for move over filesystem. # # 2025-02, Arjo Segers # Added 'CSO_EarthAccess_Download_Listing' class. # # 2025-04, Arjo Segers # Changed imports for python packaging. # ######################################################################## ### Loading Loading @@ -34,6 +40,7 @@ The classes and are defined according to the following hierchy: * :py:class:`.CSO_EarthAccess_Inquire` * :py:class:`.CSO_EarthAccess_Download` * :py:class:`.CSO_EarthAccess_Download_Listing` Classes Loading Loading @@ -66,7 +73,7 @@ import utopya class CSO_EarthAccess_Inquire(utopya.UtopyaRc): """ Create *listing* table (csv file) with on each line the location and information on Create *listing* table (csv file) with on each line the location of and information on a data file available via the `EarthAccess <https://earthaccess.readthedocs.io/>`_ package. As example, a file with VIIRS AOD could be available as:: Loading Loading @@ -115,9 +122,9 @@ class CSO_EarthAccess_Inquire(utopya.UtopyaRc): `VIIRS Aerosol <https://ladsweb.modaps.eosdis.nasa.gov/missions-and-measurements/science-domain/aerosol/#viirs>`_ page:: <rcbase>.dataset : AERDB_L2_VIIRS_SNPP <rcbase>.dataset : AERDB_L2_VIIRS_NOAA20 <rcbase>.dataset : AERDT_L2_VIIRS_SNPP <rcbase>.dataset : AERDT_L2_VIIRS_NOAA20 !<rcbase>.dataset : AERDB_L2_VIIRS_NOAA20 !<rcbase>.dataset : AERDT_L2_VIIRS_SNPP !<rcbase>.dataset : AERDT_L2_VIIRS_NOAA20 Eventually specify a target area, only orbits with some pixels within the defined box will be downloaded:: Loading @@ -125,7 +132,7 @@ class CSO_EarthAccess_Inquire(utopya.UtopyaRc): <rcbase>.area : !<rcbase>.area : -30,30,35,76 Name of output csv file:: Name of the output csv file:: ! 
class CSO_EarthAccess_Download_Listing(utopya.UtopyaRc):

    """
    Create *listing* file for files downloaded from VIIRS data portals.

    A *listing* file contains the names of the downloaded orbit files
    (relative to the location of the listing file), the time range assigned
    to each file, and other information extracted from the filenames or
    file attributes::

        filename                                                       ;start_time         ;end_time           ;orbit
        2023/268/AERDB_L2_VIIRS_SNPP.A2023268.0112.002.2023268134001.nc;2023-09-25 00:00:00;2023-09-26 00:00:00;61711
        2023/268/AERDB_L2_VIIRS_SNPP.A2023268.0248.002.2023268152045.nc;2023-09-25 00:00:00;2023-09-26 00:00:00;61712
        2023/268/AERDB_L2_VIIRS_SNPP.A2023268.0254.002.2023268154044.nc;2023-09-25 00:00:00;2023-09-26 00:00:00;61712
        2023/268/AERDB_L2_VIIRS_SNPP.A2023268.0430.002.2023268170054.nc;2023-09-25 00:00:00;2023-09-26 00:00:00;61713
        :

    This file could be used to scan for available files.

    In the settings, define the name of the file to be created::

        ! create listing of downloaded files;
        ! eventually include time templates %Y-%m-%d etc:
        <rcbase>.file      :  /Scratch/Copernicus/VIIRS/listing.csv

    Optionally define a creation mode for the (parent) directories::

        ! directory creation mode:
        <rcbase>.dmode     :  0o775

    An existing listing file is not replaced,
    unless the following flag is set::

        ! renew table?
        <rcbase>.renew     :  True

    Specify the filename filter used to search for data files::

        <rcbase>.pattern   :  AER*.nc

    """

    def __init__(self, rcfile, rcbase="", env={}, indent=""):
        """
        Scan the directory tree below the listing file for data files
        and write the listing table.

        Arguments:

        * ``rcfile``, ``rcbase``, ``env`` : passed unchanged to the
          :py:class:`utopya.UtopyaRc` initialization;
        * ``indent`` : prefix for all log messages.

        A :py:class:`ValueError` is raised for a matching file whose name
        does not have the expected layout.
        """

        # modules:
        import os
        import datetime
        import fnmatch
        import collections

        # tools:
        import cso_file

        # info ...
        logging.info(f"{indent}")
        logging.info(f"{indent}** create listing file")
        logging.info(f"{indent}")

        # init base object:
        utopya.UtopyaRc.__init__(self, rcfile=rcfile, rcbase=rcbase, env=env)

        # directory creation mode:
        dmode = self.GetSetting("dmode", totype="int", default=None)
        # renew output?
        renew = self.GetSetting("renew", totype="bool")

        # table file to be written, time templates are filled with current time:
        lst_file = self.GetSetting("file")
        lst_file = datetime.datetime.now().strftime(lst_file)

        # create?
        if (not os.path.isfile(lst_file)) or renew:
            # info ..
            logging.info(f"{indent}create %s ..." % lst_file)

            # pattern for data files:
            fpattern = self.GetSetting("pattern")
            # info ..
            logging.info(f"{indent} scan for datafiles: {fpattern}")

            # path to listing file, data files are searched relative to this;
            # an empty dirname means the current directory:
            bdir = os.path.dirname(lst_file) or "."
            # info ...
            logging.info(f"{indent} scan base directory: %s ..." % bdir)

            # create directory if necessary:
            cso_file.CheckDir(lst_file, dmode=dmode)

            # initialize for (re)creation:
            listing = cso_file.CSO_Listing(indent=f"{indent} ")

            # recursively search for files; 'os.walk' visits every directory
            # exactly once, so no bookkeeping of visited roots is needed:
            for root, _dirs, files in os.walk(bdir):
                # progress is only reported for directories that hold files:
                if not files:
                    continue
                # endif

                # subdir relative to listing file:
                subdir = os.path.relpath(root, start=bdir)
                # info ...
                logging.info(f"{indent} {subdir} ...")

                # loop over files:
                for fname in files:
                    # not a data file?
                    if not fnmatch.fnmatch(fname, fpattern):
                        continue
                    # endif

                    # time range (acquisition day) from filename:
                    t1, t2 = self._GetTimeRange(fname)

                    # open for extra info, ensure that file is closed
                    # also if the attribute could not be read:
                    sfile = cso_file.CSO_File(os.path.join(root, fname))
                    try:
                        orbit = sfile.GetAttr("OrbitNumber")
                    finally:
                        sfile.Close()
                    # endtry

                    # fill data record:
                    data = collections.OrderedDict()
                    data["start_time"] = t1
                    data["end_time"] = t2
                    data["orbit"] = orbit

                    # update record, key is the path relative to the listing file:
                    listing.UpdateRecord(
                        os.path.join(subdir, fname), data, indent=f"{indent} "
                    )
                # endfor # filenames
            # endfor # walk over subdirs/files

            # adhoc: store orbit numbers as integers;
            # NOTE(review): presumably fails if no data file matched and the
            # table has no "orbit" column yet - confirm against CSO_Listing.
            listing.df = listing.df.astype({"orbit": int})
            # sort on orbit number:
            listing.Sort(by="orbit")
            # save:
            listing.Save(lst_file, dmode=dmode, indent=f"{indent} ")

        else:
            # info ..
            logging.info(f"{indent}keep %s ..." % lst_file)
        # endif

        # info ...
        logging.info(f"{indent}")
        logging.info(f"{indent}** end listing")
        logging.info(f"{indent}")

    # enddef __init__

    @staticmethod
    def _GetTimeRange(fname):
        """
        Return ``(start_time, end_time)`` for a data file, where the start
        is the acquisition day encoded in the filename and the end is one
        day later.

        Expected filenames consist of 6 dot-separated parts, with the second
        part holding "A" followed by year and day-of-year::

            AERDB_L2_VIIRS_SNPP.A2022001.0342.002.2023076013614.nc

        A :py:class:`ValueError` is raised (after logging the error) if the
        filename has a different number of parts or the date is invalid.
        """

        # modules:
        import datetime

        # split in parts:
        parts = fname.split(".")
        # check ..
        if len(parts) != 6:
            msg = f"unsupported filename: {fname}"
            logging.error(msg)
            raise ValueError(msg)
        # endif

        # second is year-julday, strip the "A" of acquisition:
        try:
            t1 = datetime.datetime.strptime(parts[1][1:], "%Y%j")
        except ValueError as err:
            msg = f"could not extract date from '{parts[1]}'"
            logging.error(msg)
            raise ValueError(msg) from err
        # endtry

        # time range covers the full day:
        return t1, t1 + datetime.timedelta(days=1)

    # enddef _GetTimeRange


# endclass CSO_EarthAccess_Download_Listing