TNO Intern

Commit f6e7da38 authored by Arjo Segers's avatar Arjo Segers
Browse files

Added class to create listing file of downloaded VIIRS files.

parent f65776aa
Loading
Loading
Loading
Loading
+215 −7
Original line number Diff line number Diff line
@@ -7,6 +7,12 @@
# 2025-02, Arjo Segers
#   Use 'shutil.move' instead of 'os.rename' for move over filesystem.
#
# 2025-02, Arjo Segers
#   Added 'CSO_EarthAccess_Download_Listing' class.
#
# 2025-04, Arjo Segers
#   Changed imports for python packaging.
#   

########################################################################
###
@@ -34,6 +40,7 @@ The classes are defined according to the following hierarchy:

  * :py:class:`.CSO_EarthAccess_Inquire`
  * :py:class:`.CSO_EarthAccess_Download`
  * :py:class:`.CSO_EarthAccess_Download_Listing`


Classes
@@ -66,7 +73,7 @@ import utopya
class CSO_EarthAccess_Inquire(utopya.UtopyaRc):

    """
    Create *listing* table (csv file) with on each line the location and information on
    Create *listing* table (csv file) with on each line the location of and information on
    a data file available via the  `EarthAccess <https://earthaccess.readthedocs.io/>`_ package.

    As example, a file with VIIRS AOD could be available as::
@@ -115,9 +122,9 @@ class CSO_EarthAccess_Inquire(utopya.UtopyaRc):
    `VIIRS Aerosol <https://ladsweb.modaps.eosdis.nasa.gov/missions-and-measurements/science-domain/aerosol/#viirs>`_ page::

      <rcbase>.dataset             :  AERDB_L2_VIIRS_SNPP
      <rcbase>.dataset             :  AERDB_L2_VIIRS_NOAA20
      <rcbase>.dataset             :  AERDT_L2_VIIRS_SNPP
      <rcbase>.dataset             :  AERDT_L2_VIIRS_NOAA20
      !<rcbase>.dataset             :  AERDB_L2_VIIRS_NOAA20
      !<rcbase>.dataset             :  AERDT_L2_VIIRS_SNPP
      !<rcbase>.dataset             :  AERDT_L2_VIIRS_NOAA20

    Optionally specify a target area; only orbits with some pixels within the defined box will be downloaded::

@@ -125,7 +132,7 @@ class CSO_EarthAccess_Inquire(utopya.UtopyaRc):
        <rcbase>.area             :
        !<rcbase>.area             :  -30,30,35,76

    Name of output csv file::
    Name of the output csv file::

        ! output table, here including date of today:
        <rcbase>.output.file            :  ${my.work}/AERDB_inquiry_%Y-%m-%d.csv
@@ -678,13 +685,214 @@ class CSO_EarthAccess_Download(utopya.UtopyaRc):

        # info ...
        logging.info(f"{indent}")
        logging.info(f"{indent}** end convert")
        logging.info(f"{indent}** end download")
        logging.info(f"{indent}")

    # enddef __init__


# endclass CSO_EarthAccess_Download


########################################################################
###
### create listing file for downloaded VIIRS files
###
########################################################################


class CSO_EarthAccess_Download_Listing(utopya.UtopyaRc):

    """
    Create *listing* file for files downloaded from VIIRS data portals.

    A *listing* file contains the names of the downloaded data files
    (relative to the location of the listing file), a time range,
    and other information extracted from the filenames or file attributes::

        filename                                                       ;start_time         ;end_time           ;orbit
        2023/268/AERDB_L2_VIIRS_SNPP.A2023268.0112.002.2023268134001.nc;2023-09-25 00:00:00;2023-09-26 00:00:00;61711
        2023/268/AERDB_L2_VIIRS_SNPP.A2023268.0248.002.2023268152045.nc;2023-09-25 00:00:00;2023-09-26 00:00:00;61712
        2023/268/AERDB_L2_VIIRS_SNPP.A2023268.0254.002.2023268154044.nc;2023-09-25 00:00:00;2023-09-26 00:00:00;61712
        2023/268/AERDB_L2_VIIRS_SNPP.A2023268.0430.002.2023268170054.nc;2023-09-25 00:00:00;2023-09-26 00:00:00;61713
        :

    The columns are filled as follows:

    * ``start_time``, ``end_time`` : the day encoded in the second element of
      the filename (``A<year><julday>``), from 00:00 up to 00:00 of the next day;
    * ``orbit`` : the ``OrbitNumber`` attribute read from the data file.

    This file could be used to scan for available files.

    In the settings, define the name of the file to be created::

        ! create listing of downloaded files;
        ! optionally include time templates %Y-%m-%d etc,
        ! these are evaluated for the current time:
        <rcbase>.file        :  /Scratch/Copernicus/VIIRS/listing.csv

    Optionally define a creation mode for the (parent) directories::

        ! directory creation mode:
        <rcbase>.dmode              :  0o775

    An existing listing file is not replaced,
    unless the following flag is set::

        ! renew table?
        <rcbase>.renew              :  True

    Specify a filename filter to search for data files; the directory holding
    the listing file is scanned recursively, and the filter is matched
    against the base names of the files found::

        <rcbase>.pattern            :  AER*.nc

    Files that match the filter should have names consisting of 6 elements
    seperated by dots, for example::

        AERDB_L2_VIIRS_SNPP.A2022001.0342.002.2023076013614.nc

    """

    def __init__(self, rcfile, rcbase="", env={}, indent=""):
        """
        Create the listing file, unless it is already present and should not be renewed.

        Arguments:

        * ``rcfile``  : settings, passed to the initialization of :py:class:`utopya.UtopyaRc`
        * ``rcbase``  : base of the settings keys, passed to the base class
        * ``env``     : passed to the base class (not changed here)
        * ``indent``  : prefix for the info messages

        An ``Exception`` is raised if a file matching the pattern has a name
        with an unsupported number of elements, or if no date could be
        extracted from the name.
        """

        # modules:
        import os
        import datetime
        import fnmatch
        import collections

        # tools:
        import cso_file

        # info ...
        logging.info(f"{indent}")
        logging.info(f"{indent}** create listing file")
        logging.info(f"{indent}")

        # init base object:
        utopya.UtopyaRc.__init__(self, rcfile=rcfile, rcbase=rcbase, env=env)

        # directory creation mode:
        dmode = self.GetSetting("dmode", totype="int", default=None)

        # renew output?
        renew = self.GetSetting("renew", totype="bool")

        # table file to be written:
        lst_file = self.GetSetting("file")
        # evaluate time templates for current time:
        lst_file = datetime.datetime.now().strftime(lst_file)

        # create?
        if (not os.path.isfile(lst_file)) or renew:
            # info ..
            logging.info(f"{indent}create {lst_file} ...")

            # pattern for data files:
            fpattern = self.GetSetting("pattern")
            # info ..
            logging.info(f"{indent}  scan for datafiles: {fpattern}")

            # path to listing file, data files are searched relative to this;
            # an empty path denotes the current directory:
            bdir = os.path.dirname(lst_file) or "."
            # info ...
            logging.info(f"{indent}  scan base directory: {bdir} ...")

            # create directory if necessary:
            cso_file.CheckDir(lst_file, dmode=dmode)

            # initialize for (re)creation:
            listing = cso_file.CSO_Listing(indent=f"{indent}    ")

            # recursively search for files;
            # every directory is visited once:
            for root, dirs, files in os.walk(bdir):

                # progress info is only shown for directories that hold files:
                if not files:
                    continue
                # endif

                # subdir relative to listing file, the same for all files in this directory:
                subdir = os.path.relpath(root, start=bdir)
                # info ...
                logging.info(f"{indent}    {subdir} ...")

                # loop over files:
                for fname in files:

                    # not a data file?
                    if not fnmatch.fnmatch(fname, fpattern):
                        continue
                    # endif

                    # expected filenames:
                    #   AERDB_L2_VIIRS_SNPP.A2022001.0342.002.2023076013614.nc
                    parts = fname.split(".")
                    if len(parts) != 6:
                        msg = f"unsupported filename: {fname}"
                        logging.error(msg)
                        raise Exception(msg)
                    # endif

                    # second is year-julday, strip the "A" of acquisition;
                    # only trap the error raised on a failed conversion:
                    try:
                        t1 = datetime.datetime.strptime(parts[1][1:], "%Y%j")
                    except ValueError as err:
                        msg = f"could not extract date from '{parts[1]}'"
                        logging.error(msg)
                        raise Exception(msg) from err
                    # endtry

                    # end time is start of next day:
                    t2 = t1 + datetime.timedelta(days=1)

                    # open for extra info:
                    sfile = cso_file.CSO_File(os.path.join(root, fname))
                    # extract attributes, always close the file,
                    # also if the attribute could not be read:
                    try:
                        orbit = sfile.GetAttr("OrbitNumber")
                    finally:
                        sfile.Close()
                    # endtry

                    # fill data record:
                    data = collections.OrderedDict()
                    data["start_time"] = t1
                    data["end_time"] = t2
                    data["orbit"] = orbit

                    # update record:
                    listing.UpdateRecord(
                        os.path.join(subdir, fname), data, indent=f"{indent}    "
                    )

                # endfor # filenames

            # endfor # walk over subdirs/files

            # without any record there is no "orbit" column to convert or sort on
            # (assumes 'listing.df' is the table holding the records, as implied
            # by the 'astype' call below - TODO confirm for an empty listing)
            if "orbit" in getattr(listing.df, "columns", ()):
                # adhoc: ensure that orbit numbers are integers ..
                listing.df = listing.df.astype({"orbit": int})
                # sort on orbit number:
                listing.Sort(by="orbit")
            else:
                # info ...
                logging.warning(
                    f"{indent}  no data files found matching '{fpattern}' below '{bdir}'"
                )
            # endif

            # save:
            listing.Save(lst_file, dmode=dmode, indent=f"{indent}  ")

        else:
            # info ..
            logging.info(f"{indent}keep {lst_file} ...")
        # endif

        # info ...
        logging.info(f"{indent}")
        logging.info(f"{indent}** end listing")
        logging.info(f"{indent}")

    # enddef __init__


# endclass CSO_S5p_Download
# endclass CSO_EarthAccess_Download_Listing


########################################################################