TNO Intern

Commit e9dc223c authored by Hen Brett's avatar Hen Brett 🐔
Browse files

Checking that the tests run

parent faea72c8
Loading
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -7,3 +7,4 @@ from pythermogis.postprocessing.pos import *
from pythermogis.doublet_simulation.deterministic_doublet import *
from pythermogis.doublet_simulation.stochastic_doublet import *
from pythermogis.plotting.plot_exceedance import plot_exceedance
from pythermogis.dask_utils.chunk_utils import auto_chunk_dataset
 No newline at end of file
+36 −0
Original line number Diff line number Diff line
import numpy as np
import xarray as xr

def auto_chunk_dataset(dataset_to_chunk: xr.Dataset | xr.DataArray, target_chunk_size: int = 100) -> xr.Dataset | xr.DataArray:
    """
    Automatically chunks a Dataset or DataArray so that each chunk contains
    approximately `target_chunk_size` total samples.

    The layout is found greedily: starting from one chunk spanning every full
    dimension, the largest chunk dimension is repeatedly halved until the
    element count per chunk no longer exceeds `target_chunk_size`.

    Parameters:
        dataset_to_chunk: xarray.Dataset or xarray.DataArray
        target_chunk_size: Target total number of elements per chunk (default: 100).
            Must be >= 1.

    Returns:
        Chunked xarray.Dataset or xarray.DataArray. The input is returned
        unchanged when it already fits within a single target-sized chunk.

    Raises:
        ValueError: If `target_chunk_size` is smaller than 1. A non-positive
            target can never be satisfied and previously caused an infinite loop.
    """
    if target_chunk_size < 1:
        raise ValueError(f"target_chunk_size must be >= 1, got {target_chunk_size}")

    dim_sizes = dataset_to_chunk.sizes
    total_size = np.prod(list(dim_sizes.values()))

    if total_size <= target_chunk_size: # No need to chunk, return reservoir property as is
        return dataset_to_chunk

    # Start with one chunk covering each full dimension
    chunking = {dim: dim_sizes[dim] for dim in dataset_to_chunk.dims}

    # Greedy algorithm: repeatedly halve the largest chunk dimension until the
    # per-chunk element count drops to the target. At least one dimension is
    # always > 1 here, otherwise total_size would be <= target_chunk_size.
    current_chunk_size = total_size
    while current_chunk_size > target_chunk_size:
        # Largest dimension that can still shrink (ties: first in dim order,
        # matching the previous stable-sort behaviour)
        dim = max((d for d in chunking if chunking[d] > 1), key=lambda d: chunking[d])
        chunking[dim] = max(1, chunking[dim] // 2)
        current_chunk_size = np.prod(list(chunking.values()))

    return dataset_to_chunk.chunk(chunking)
 No newline at end of file
+0 −43
Original line number Diff line number Diff line
import numpy as np
import xarray as xr

def auto_chunk_xarray(xobj, target_chunk_mb=50):
    """
    Compute a chunking scheme for a Dataset or DataArray aiming for
    ~target_chunk_mb megabytes per chunk.

    Note: this returns the chunk *dictionary* (dimension name -> chunk length),
    not a chunked object; pass the result to ``xobj.chunk(...)`` to apply it.
    For a Dataset, the scheme is derived from its largest variable (by bytes).

    Parameters:
        xobj: xr.Dataset or xr.DataArray
        target_chunk_mb: desired chunk size in megabytes (default: 50 MB)

    Returns:
        dict mapping dimension names to chunk sizes.

    Raises:
        TypeError: if `xobj` is neither a DataArray nor a Dataset.
    """
    def compute_chunk_shape(shape, dtype_size, ndim):
        # Shrink each axis by the ndim-th root of the oversize factor so the
        # total chunk volume lands near the target.
        total_elements = np.prod(shape)
        total_size = total_elements * dtype_size / 1e6  # in MB
        scale = max(total_size / target_chunk_mb, 1)
        return tuple(max(1, int(s / scale**(1/ndim))) for s in shape)

    if isinstance(xobj, xr.DataArray):
        template = xobj
    elif isinstance(xobj, xr.Dataset):
        # Derive the scheme from the dataset's largest variable (bytes = elements * itemsize)
        template = max(
            xobj.data_vars.values(),
            key=lambda v: np.prod(v.shape) * v.dtype.itemsize,
        )
    else:
        raise TypeError("Expected xarray.DataArray or xarray.Dataset")

    chunk_shape = compute_chunk_shape(template.shape, template.dtype.itemsize, len(template.shape))
    return dict(zip(template.dims, chunk_shape))
 No newline at end of file
+5 −4
Original line number Diff line number Diff line
import timeit
import warnings
import xarray as xr

import numpy as np
import timeit
import xarray as xr

from dask_utils.dask_utils import auto_chunk_xarray
from pythermogis import simulate_doublet
from pythermogis.physics.temperature_grid_calculation import calculate_temperature_from_gradient
from pythermogis.thermogis_classes.utc_properties import instantiate_utc_properties_builder


def calculate_doublet_performance(reservoir_properties: xr.Dataset, utc_properties = None, rng_seed = None, print_execution_duration = False) -> xr.Dataset:
    """
    Perform a deterministic Doublet performance simulation.
@@ -118,7 +119,7 @@ def validate_input(reservoir_properties: xr.Dataset):
    if "permeability" not in reservoir_properties and "transmissivity" not in reservoir_properties:
        raise ValueError(f"provided reservoir properties Dataset must provide either permeability or transmissivity variables, currently neither are provided")
    if "permeability" in reservoir_properties and "transmissivity" in reservoir_properties:
        warnings.warn("Both reservoir permeabiltiy and transmissivity provided; however the doublet simulation will use only the provided transmissivity and ignore permeability (transmissivity = permeability * thickness)")
        warnings.warn("Both reservoir permeability and transmissivity provided; however the doublet simulation will use only the provided transmissivity and ignore permeability (transmissivity = permeability * thickness)")

    # check that certain variables are always >0
    always_positive = ["thickness", "permeability", "transmissivity"]
+8 −4
Original line number Diff line number Diff line
@@ -3,7 +3,8 @@ import xarray as xr
import numpy as np
import timeit

from pythermogis import simulate_doublet, auto_chunk_xarray
from pythermogis import simulate_doublet
from pythermogis.dask_utils.chunk_utils import auto_chunk_dataset
from pythermogis.physics.temperature_grid_calculation import calculate_temperature_from_gradient
from pythermogis.transmissivity.calculate_thick_perm_trans import generate_thickness_permeability_transmissivity_for_pvalues
from pythermogis.thermogis_classes.utc_properties import instantiate_utc_properties_builder
@@ -12,6 +13,7 @@ def calculate_doublet_performance_stochastic(reservoir_properties: xr.Dataset,
                                             utc_properties = None,
                                             rng_seed = None,
                                             p_values: List[float] = [50.0],
                                             chunk_size: int | bool = False,
                                             print_execution_duration = False
                                             ) -> xr.Dataset:
    """
@@ -105,8 +107,9 @@ def calculate_doublet_performance_stochastic(reservoir_properties: xr.Dataset,
    if "mask" not in reservoir_properties:
        reservoir_properties["mask"] = np.nan

    # chunk reservoir properties to enable dask parralelization
    reservoir_properties = reservoir_properties.chunk({'y':10, 'x':10})
    # If chunk_size is specified, then chunk reservoir_properties (see description of chunk_size)
    if chunk_size:
        reservoir_properties = auto_chunk_dataset(reservoir_properties, chunk_size)

    # Setup output_data dataset
    output_data = reservoir_properties["temperature"].copy().to_dataset()
@@ -125,7 +128,8 @@ def calculate_doublet_performance_stochastic(reservoir_properties: xr.Dataset,
                                                                                                          dask="parallelized",
                                                                                                          )

    output_data = simulate_doublet(output_data, reservoir_properties, rng_seed, utc_properties).load()
    output_data = simulate_doublet(output_data, reservoir_properties, rng_seed, utc_properties)
    if chunk_size: output_data.load() # If chunking has occurred then the data must be de-chunked
    if print_execution_duration: print(f"Doublet simulation took {timeit.default_timer() - start:.1f} seconds")
    return output_data

Loading