TNO Intern

Commit 0cbafbf0 authored by Hen Brett's avatar Hen Brett 🐔
Browse files

Checking that the tests run

parent 83f3d4d9
Loading
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
import numpy as np
import xarray as xr

def auto_chunk_dataset(dataset_to_chunk: xr.Dataset | xr.DataArray, target_chunk_size: int = 100) -> xr.Dataset | xr.DataArray:
def auto_chunk_dataset(dataset_to_chunk: xr.Dataset | xr.DataArray, target_chunk_size: int) -> xr.Dataset | xr.DataArray:
    """
    Automatically chunks a Dataset or DataArray so that each chunk contains
    approximately `target_chunk_size` total samples.

    Parameters:
        dataset_to_chunk: xarray.Dataset or xarray.DataArray
        target_chunk_size: Target total number of elements per chunk (default: 100)
        target_chunk_size: Target total number of elements per chunk

    Returns:
        Chunked xarray.Dataset or xarray.DataArray
@@ -22,7 +22,7 @@ def auto_chunk_dataset(dataset_to_chunk: xr.Dataset | xr.DataArray, target_chunk
    # Start with full size
    chunking = {dim: dim_sizes[dim] for dim in dataset_to_chunk.dims}

    # Greedy algorithm: reduce chunk size along largest dimensions
    # Greedy algorithm: reduce chunk size along largest dimensions, until target_chunk_size is reached
    current_chunk_size = total_size
    while current_chunk_size > target_chunk_size:
        # Sort dims by current chunk size (largest first)
+1 −1
Original line number Diff line number Diff line
@@ -101,7 +101,7 @@ def calculate_doublet_performance(reservoir_properties: xr.Dataset, utc_properti

    output_data = reservoir_properties.copy()
    output_data = simulate_doublet(output_data, reservoir_properties, rng_seed, utc_properties)
    if chunk_size is not None: output_data = output_data.load()
    if chunk_size is not None: output_data.load()
    if print_execution_duration: print(f"Doublet simulation took {timeit.default_timer() - start:.1f} seconds")

    return output_data
+6 −0
Original line number Diff line number Diff line
@@ -54,6 +54,12 @@ def calculate_doublet_performance_stochastic(reservoir_properties: xr.Dataset,
        List of probability values (e.g., [0.1, 0.5, 0.9]) for the performance evaluation.
        If not provided, the default value of P50 (0.5) is used.

    chunk_size : int, optional
        None by default; if set to an integer, chunking of the reservoir properties occurs.
        The chunk size is used to split up the number of simulations into "chunks" which can be processed in parallel using the dask framework.
        Chunk size involves trade-offs: smaller chunks = more parallelism, but more overhead, while larger chunks = less overhead, but can lead to memory pressure.
        The optimal chunk size is dependent on the hardware being used to run the simulation. The user should test to find the optimal chunk size.

    print_execution_duration : bool
        False by default. If set to True, prints the time in seconds it took to simulate across all reservoir properties