TNO Intern

Commit 0cbafbf0 authored by Hen Brett's avatar Hen Brett 🐔
Browse files

Checking that the tests run

parent 83f3d4d9
Loading
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
import numpy as np
import xarray as xr

def auto_chunk_dataset(dataset_to_chunk: xr.Dataset | xr.DataArray, target_chunk_size: int = 100) -> xr.Dataset | xr.DataArray:
def auto_chunk_dataset(dataset_to_chunk: xr.Dataset | xr.DataArray, target_chunk_size: int) -> xr.Dataset | xr.DataArray:
    """
    Automatically chunks a Dataset or DataArray so that each chunk contains
    approximately `target_chunk_size` total samples.

    Parameters:
        dataset_to_chunk: xarray.Dataset or xarray.DataArray
        target_chunk_size: Target total number of elements per chunk (default: 100)
        target_chunk_size: Target total number of elements per chunk

    Returns:
        Chunked xarray.Dataset or xarray.DataArray
@@ -22,7 +22,7 @@ def auto_chunk_dataset(dataset_to_chunk: xr.Dataset | xr.DataArray, target_chunk
    # Start with full size
    chunking = {dim: dim_sizes[dim] for dim in dataset_to_chunk.dims}

    # Greedy algorithm: reduce chunk size along largest dimensions
    # Greedy algorithm: reduce chunk size along largest dimensions, until target_chunk_size is reached
    current_chunk_size = total_size
    while current_chunk_size > target_chunk_size:
        # Sort dims by current chunk size (largest first)
+1 −1
Original line number Diff line number Diff line
@@ -101,7 +101,7 @@ def calculate_doublet_performance(reservoir_properties: xr.Dataset, utc_properti

    output_data = reservoir_properties.copy()
    output_data = simulate_doublet(output_data, reservoir_properties, rng_seed, utc_properties)
    if chunk_size is not None: output_data = output_data.load()
    if chunk_size is not None: output_data.load()
    if print_execution_duration: print(f"Doublet simulation took {timeit.default_timer() - start:.1f} seconds")

    return output_data
+6 −0
Original line number Diff line number Diff line
@@ -54,6 +54,12 @@ def calculate_doublet_performance_stochastic(reservoir_properties: xr.Dataset,
        List of probability values (e.g., [0.1, 0.5, 0.9]) for the performance evaluation.
        If not provided, the default value of P50 (0.5) is used.

    chunk_size : int, optional
        None by default; if set to an integer, chunking of the reservoir properties occurs.
        The chunk size is used to split up the number of simulations into "chunks" which can be processed in parallel using the dask framework.
        Chunk size involves trade-offs: smaller chunks = more parallelism, but more overhead, while larger chunks = less overhead, but can lead to memory pressure.
        The optimal chunk size is dependent on the hardware being used to run the simulation. The user should test to find the optimal chunk size.

    print_execution_duration : bool
        False by default. If set to True, prints the time in seconds it took to simulate across all reservoir properties