Merge branch 'jd-fixes' into 'main' (c5fc7577) · Commits · AGS_public / pythermogis

examples/06_example_maps_p10_90.ipynb

+82 −83

File changed.

Preview size limit exceeded, changes collapsed.

pythermogis/dask.py

+0 −72

Original line number	Diff line number	Diff line
		import numpy as np
		import xarray as xr

		from pathlib import Path


		def auto_chunk_dataset(
		dataset_to_chunk: xr.Dataset \| xr.DataArray, target_chunk_size: int
		@@ -42,73 +40,3 @@ def auto_chunk_dataset(
		current_chunk_size = np.prod(list(chunking.values()))

		return dataset_to_chunk.copy().chunk(chunking)

		def assess_optimal_chunk_size(n_simulations: int = 1000, chunk_step_size: int = 100, plot_outfile : str \| Path = None):
		"""
		This runs the same set of doublet simulations using different chunk sizes and prints the results to the terminal to show find which chunk size is optimal.
		It runs each chunk size 3 times and takes the average of their times to find the time taken for that chunk size.
		The chunks which are tested go from 1 -> n_simulations with steps of chunk_step_size.

		Parameters
		----------
		n_simulations : int
		The number of simulations which are run in parralel
		chunk_step_size : int
		The step size of chunks to test; going from 1 to n_simulations
		plot_outfile : str \| Path
		If provided a plot is made showing the time taken per-chunk size and saved to this path

		"""
		reservoir_properties = xr.Dataset(
		{
		"thickness": (["sample"], np.ones(n_simulations) * 200),
		"porosity": (["sample"], np.ones(n_simulations)),
		"ntg": (["sample"], np.ones(n_simulations)),
		"depth": (["sample"], np.ones(n_simulations) * 1000),
		"permeability": (["sample"], np.ones(n_simulations) * 500),
		},
		coords={"sample": np.arange(n_simulations)}
		)

		n_attempts = 3 # do the same operation n_attempts and take an average of their times.
		sample_chunks = np.arange(1, n_simulations + 2, chunk_step_size)

		# run in series
		time_attempt = []
		for attempt in range(n_attempts):
		start = timeit.default_timer()
		simulation_benchmark = calculate_doublet_performance(reservoir_properties)
		time_attempt.append(timeit.default_timer() - start)
		normal_time = np.mean(time_attempt)
		print(f"non-parralel simulation took {normal_time:.1f} seconds, {n_simulations / normal_time:.1f} samples per second")


		# run in parallel:
		mean_time = []
		std_time = []
		for sample_chunk in sample_chunks:
		time_attempt = []
		for attempt in range(n_attempts):
		start = timeit.default_timer()
		simulations_parallel = calculate_doublet_performance(reservoir_properties, chunk_size=sample_chunk, print_execution_duration=False)
		time_attempt.append(timeit.default_timer() - start)

		# additional check that the results are identical
		xr.testing.assert_allclose(simulation_benchmark, simulations_parallel)
		xr.testing.assert_equal(simulation_benchmark, simulations_parallel)

		mean_time.append(np.mean(time_attempt))
		std_time.append(np.std(time_attempt))
		print(f"parralel simulation, chunk size: {sample_chunk}, took {np.mean(time_attempt):.1f} seconds to run {n_simulations} simulations, {n_simulations / mean_time[-1]:.1f} samples per second")

		if plot_outfile is None:
		return

		fig, ax = plt.subplots(1, 1, figsize=(8, 5))
		ax.errorbar(sample_chunks, mean_time, yerr=std_time, fmt='o', capsize=5, label='parralel simulation')
		ax.axhline(normal_time, label="non-parralel simulation", color="tab:orange", linestyle="--")
		ax.set_xlabel("chunk size")
		ax.set_ylabel("time (s)")
		ax.set_title("Chunk size assessment")
		ax.legend()
		plt.savefig(plot_outfile)
		No newline at end of file

pythermogis/doublet.py

+6 −0

Original line number	Diff line number	Diff line
		@@ -87,6 +87,9 @@ class ThermoGISDoubletResults(BaseModel):
		transmissivity: FloatOrArray
		permeability: FloatOrArray \| None
		temperature: FloatOrArray
		ntg: FloatOrArray
		porosity: FloatOrArray
		depth: FloatOrArray

		def to_dataset(self) -> xr.Dataset:
		"""Convert results to an :class:`xarray.Dataset`.
		@@ -271,6 +274,9 @@ class ThermoGISDoublet:
		transmissivity=transmissivity,
		permeability=permeability,
		temperature=self.aquifer.temperature,
		porosity=self.aquifer.porosity,
		ntg=self.aquifer.ntg,
		depth=self.aquifer.depth,
		)

		def _resolve_stochastic_properties(