Loading src/pythermogis/transmissivity/calculate_thick_perm_trans.py +34 −18 Original line number Diff line number Diff line Loading @@ -57,29 +57,36 @@ def generate_thickness_permeability_transmissivity_for_pvalues(thickness_mean: f return thickness_pvalues, permeability_pvalues, transmissivity_pvalues_sampled @njit(parallel=True) def calculate_transmissivity( thickness_mean: NDArray[np.float64], thickness_sd: NDArray[np.float64], ln_perm_mean: NDArray[np.float64], ln_perm_sd: NDArray[np.float64], quantile: float, z_score: float, quantiles: NDArray[np.float64], z_scores: NDArray[np.float64], n_samples: int, ): shape = thickness_mean.shape n_pvalues = len(quantiles) # Flatten spatial dimensions tm_flat = thickness_mean.ravel() ts_flat = thickness_sd.ravel() lpm_flat = ln_perm_mean.ravel() lps_flat = ln_perm_sd.ravel() n_cells = tm_flat.size idx = int(quantile * n_samples) thickness_p = np.empty(n_cells) permeability_p = np.empty(n_cells) transmissivity_p = np.empty(n_cells) # Calculate indices for each quantile indices = np.empty(n_pvalues, dtype=np.int64) for p_idx in range(n_pvalues): indices[p_idx] = int(quantiles[p_idx] * n_samples) # Initialize output arrays with p_value dimension thickness_p = np.empty((n_cells, n_pvalues)) permeability_p = np.empty((n_cells, n_pvalues)) transmissivity_p = np.empty((n_cells, n_pvalues)) for i in prange(n_cells): tm = tm_flat[i] Loading @@ -88,12 +95,13 @@ def calculate_transmissivity( lps = lps_flat[i] if np.isnan(tm) or np.isnan(ts) or np.isnan(lpm) or np.isnan(lps): thickness_p[i] = np.nan permeability_p[i] = np.nan transmissivity_p[i] = np.nan for p_idx in range(n_pvalues): thickness_p[i, p_idx] = np.nan permeability_p[i, p_idx] = np.nan transmissivity_p[i, p_idx] = np.nan continue # Generate samples for this cell # Generate samples for this cell (same samples used for all p_values) thickness_samples = np.empty(n_samples) ln_perm_samples = np.empty(n_samples) Loading @@ -107,14 +115,22 @@ def calculate_transmissivity( # Transmissivity samples tr_samples = np.exp(ln_perm_samples) * thickness_samples tr_samples.sort() transmissivity_p[i] = tr_samples[idx] # Analytical percentiles thickness_p[i] = tm + z_score * ts if ts > 0 else tm permeability_p[i] = np.exp(lpm + z_score * lps) # Get transmissivity for each quantile for p_idx in range(n_pvalues): idx = indices[p_idx] transmissivity_p[i, p_idx] = tr_samples[idx] # Analytical percentiles for all p_values for p_idx in range(n_pvalues): z_score = z_scores[p_idx] thickness_p[i, p_idx] = tm + z_score * ts if ts > 0 else tm permeability_p[i, p_idx] = np.exp(lpm + z_score * lps) # Reshape to include p_value dimension output_shape = (*shape, n_pvalues) return ( thickness_p.reshape(shape), permeability_p.reshape(shape), transmissivity_p.reshape(shape), thickness_p.reshape(output_shape), permeability_p.reshape(output_shape), transmissivity_p.reshape(output_shape), ) No newline at end of file src/pythermogis/workflow/utc/utc.py +152 −91 Original line number Diff line number Diff line Loading @@ -104,21 +104,29 @@ def compute_results_for_aquifer( ) temperature = temp_xy_aligned.interp(z=mid_depth, method="linear") temperature = temperature.drop_vars('z', errors='ignore') aquifer_ds["temperature"] = temperature for p_value in config.p_values: # calc transmissivity(_with_ntg) stoch_results = apply_stochastics(aquifer_ds, p_value) # Calculate stochastics for all p_values at once (moved outside loop) print(f"Calculating stochastics for all p_values...") stoch_start = time.perf_counter() stoch_results = apply_stochastics(aquifer_ds, config.p_values) stoch_end = time.perf_counter() print(f"Stochastics calculation took: {stoch_end - stoch_start:.2f} seconds") # Add stochastic results to dataset (these now have p_value dimension) aquifer_ds["transmissivity"] = stoch_results["transmissivity"] aquifer_ds["transmissivity_with_ntg"] = stoch_results["transmissivity_with_ntg"] aquifer_ds["thickness"] = stoch_results["thickness"] aquifer_ds["permeability"] = stoch_results["permeability"] aquifer_ds["thickness_p"] = stoch_results["thickness"] aquifer_ds["permeability_p"] = stoch_results["permeability"] print(f"Calculating doublet performance...") calc_start = time.perf_counter() results = xr.apply_ufunc( cell_calculation, 1.0e30, aquifer_ds["thickness"], aquifer_ds["thickness_p"], aquifer_ds["transmissivity"], aquifer_ds["transmissivity_with_ntg"], aquifer_ds["ntg"], Loading @@ -126,17 +134,21 @@ def compute_results_for_aquifer( aquifer_ds["porosity"], aquifer_ds["temperature"], kwargs={"config": config}, input_core_dims=[[]] * 8, output_core_dims=[[] for _ in OUTPUT_NAMES], input_core_dims=[[], ["p_value"], ["p_value"], ["p_value"], [], [], [], []], output_core_dims=[["p_value"] for _ in OUTPUT_NAMES], vectorize=True, dask="parallelized", output_dtypes=[np.float64] * len(OUTPUT_NAMES), ) calc_end = time.perf_counter() print(f"Doublet calculation took: {calc_end - calc_start:.2f} seconds") # Add results to dataset with proper naming for name, result in zip(OUTPUT_NAMES, results, strict=True): aquifer_ds[f"{name}_p{p_value}"] = result aquifer_ds[name] = result aquifer_ds.compute() aquifer_ds = aquifer_ds.compute() print(f"Computed results for {aquifer_name}") Loading @@ -145,71 +157,112 @@ def compute_results_for_aquifer( def apply_stochastics( aquifer_ds: xr.Dataset, p_value: float, p_values: list[float], n_samples: int = 10_000, ) -> xr.Dataset: q = 1.0 - p_value / 100.0 z = ndtri(q) # Calculate quantiles and z-scores for all p_values quantiles = np.array([1.0 - p / 100.0 for p in p_values]) z_scores = ndtri(quantiles) # Calculate transmissivity for all p_values thickness_p, permeability_p, transmissivity_p = calculate_transmissivity( aquifer_ds["thickness"].values, aquifer_ds["thickness_sd"].values, np.log(aquifer_ds["permeability"].values), aquifer_ds["permeability_lnsd"].values, q, z, quantiles, z_scores, n_samples, ) transmissivity_with_ntg = transmissivity_p * aquifer_ds["ntg"] / 1000.0 transmissivity_with_ntg = transmissivity_with_ntg.where( transmissivity_with_ntg >= 0 # Broadcast ntg to match p_value dimension ntg_expanded = aquifer_ds["ntg"].values[..., np.newaxis] transmissivity_with_ntg = transmissivity_p * ntg_expanded / 1000.0 transmissivity_with_ntg = np.where( transmissivity_with_ntg >= 0, transmissivity_with_ntg, np.nan ) return xr.Dataset( # Create result dataset with p_value dimension result = xr.Dataset( { "thickness": (aquifer_ds["thickness"].dims, thickness_p), "permeability": (aquifer_ds["thickness"].dims, permeability_p), "transmissivity": (aquifer_ds["thickness"].dims, transmissivity_p), "transmissivity_with_ntg": transmissivity_with_ntg, "thickness": ( [*aquifer_ds["thickness"].dims, "p_value"], thickness_p, ), "permeability": ( [*aquifer_ds["thickness"].dims, "p_value"], permeability_p, ), "transmissivity": ( [*aquifer_ds["thickness"].dims, "p_value"], transmissivity_p, ), "transmissivity_with_ntg": ( [*aquifer_ds["thickness"].dims, "p_value"], transmissivity_with_ntg, ), }, coords={ **aquifer_ds.coords, "p_value": p_values, }, coords=aquifer_ds.coords, ) return result def cell_calculation( unknown_input_value: float, thickness: float, transmissivity: float, transmissivity_with_ntg: float, thickness: np.ndarray, # Now has p_value dimension transmissivity: np.ndarray, # Now has p_value dimension transmissivity_with_ntg: np.ndarray, # Now has p_value dimension ntg: float, depth: float, porosity: float, temperature: float, config: UTCConfiguration, ) -> DoubletOutput: inputs = [ unknown_input_value, thickness, transmissivity, transmissivity_with_ntg, ntg, depth, porosity, temperature, ] if any(np.isnan(v) for v in inputs): return NAN_OUTPUTS ) -> tuple[np.ndarray, ...]: """ Calculate doublet performance for all p_values at once. Returns tuple of arrays, each with shape (n_pvalues,). """ n_pvalues = len(thickness) # Initialize output arrays for each output variable outputs = {name: np.empty(n_pvalues) for name in OUTPUT_NAMES} # Check if any base inputs (scalars) are NaN base_inputs = [unknown_input_value, ntg, depth, porosity, temperature] if any(np.isnan(v) if np.isscalar(v) else False for v in base_inputs): for name in OUTPUT_NAMES: outputs[name][:] = np.nan return tuple(outputs[name] for name in OUTPUT_NAMES) # Process each p_value for p_idx in range(n_pvalues): thickness_val = thickness[p_idx] transmissivity_val = transmissivity[p_idx] transmissivity_with_ntg_val = transmissivity_with_ntg[p_idx] # Check if any inputs for this p_value are NaN if (np.isnan(thickness_val) or np.isnan(transmissivity_val) or np.isnan(transmissivity_with_ntg_val)): for name in OUTPUT_NAMES: outputs[name][p_idx] = np.nan continue if transmissivity_with_ntg < config.kh_cutoff: return ZERO_OUTPUTS if transmissivity_with_ntg_val < config.kh_cutoff: for name in OUTPUT_NAMES: outputs[name][p_idx] = 0.0 continue doublet_input = DoubletInput( unknown_input_value=unknown_input_value, thickness=thickness, transmissivity=transmissivity, transmissivity_with_ntg=transmissivity_with_ntg, thickness=thickness_val, transmissivity=transmissivity_val, transmissivity_with_ntg=transmissivity_with_ntg_val, ntg=ntg, depth=depth, porosity=porosity / 100, Loading @@ -219,12 +272,20 @@ def cell_calculation( try: result = calculate_doublet_performance(config, doublet_input) except ValueError: return NAN_OUTPUTS for name in OUTPUT_NAMES: outputs[name][p_idx] = np.nan continue if result is None: return NAN_OUTPUTS for name in OUTPUT_NAMES: outputs[name][p_idx] = np.nan continue if result.utc > 1000: result = result._replace(utc=1000) # Store results for name in OUTPUT_NAMES: value = getattr(result, name) if name == "utc" and value > 1000: value = 1000 outputs[name][p_idx] = value return result return tuple(outputs[name] for name in OUTPUT_NAMES) No newline at end of file tests/utc/test_rosl_roslu.py +2 −1 Original line number Diff line number Diff line Loading @@ -44,13 +44,14 @@ def test_rosl_roslu(): ("welld", "welld") ] aquifer_ds = result.ROSL_ROSLU.to_dataset() for java_name, python_name in variables: for p_value in p_values: java_file = java_output_folder / f"ROSL_ROSLU__{java_name}_P{p_value}.nc" java_data = xr.open_dataset(java_file)["data"] python_data = getattr(result.ROSL_ROSLU, f"{python_name}_p{p_value}") python_data = aquifer_ds[python_name].sel(p_value=p_value) plot_grid_comparison( grid1=java_data, Loading Loading
src/pythermogis/transmissivity/calculate_thick_perm_trans.py +34 −18 Original line number Diff line number Diff line Loading @@ -57,29 +57,36 @@ def generate_thickness_permeability_transmissivity_for_pvalues(thickness_mean: f return thickness_pvalues, permeability_pvalues, transmissivity_pvalues_sampled @njit(parallel=True) def calculate_transmissivity( thickness_mean: NDArray[np.float64], thickness_sd: NDArray[np.float64], ln_perm_mean: NDArray[np.float64], ln_perm_sd: NDArray[np.float64], quantile: float, z_score: float, quantiles: NDArray[np.float64], z_scores: NDArray[np.float64], n_samples: int, ): shape = thickness_mean.shape n_pvalues = len(quantiles) # Flatten spatial dimensions tm_flat = thickness_mean.ravel() ts_flat = thickness_sd.ravel() lpm_flat = ln_perm_mean.ravel() lps_flat = ln_perm_sd.ravel() n_cells = tm_flat.size idx = int(quantile * n_samples) thickness_p = np.empty(n_cells) permeability_p = np.empty(n_cells) transmissivity_p = np.empty(n_cells) # Calculate indices for each quantile indices = np.empty(n_pvalues, dtype=np.int64) for p_idx in range(n_pvalues): indices[p_idx] = int(quantiles[p_idx] * n_samples) # Initialize output arrays with p_value dimension thickness_p = np.empty((n_cells, n_pvalues)) permeability_p = np.empty((n_cells, n_pvalues)) transmissivity_p = np.empty((n_cells, n_pvalues)) for i in prange(n_cells): tm = tm_flat[i] Loading @@ -88,12 +95,13 @@ def calculate_transmissivity( lps = lps_flat[i] if np.isnan(tm) or np.isnan(ts) or np.isnan(lpm) or np.isnan(lps): thickness_p[i] = np.nan permeability_p[i] = np.nan transmissivity_p[i] = np.nan for p_idx in range(n_pvalues): thickness_p[i, p_idx] = np.nan permeability_p[i, p_idx] = np.nan transmissivity_p[i, p_idx] = np.nan continue # Generate samples for this cell # Generate samples for this cell (same samples used for all p_values) thickness_samples = np.empty(n_samples) ln_perm_samples = np.empty(n_samples) Loading @@ -107,14 +115,22 @@ def calculate_transmissivity( # Transmissivity samples tr_samples = np.exp(ln_perm_samples) * thickness_samples tr_samples.sort() transmissivity_p[i] = tr_samples[idx] # Analytical percentiles thickness_p[i] = tm + z_score * ts if ts > 0 else tm permeability_p[i] = np.exp(lpm + z_score * lps) # Get transmissivity for each quantile for p_idx in range(n_pvalues): idx = indices[p_idx] transmissivity_p[i, p_idx] = tr_samples[idx] # Analytical percentiles for all p_values for p_idx in range(n_pvalues): z_score = z_scores[p_idx] thickness_p[i, p_idx] = tm + z_score * ts if ts > 0 else tm permeability_p[i, p_idx] = np.exp(lpm + z_score * lps) # Reshape to include p_value dimension output_shape = (*shape, n_pvalues) return ( thickness_p.reshape(shape), permeability_p.reshape(shape), transmissivity_p.reshape(shape), thickness_p.reshape(output_shape), permeability_p.reshape(output_shape), transmissivity_p.reshape(output_shape), ) No newline at end of file
src/pythermogis/workflow/utc/utc.py +152 −91 Original line number Diff line number Diff line Loading @@ -104,21 +104,29 @@ def compute_results_for_aquifer( ) temperature = temp_xy_aligned.interp(z=mid_depth, method="linear") temperature = temperature.drop_vars('z', errors='ignore') aquifer_ds["temperature"] = temperature for p_value in config.p_values: # calc transmissivity(_with_ntg) stoch_results = apply_stochastics(aquifer_ds, p_value) # Calculate stochastics for all p_values at once (moved outside loop) print(f"Calculating stochastics for all p_values...") stoch_start = time.perf_counter() stoch_results = apply_stochastics(aquifer_ds, config.p_values) stoch_end = time.perf_counter() print(f"Stochastics calculation took: {stoch_end - stoch_start:.2f} seconds") # Add stochastic results to dataset (these now have p_value dimension) aquifer_ds["transmissivity"] = stoch_results["transmissivity"] aquifer_ds["transmissivity_with_ntg"] = stoch_results["transmissivity_with_ntg"] aquifer_ds["thickness"] = stoch_results["thickness"] aquifer_ds["permeability"] = stoch_results["permeability"] aquifer_ds["thickness_p"] = stoch_results["thickness"] aquifer_ds["permeability_p"] = stoch_results["permeability"] print(f"Calculating doublet performance...") calc_start = time.perf_counter() results = xr.apply_ufunc( cell_calculation, 1.0e30, aquifer_ds["thickness"], aquifer_ds["thickness_p"], aquifer_ds["transmissivity"], aquifer_ds["transmissivity_with_ntg"], aquifer_ds["ntg"], Loading @@ -126,17 +134,21 @@ def compute_results_for_aquifer( aquifer_ds["porosity"], aquifer_ds["temperature"], kwargs={"config": config}, input_core_dims=[[]] * 8, output_core_dims=[[] for _ in OUTPUT_NAMES], input_core_dims=[[], ["p_value"], ["p_value"], ["p_value"], [], [], [], []], output_core_dims=[["p_value"] for _ in OUTPUT_NAMES], vectorize=True, dask="parallelized", output_dtypes=[np.float64] * len(OUTPUT_NAMES), ) calc_end = time.perf_counter() print(f"Doublet calculation took: {calc_end - calc_start:.2f} seconds") # Add results to dataset with proper naming for name, result in zip(OUTPUT_NAMES, results, strict=True): aquifer_ds[f"{name}_p{p_value}"] = result aquifer_ds[name] = result aquifer_ds.compute() aquifer_ds = aquifer_ds.compute() print(f"Computed results for {aquifer_name}") Loading @@ -145,71 +157,112 @@ def compute_results_for_aquifer( def apply_stochastics( aquifer_ds: xr.Dataset, p_value: float, p_values: list[float], n_samples: int = 10_000, ) -> xr.Dataset: q = 1.0 - p_value / 100.0 z = ndtri(q) # Calculate quantiles and z-scores for all p_values quantiles = np.array([1.0 - p / 100.0 for p in p_values]) z_scores = ndtri(quantiles) # Calculate transmissivity for all p_values thickness_p, permeability_p, transmissivity_p = calculate_transmissivity( aquifer_ds["thickness"].values, aquifer_ds["thickness_sd"].values, np.log(aquifer_ds["permeability"].values), aquifer_ds["permeability_lnsd"].values, q, z, quantiles, z_scores, n_samples, ) transmissivity_with_ntg = transmissivity_p * aquifer_ds["ntg"] / 1000.0 transmissivity_with_ntg = transmissivity_with_ntg.where( transmissivity_with_ntg >= 0 # Broadcast ntg to match p_value dimension ntg_expanded = aquifer_ds["ntg"].values[..., np.newaxis] transmissivity_with_ntg = transmissivity_p * ntg_expanded / 1000.0 transmissivity_with_ntg = np.where( transmissivity_with_ntg >= 0, transmissivity_with_ntg, np.nan ) return xr.Dataset( # Create result dataset with p_value dimension result = xr.Dataset( { "thickness": (aquifer_ds["thickness"].dims, thickness_p), "permeability": (aquifer_ds["thickness"].dims, permeability_p), "transmissivity": (aquifer_ds["thickness"].dims, transmissivity_p), "transmissivity_with_ntg": transmissivity_with_ntg, "thickness": ( [*aquifer_ds["thickness"].dims, "p_value"], thickness_p, ), "permeability": ( [*aquifer_ds["thickness"].dims, "p_value"], permeability_p, ), "transmissivity": ( [*aquifer_ds["thickness"].dims, "p_value"], transmissivity_p, ), "transmissivity_with_ntg": ( [*aquifer_ds["thickness"].dims, "p_value"], transmissivity_with_ntg, ), }, coords={ **aquifer_ds.coords, "p_value": p_values, }, coords=aquifer_ds.coords, ) return result def cell_calculation( unknown_input_value: float, thickness: float, transmissivity: float, transmissivity_with_ntg: float, thickness: np.ndarray, # Now has p_value dimension transmissivity: np.ndarray, # Now has p_value dimension transmissivity_with_ntg: np.ndarray, # Now has p_value dimension ntg: float, depth: float, porosity: float, temperature: float, config: UTCConfiguration, ) -> DoubletOutput: inputs = [ unknown_input_value, thickness, transmissivity, transmissivity_with_ntg, ntg, depth, porosity, temperature, ] if any(np.isnan(v) for v in inputs): return NAN_OUTPUTS ) -> tuple[np.ndarray, ...]: """ Calculate doublet performance for all p_values at once. Returns tuple of arrays, each with shape (n_pvalues,). """ n_pvalues = len(thickness) # Initialize output arrays for each output variable outputs = {name: np.empty(n_pvalues) for name in OUTPUT_NAMES} # Check if any base inputs (scalars) are NaN base_inputs = [unknown_input_value, ntg, depth, porosity, temperature] if any(np.isnan(v) if np.isscalar(v) else False for v in base_inputs): for name in OUTPUT_NAMES: outputs[name][:] = np.nan return tuple(outputs[name] for name in OUTPUT_NAMES) # Process each p_value for p_idx in range(n_pvalues): thickness_val = thickness[p_idx] transmissivity_val = transmissivity[p_idx] transmissivity_with_ntg_val = transmissivity_with_ntg[p_idx] # Check if any inputs for this p_value are NaN if (np.isnan(thickness_val) or np.isnan(transmissivity_val) or np.isnan(transmissivity_with_ntg_val)): for name in OUTPUT_NAMES: outputs[name][p_idx] = np.nan continue if transmissivity_with_ntg < config.kh_cutoff: return ZERO_OUTPUTS if transmissivity_with_ntg_val < config.kh_cutoff: for name in OUTPUT_NAMES: outputs[name][p_idx] = 0.0 continue doublet_input = DoubletInput( unknown_input_value=unknown_input_value, thickness=thickness, transmissivity=transmissivity, transmissivity_with_ntg=transmissivity_with_ntg, thickness=thickness_val, transmissivity=transmissivity_val, transmissivity_with_ntg=transmissivity_with_ntg_val, ntg=ntg, depth=depth, porosity=porosity / 100, Loading @@ -219,12 +272,20 @@ def cell_calculation( try: result = calculate_doublet_performance(config, doublet_input) except ValueError: return NAN_OUTPUTS for name in OUTPUT_NAMES: outputs[name][p_idx] = np.nan continue if result is None: return NAN_OUTPUTS for name in OUTPUT_NAMES: outputs[name][p_idx] = np.nan continue if result.utc > 1000: result = result._replace(utc=1000) # Store results for name in OUTPUT_NAMES: value = getattr(result, name) if name == "utc" and value > 1000: value = 1000 outputs[name][p_idx] = value return result return tuple(outputs[name] for name in OUTPUT_NAMES) No newline at end of file
tests/utc/test_rosl_roslu.py +2 −1 Original line number Diff line number Diff line Loading @@ -44,13 +44,14 @@ def test_rosl_roslu(): ("welld", "welld") ] aquifer_ds = result.ROSL_ROSLU.to_dataset() for java_name, python_name in variables: for p_value in p_values: java_file = java_output_folder / f"ROSL_ROSLU__{java_name}_P{p_value}.nc" java_data = xr.open_dataset(java_file)["data"] python_data = getattr(result.ROSL_ROSLU, f"{python_name}_p{p_value}") python_data = aquifer_ds[python_name].sel(p_value=p_value) plot_grid_comparison( grid1=java_data, Loading