TNO Intern

Commit 4f4b618e authored by Florian Knappers's avatar Florian Knappers
Browse files

made p values dim in aquifer dataset

parent c7c03c22
Loading
Loading
Loading
Loading
Loading
+34 −18
Original line number Diff line number Diff line
@@ -57,29 +57,36 @@ def generate_thickness_permeability_transmissivity_for_pvalues(thickness_mean: f

    return thickness_pvalues, permeability_pvalues, transmissivity_pvalues_sampled


@njit(parallel=True)
def calculate_transmissivity(
        thickness_mean: NDArray[np.float64],
        thickness_sd: NDArray[np.float64],
        ln_perm_mean: NDArray[np.float64],
        ln_perm_sd: NDArray[np.float64],
        quantile: float,
        z_score: float,
        quantiles: NDArray[np.float64],
        z_scores: NDArray[np.float64],
        n_samples: int,
):
    shape = thickness_mean.shape
    n_pvalues = len(quantiles)

    # Flatten spatial dimensions
    tm_flat = thickness_mean.ravel()
    ts_flat = thickness_sd.ravel()
    lpm_flat = ln_perm_mean.ravel()
    lps_flat = ln_perm_sd.ravel()

    n_cells = tm_flat.size
    idx = int(quantile * n_samples)

    thickness_p = np.empty(n_cells)
    permeability_p = np.empty(n_cells)
    transmissivity_p = np.empty(n_cells)
    # Calculate indices for each quantile
    indices = np.empty(n_pvalues, dtype=np.int64)
    for p_idx in range(n_pvalues):
        indices[p_idx] = int(quantiles[p_idx] * n_samples)

    # Initialize output arrays with p_value dimension
    thickness_p = np.empty((n_cells, n_pvalues))
    permeability_p = np.empty((n_cells, n_pvalues))
    transmissivity_p = np.empty((n_cells, n_pvalues))

    for i in prange(n_cells):
        tm = tm_flat[i]
@@ -88,12 +95,13 @@ def calculate_transmissivity(
        lps = lps_flat[i]

        if np.isnan(tm) or np.isnan(ts) or np.isnan(lpm) or np.isnan(lps):
            thickness_p[i] = np.nan
            permeability_p[i] = np.nan
            transmissivity_p[i] = np.nan
            for p_idx in range(n_pvalues):
                thickness_p[i, p_idx] = np.nan
                permeability_p[i, p_idx] = np.nan
                transmissivity_p[i, p_idx] = np.nan
            continue

        # Generate samples for this cell
        # Generate samples for this cell (same samples used for all p_values)
        thickness_samples = np.empty(n_samples)
        ln_perm_samples = np.empty(n_samples)

@@ -107,14 +115,22 @@ def calculate_transmissivity(
        # Transmissivity samples
        tr_samples = np.exp(ln_perm_samples) * thickness_samples
        tr_samples.sort()
        transmissivity_p[i] = tr_samples[idx]

        # Analytical percentiles
        thickness_p[i] = tm + z_score * ts if ts > 0 else tm
        permeability_p[i] = np.exp(lpm + z_score * lps)
        # Get transmissivity for each quantile
        for p_idx in range(n_pvalues):
            idx = indices[p_idx]
            transmissivity_p[i, p_idx] = tr_samples[idx]

        # Analytical percentiles for all p_values
        for p_idx in range(n_pvalues):
            z_score = z_scores[p_idx]
            thickness_p[i, p_idx] = tm + z_score * ts if ts > 0 else tm
            permeability_p[i, p_idx] = np.exp(lpm + z_score * lps)

    # Reshape to include p_value dimension
    output_shape = (*shape, n_pvalues)
    return (
        thickness_p.reshape(shape),
        permeability_p.reshape(shape),
        transmissivity_p.reshape(shape),
        thickness_p.reshape(output_shape),
        permeability_p.reshape(output_shape),
        transmissivity_p.reshape(output_shape),
    )
 No newline at end of file
+152 −91
Original line number Diff line number Diff line
@@ -104,21 +104,29 @@ def compute_results_for_aquifer(
    )
    temperature = temp_xy_aligned.interp(z=mid_depth, method="linear")
    temperature = temperature.drop_vars('z', errors='ignore')

    aquifer_ds["temperature"] = temperature

    for p_value in config.p_values:
        # calc transmissivity(_with_ntg)
        stoch_results = apply_stochastics(aquifer_ds, p_value)

    # Calculate stochastics for all p_values at once (moved outside loop)
    print(f"Calculating stochastics for all p_values...")
    stoch_start = time.perf_counter()
    stoch_results = apply_stochastics(aquifer_ds, config.p_values)
    stoch_end = time.perf_counter()
    print(f"Stochastics calculation took: {stoch_end - stoch_start:.2f} seconds")

    # Add stochastic results to dataset (these now have p_value dimension)
    aquifer_ds["transmissivity"] = stoch_results["transmissivity"]
    aquifer_ds["transmissivity_with_ntg"] = stoch_results["transmissivity_with_ntg"]
        aquifer_ds["thickness"] = stoch_results["thickness"]
        aquifer_ds["permeability"] = stoch_results["permeability"]
    aquifer_ds["thickness_p"] = stoch_results["thickness"]
    aquifer_ds["permeability_p"] = stoch_results["permeability"]

    print(f"Calculating doublet performance...")
    calc_start = time.perf_counter()

    results = xr.apply_ufunc(
        cell_calculation,
        1.0e30,
            aquifer_ds["thickness"],
        aquifer_ds["thickness_p"],
        aquifer_ds["transmissivity"],
        aquifer_ds["transmissivity_with_ntg"],
        aquifer_ds["ntg"],
@@ -126,17 +134,21 @@ def compute_results_for_aquifer(
        aquifer_ds["porosity"],
        aquifer_ds["temperature"],
        kwargs={"config": config},
            input_core_dims=[[]] * 8,
            output_core_dims=[[] for _ in OUTPUT_NAMES],
        input_core_dims=[[], ["p_value"], ["p_value"], ["p_value"], [], [], [], []],
        output_core_dims=[["p_value"] for _ in OUTPUT_NAMES],
        vectorize=True,
        dask="parallelized",
        output_dtypes=[np.float64] * len(OUTPUT_NAMES),
    )

    calc_end = time.perf_counter()
    print(f"Doublet calculation took: {calc_end - calc_start:.2f} seconds")

    # Add results to dataset with proper naming
    for name, result in zip(OUTPUT_NAMES, results, strict=True):
            aquifer_ds[f"{name}_p{p_value}"] = result
        aquifer_ds[name] = result

    aquifer_ds.compute()
    aquifer_ds = aquifer_ds.compute()

    print(f"Computed results for {aquifer_name}")

@@ -145,71 +157,112 @@ def compute_results_for_aquifer(

def apply_stochastics(
    aquifer_ds: xr.Dataset,
    p_value: float,
    p_values: list[float],
    n_samples: int = 10_000,
) -> xr.Dataset:

    q = 1.0 - p_value / 100.0
    z = ndtri(q)
    # Calculate quantiles and z-scores for all p_values
    quantiles = np.array([1.0 - p / 100.0 for p in p_values])
    z_scores = ndtri(quantiles)

    # Calculate transmissivity for all p_values
    thickness_p, permeability_p, transmissivity_p = calculate_transmissivity(
        aquifer_ds["thickness"].values,
        aquifer_ds["thickness_sd"].values,
        np.log(aquifer_ds["permeability"].values),
        aquifer_ds["permeability_lnsd"].values,
        q,
        z,
        quantiles,
        z_scores,
        n_samples,
    )

    transmissivity_with_ntg = transmissivity_p * aquifer_ds["ntg"] / 1000.0
    transmissivity_with_ntg = transmissivity_with_ntg.where(
        transmissivity_with_ntg >= 0
    # Broadcast ntg to match p_value dimension
    ntg_expanded = aquifer_ds["ntg"].values[..., np.newaxis]
    transmissivity_with_ntg = transmissivity_p * ntg_expanded / 1000.0
    transmissivity_with_ntg = np.where(
        transmissivity_with_ntg >= 0, transmissivity_with_ntg, np.nan
    )

    return xr.Dataset(
    # Create result dataset with p_value dimension
    result = xr.Dataset(
        {
            "thickness": (aquifer_ds["thickness"].dims, thickness_p),
            "permeability": (aquifer_ds["thickness"].dims, permeability_p),
            "transmissivity": (aquifer_ds["thickness"].dims, transmissivity_p),
            "transmissivity_with_ntg": transmissivity_with_ntg,
            "thickness": (
                [*aquifer_ds["thickness"].dims, "p_value"],
                thickness_p,
            ),
            "permeability": (
                [*aquifer_ds["thickness"].dims, "p_value"],
                permeability_p,
            ),
            "transmissivity": (
                [*aquifer_ds["thickness"].dims, "p_value"],
                transmissivity_p,
            ),
            "transmissivity_with_ntg": (
                [*aquifer_ds["thickness"].dims, "p_value"],
                transmissivity_with_ntg,
            ),
        },
        coords={
            **aquifer_ds.coords,
            "p_value": p_values,
        },
        coords=aquifer_ds.coords,
    )

    return result


def cell_calculation(
        unknown_input_value: float,
    thickness: float,
    transmissivity: float,
    transmissivity_with_ntg: float,
        thickness: np.ndarray,  # Now has p_value dimension
        transmissivity: np.ndarray,  # Now has p_value dimension
        transmissivity_with_ntg: np.ndarray,  # Now has p_value dimension
        ntg: float,
        depth: float,
        porosity: float,
        temperature: float,
        config: UTCConfiguration,
) -> DoubletOutput:
    inputs = [
        unknown_input_value,
        thickness,
        transmissivity,
        transmissivity_with_ntg,
        ntg,
        depth,
        porosity,
        temperature,
    ]
    if any(np.isnan(v) for v in inputs):
        return NAN_OUTPUTS
) -> tuple[np.ndarray, ...]:
    """
    Calculate doublet performance for all p_values at once.
    Returns tuple of arrays, each with shape (n_pvalues,).
    """
    n_pvalues = len(thickness)

    # Initialize output arrays for each output variable
    outputs = {name: np.empty(n_pvalues) for name in OUTPUT_NAMES}

    # Check if any base inputs (scalars) are NaN
    base_inputs = [unknown_input_value, ntg, depth, porosity, temperature]
    if any(np.isnan(v) if np.isscalar(v) else False for v in base_inputs):
        for name in OUTPUT_NAMES:
            outputs[name][:] = np.nan
        return tuple(outputs[name] for name in OUTPUT_NAMES)

    # Process each p_value
    for p_idx in range(n_pvalues):
        thickness_val = thickness[p_idx]
        transmissivity_val = transmissivity[p_idx]
        transmissivity_with_ntg_val = transmissivity_with_ntg[p_idx]

        # Check if any inputs for this p_value are NaN
        if (np.isnan(thickness_val) or
                np.isnan(transmissivity_val) or
                np.isnan(transmissivity_with_ntg_val)):
            for name in OUTPUT_NAMES:
                outputs[name][p_idx] = np.nan
            continue

    if transmissivity_with_ntg < config.kh_cutoff:
        return ZERO_OUTPUTS
        if transmissivity_with_ntg_val < config.kh_cutoff:
            for name in OUTPUT_NAMES:
                outputs[name][p_idx] = 0.0
            continue

        doublet_input = DoubletInput(
            unknown_input_value=unknown_input_value,
        thickness=thickness,
        transmissivity=transmissivity,
        transmissivity_with_ntg=transmissivity_with_ntg,
            thickness=thickness_val,
            transmissivity=transmissivity_val,
            transmissivity_with_ntg=transmissivity_with_ntg_val,
            ntg=ntg,
            depth=depth,
            porosity=porosity / 100,
@@ -219,12 +272,20 @@ def cell_calculation(
        try:
            result = calculate_doublet_performance(config, doublet_input)
        except ValueError:
        return NAN_OUTPUTS
            for name in OUTPUT_NAMES:
                outputs[name][p_idx] = np.nan
            continue

        if result is None:
        return NAN_OUTPUTS
            for name in OUTPUT_NAMES:
                outputs[name][p_idx] = np.nan
            continue

    if result.utc > 1000:
        result = result._replace(utc=1000)
        # Store results
        for name in OUTPUT_NAMES:
            value = getattr(result, name)
            if name == "utc" and value > 1000:
                value = 1000
            outputs[name][p_idx] = value

    return result
    return tuple(outputs[name] for name in OUTPUT_NAMES)
 No newline at end of file
+2 −1
Original line number Diff line number Diff line
@@ -44,13 +44,14 @@ def test_rosl_roslu():
        ("welld", "welld")
    ]

    aquifer_ds = result.ROSL_ROSLU.to_dataset()

    for java_name, python_name in variables:
        for p_value in p_values:
            java_file = java_output_folder / f"ROSL_ROSLU__{java_name}_P{p_value}.nc"
            java_data = xr.open_dataset(java_file)["data"]

            python_data = getattr(result.ROSL_ROSLU, f"{python_name}_p{p_value}")
            python_data = aquifer_ds[python_name].sel(p_value=p_value)

            plot_grid_comparison(
                grid1=java_data,