TNO Intern

Commit f8c09e59 authored by Hen Brett's avatar Hen Brett 🐔
Browse files

Merge branch '9-test-how-transmissivity-is-calculated' into 'main'

Resolve "Test how transmissivity is calculated"

Closes #9

See merge request AGS/pythermogis!13
parents ec268d10 f4bbdbba
Loading
Loading
Loading
Loading
+112 −0
Original line number Diff line number Diff line
from os import path
from pathlib import Path
from unittest.case import TestCase

import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
from jpype.types import *

from pythermogis.statistics.calculate_thick_perm_trans import generate_thickness_permeability_transmissivity_for_pvalue
from pythermogis.thermogis_classes.jvm_start import start_jvm


class PyThermoGIS(TestCase):
    test_files_out_path = Path(path.dirname(path.dirname(__file__)), "resources") / "test_output"

    def test_transmissivity_calculation(self):
        """
        When calculating the Transmissivity values in the python code, we get a different value than the values from the java benchmark;
        This is because we have to calculate Transmissivity using a sampling method as it is the combination of a normal and log-normal distribution,
        The java code and the python code only use 10000 samples for ensuring the calculation is sped up; however this is course enough that with different random number generators they can be off by a few Dm,
        This test is simply to show that with enough samples the different methods converge; but the problem still exists; how do we efficiently calculate Transmissivity?
        :return:
        """

        thickness_mean = 10
        thickness_sd = 5
        ln_perm_mean = 5
        ln_perm_sd = 0.5
        nSamples = 100000000
        p_values_list = [10, 20, 30, 40, 50, 60, 70, 80, 90]

        #
        #   Python
        #
        # convert p_values list to a xarray DataArray; needed to ensure the dimensionality of the calculations
        p_values = xr.DataArray(
            data=p_values_list,
            dims=["p_value"],
            coords=dict(
                p_value=(["p_value"], p_values_list),
            ))

        # Calculate Thickness, Permeability and Transmissivity for each P-value using the Python implementation
        thickness, permeability, transmissivity = xr.apply_ufunc(generate_thickness_permeability_transmissivity_for_pvalue,
                                                                 thickness_mean,
                                                                 thickness_sd,
                                                                 ln_perm_mean,
                                                                 ln_perm_sd,
                                                                 p_values,
                                                                 kwargs={"nSamples": nSamples},
                                                                 input_core_dims=[[], [], [], [], []],
                                                                 output_core_dims=[[], [], []],
                                                                 vectorize=True
                                                                 )

        #
        #   JAVA
        #
        # Calculate Thickness, Permeability and Transmissivity for each P-value using the Java implementation
        thick_p_value_java, perm_p_value_java, trans_p_value_java = self.generate_transmissivity_java(ln_perm_mean, ln_perm_sd, nSamples, p_values_list, thickness_mean, thickness_sd)

        #
        #   Plotting
        #
        plot = False
        if plot:
            # Plot the transmissivity as a function of pvalue for both the python and Java implementation
            fig, axes = plt.subplots(1, 4, figsize=(20, 10))

            # Thick
            thickness.plot(ax=axes[0], label="python")
            axes[0].set_title(f"Thickness\nmax diff: {np.max(np.abs(thickness.values - thick_p_value_java))}")
            axes[0].plot(p_values_list, thick_p_value_java, label="java")

            # Perm
            permeability.plot(ax=axes[1], label="python")
            axes[1].set_title(f"Permeability\nmax diff: {np.max(np.abs(permeability.values - perm_p_value_java))}")
            axes[1].plot(p_values_list, perm_p_value_java, label="java")

            # Trans
            transmissivity.plot(ax=axes[2], label="python")
            axes[2].set_title(f"Transmissivity\nmax diff: {np.max(np.abs(transmissivity.values - trans_p_value_java))}")
            axes[2].plot(p_values_list, trans_p_value_java, label="java")

            axes[3].set_title("Difference between Python and Java Transmissivity")
            axes[3].scatter(p_values_list, trans_p_value_java - transmissivity)

            [ax.legend() for ax in axes]
            plt.savefig(self.test_files_out_path / "trans_test")

        # Assert
        self.assertTrue(np.allclose(transmissivity.values, trans_p_value_java, atol=1))

    def generate_transmissivity_java(self, ln_perm_mean, ln_perm_sd, nSamples, p_values_list, thickness_mean, thickness_sd):
        start_jvm()
        RNG = JClass("tno.geoenergy.stochastic.RandomNumberGenerator")
        NormalDistribution = JClass("tno.geoenergy.stochastic.NormalDistribution")
        rng = RNG(0)

        permeabilityDistribution = NormalDistribution(ln_perm_mean, ln_perm_sd, rng)
        thicknessDistribution = NormalDistribution(thickness_mean, thickness_sd, rng)

        perm_p_value_java = [permeabilityDistribution.calculateProbabilityValue(pValue, True) for pValue in p_values_list]
        thick_p_value_java = [thicknessDistribution.calculateProbabilityValue(pValue, False) for pValue in p_values_list]

        permeabilitySamples = permeabilityDistribution.generateSamples(nSamples)
        thicknessSamples = thicknessDistribution.generateSamples(nSamples)
        trans_samples = np.sort(np.exp(permeabilitySamples + np.log(thicknessSamples)))
        trans_p_value_java = [trans_samples[int((100 - p_value) / 100 * nSamples)] for p_value in p_values_list]

        return thick_p_value_java, perm_p_value_java, trans_p_value_java