TNO Intern

Commit abe2a4fd authored by Hen Brett's avatar Hen Brett 🐔
Browse files

Woah! It works really well, currently the testing seems to show that the best...

Woah! It works really well. Current testing suggests that the best number of simulations per chunk is 100.
parent 0d2101b9
Loading
Loading
Loading
Loading
+1 −5
Original line number Diff line number Diff line
@@ -87,13 +87,9 @@ def calculate_doublet_performance(reservoir_properties: xr.Dataset, utc_properti
    if "transmissivity" not in reservoir_properties:
        reservoir_properties["transmissivity"] = reservoir_properties["permeability"] * reservoir_properties["thickness"]

    # chunk reservoir properties to enable dask parallelization
    chunk_dict = auto_chunk_xarray(reservoir_properties, target_chunk_mb=50)
    reservoir_properties_chunk = reservoir_properties.chunk(chunk_dict)

    # Setup output_data dataset as a copy of reservoir properties
    output_data = reservoir_properties.copy()
    output_data = simulate_doublet(output_data, reservoir_properties_chunk, rng_seed, utc_properties).load()
    output_data = simulate_doublet(output_data, reservoir_properties, rng_seed, utc_properties).load()
    if print_execution_duration: print(f"Doublet simulation took {timeit.default_timer() - start:.1f} seconds")
    return output_data

+3 −5
Original line number Diff line number Diff line
@@ -106,12 +106,10 @@ def calculate_doublet_performance_stochastic(reservoir_properties: xr.Dataset,
        reservoir_properties["mask"] = np.nan

    # chunk reservoir properties to enable dask parallelization
    chunk_dict = auto_chunk_xarray(reservoir_properties, target_chunk_mb=0.01)
    reservoir_properties_chunk = reservoir_properties.chunk(chunk_dict)
    reservoir_properties = reservoir_properties.chunk({'y':10, 'x':10})

    # Setup output_data dataset
    output_data = reservoir_properties_chunk["temperature"].copy().to_dataset()
    output_data["temperature"] = reservoir_properties_chunk["temperature"] * 1.0
    output_data = reservoir_properties["temperature"].copy().to_dataset()
    output_data = output_data.expand_dims({"p_value": p_values})

    # Calculate Thickness, Permeability and Transmissivity for each P-value
@@ -128,7 +126,7 @@ def calculate_doublet_performance_stochastic(reservoir_properties: xr.Dataset,
                                                                                                          dask_gufunc_kwargs={"allow_rechunk": True}
                                                                                                          )

    output_data = simulate_doublet(output_data, reservoir_properties_chunk, rng_seed, utc_properties).load()
    output_data = simulate_doublet(output_data, reservoir_properties, rng_seed, utc_properties).load()
    if print_execution_duration: print(f"Doublet simulation took {timeit.default_timer() - start:.1f} seconds")
    return output_data

+26 −16
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@ import timeit

def test_dask_parralelization_deterministic():
    # generate simulation samples across desired reservoir properties
    Nsamples = 100000
    Nsamples = 5000
    thickness_samples = np.random.uniform(low=150, high=300, size=Nsamples)
    porosity_samples = np.random.uniform(low=0.5, high=0.8, size=Nsamples)
    ntg_samples = np.random.uniform(low=0.25, high=0.5, size=Nsamples)
@@ -24,21 +24,31 @@ def test_dask_parralelization_deterministic():
        },
        coords={"sample": np.arange(Nsamples)}
    )
    n_attempts = 3

    # For every sample, run a doublet simulation and store the output values
    # normal
    time_attempt = []
    for attempt in range(n_attempts):
        start = timeit.default_timer()
        simulation_benchmark = calculate_doublet_performance(reservoir_properties)
        time_attempt.append(timeit.default_timer() - start)
    print(f"non-parralel simulation took {np.mean(time_attempt):.1f} seconds, {Nsamples/np.mean(time_attempt):.1f} samples per second")


    sample_chunks = [10,50,100,150,200,500,1000,2000,5000]

    # first run parallelized:
    for sample_chunk in sample_chunks:
        time_attempt=[]
        for attempt in range(n_attempts):
            start = timeit.default_timer()
    chunk_dict = auto_chunk_xarray(reservoir_properties, target_chunk_mb=0.01)
    print(chunk_dict)
    reservoir_properties_chunk = reservoir_properties.chunk(chunk_dict)
    simulations_parrallel = calculate_doublet_performance(reservoir_properties_chunk, print_execution_duration=False).load()
    print(f"parralel simulation (with chunking and unchunking) took {timeit.default_timer() - start:.1f} seconds")
            reservoir_properties_chunk = reservoir_properties.chunk({'sample': sample_chunk})
            simulations_parrallel = calculate_doublet_performance(reservoir_properties_chunk, print_execution_duration=False)
            time_attempt.append(timeit.default_timer() - start)
            xr.testing.assert_allclose(simulation_benchmark, simulations_parrallel)
            xr.testing.assert_equal(simulation_benchmark, simulations_parrallel)
        print(f"parralel simulation, chunk size: {sample_chunk}, took {np.mean(time_attempt):.1f} seconds to run {Nsamples} simulations, {Nsamples/np.mean(time_attempt):.1f} samples per second")



    # # normal
    start = timeit.default_timer()
    simulations = calculate_doublet_performance(reservoir_properties)
    print(f"non-parralel simulation took {timeit.default_timer() - start:.1f} seconds")
    xr.testing.assert_allclose(simulations, simulations_parrallel)
    xr.testing.assert_equal(simulations, simulations_parrallel)
 No newline at end of file