Source code for physped.visualization.plot_histograms

import logging
import pickle
from pathlib import Path
from typing import Any, Dict, List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.axes import Axes
from scipy.stats import entropy

from physped.core.parametrize_potential import (
    add_trajectories_to_histogram,
    digitize_trajectories_to_grid,
)
from physped.core.piecewise_potential import PiecewisePotential

log = logging.getLogger(__name__)


[docs] def create_histogram(values: pd.Series, bins: np.ndarray) -> dict: """ Create a histogram of the input values. Paramters: - values (pd.Series): A Pandas Series containing the values to bin. - bins (np.ndarray): An array of bin edges. Returns: - dict: A dictionary containing the bin edges, bin width, bin centers, counts, and PDF of the histogram. """ counts, bin_edges = np.histogram(values, bins) bin_width = bin_edges[1] - bin_edges[0] bin_centers = bin_edges[:-1] + bin_width / 2 PDF = counts / (counts.sum() * bin_width) return { "bin_edges": bin_edges, "bin_width": bin_width, "bin_centers": bin_centers, "counts": counts, "PDF": PDF, }
[docs] def create_all_histograms( recorded_paths: pd.DataFrame, simulated_paths: pd.DataFrame, config: dict, ): observables = config.params.histogram_plot.observables histograms = {} for traj_type, trajectories in zip( ["recorded", "simulated"], [recorded_paths, simulated_paths] ): histograms[traj_type] = {} for observable in observables: lims = config.params.histogram_plot[f"{observable}lims"] bins = np.linspace(lims[0], lims[1], 50) values = trajectories[observable] histograms[traj_type][observable] = create_histogram(values, bins) return histograms
[docs] def compute_joint_kl_divergence( piecewise_potential: PiecewisePotential, simulated_paths: pd.DataFrame, ) -> float: # We compare the probability distributions of the actual dynamics for # measurements and simulations recorded_paths_histogram = piecewise_potential.histogram simulated_paths = digitize_trajectories_to_grid( simulated_paths, piecewise_potential.lattice ) simulated_paths_histogram = np.zeros_like(recorded_paths_histogram) simulated_paths_histogram = add_trajectories_to_histogram( simulated_paths_histogram, simulated_paths, "fast_grid_indices" ) recorded_paths_histogram = np.where( recorded_paths_histogram == 0, np.nan, recorded_paths_histogram ) simulated_paths_histogram = np.where( simulated_paths_histogram == 0, np.nan, simulated_paths_histogram ) kl = entropy( recorded_paths_histogram, simulated_paths_histogram, nan_policy="omit", axis=(0, 1, 2, 3, 4), ) return kl
[docs] def compute_joint_kl_divergence_with_volume( piecewise_potential: PiecewisePotential, simulated_paths: pd.DataFrame, ) -> float: histogram_measurements = piecewise_potential.histogram cell_volume = piecewise_potential.lattice.cell_volume simulated_paths = digitize_trajectories_to_grid( simulated_paths, piecewise_potential.lattice ) histogram_simulations = np.zeros_like(histogram_measurements) histogram_simulations = add_trajectories_to_histogram( histogram_simulations, simulated_paths, "fast_grid_indices" ) histogram_simulations = np.where( histogram_simulations == 0, np.nan, histogram_simulations ) histogram_measurements = np.where( histogram_measurements == 0, np.nan, histogram_measurements ) prob_dist_measurements = histogram_measurements / np.nansum( histogram_measurements ) prob_dens_measurements = np.divide(prob_dist_measurements, cell_volume) prob_dist_simulations = histogram_simulations / np.nansum( histogram_simulations ) prob_dens_simulations = np.divide(prob_dist_simulations, cell_volume) # Compute Kullback-Leibler divergence kl = np.nansum( np.multiply( prob_dist_measurements, np.log(np.divide(prob_dens_measurements, prob_dens_simulations)), ) ) return kl
[docs] def plot_histogram( ax: Axes, histograms: Dict[str, Any], observable: str, hist_type: str, config: dict, ) -> Axes: """ Plot a histogram. Parameters: - ax (plt.Axes): The axes to plot the histogram on. - histograms (Dict[str, Any]): The histograms to plot. - observable (str): The observable to plot the histogram for. - hist_type (str): The type of histogram to plot. - kl_div (float): The KL divergence value for the histogram. Returns: - The axes object. """ ntrajs = { "recorded": config.params.input_ntrajs, "simulated": config.params.simulation.ntrajs, } labels = { "recorded": f"Measurements ($N =$ {ntrajs['recorded']})", "simulated": f"Simulations ($N =$ {ntrajs['simulated']})", } histogram_plot_params = config.params.histogram_plot for trajectory_type in ["recorded", "simulated"]: label = labels[trajectory_type] ax.scatter( histograms[trajectory_type][observable]["bin_centers"], histograms[trajectory_type][observable][hist_type], ec=histogram_plot_params[trajectory_type]["edgecolor"], fc=histogram_plot_params[trajectory_type]["facecolor"], label=label, s=histogram_plot_params[trajectory_type]["markersize"], ) ax.set_xlabel(histogram_plot_params[observable]["xlabel"]) ax.set_ylabel(histogram_plot_params[observable]["ylabel"][hist_type]) return ax
[docs] def save_joint_kl_divergence_to_file( joint_kl_divergence: float, config: dict ) -> None: kl_divergence = {} params = config.params kl_divergence["env_name"] = params.env_name kl_divergence["ntrajs"] = params.simulation.ntrajs kl_divergence["tauu"] = params.model.tauu kl_divergence["dt"] = params.model.dt kl_divergence["noise"] = params.model.sigma kl_divergence["joint_kl_divergence"] = joint_kl_divergence with open(Path.cwd() / "figures" / "kl_divergence.pkl", "wb") as f: pickle.dump(kl_divergence, f)
[docs] def plot_multiple_histograms( observables: List, histograms: dict, histogram_type: str, config: dict ): """ Plot histograms for all observables. Parameters: - ax (plt.Axes): The axes to plot the histogram on. - histograms (dict): The histograms to plot. - observable (str): The observable to plot the histogram for. - hist_type (str): The type of histogram to plot. - kl_div (float): The KL divergence value for the histogram. Returns: - The axes object. """ params = config.params width_single_panel = 1.77 height_single_panel = 1.18 subplot_grid = params.histogram_plot.subplot_grid fig = plt.figure( figsize=( width_single_panel * subplot_grid[1], height_single_panel * subplot_grid[0], ), layout="constrained", ) hist_plot_params = params.histogram_plot for plotid, observable in enumerate(observables): ax = fig.add_subplot(subplot_grid[0], subplot_grid[1], plotid + 1) ax = plot_histogram( ax, histograms, observable, hist_type=histogram_type, config=config, ) xlims = hist_plot_params.get(f"{observable}lims", None) ax.set_xlim(xlims) ymin, ymax = ax.get_ylim() ax.set_ylim(ymin * 2, ymax * 2) # props = {"facecolor": "white", "alpha": 1, "edgecolor": "black", # "lw": 0.5, "pad": 1.6} # ax.text( # 0.215, # 0.92, # f"$D_{{\\! K\\! L}}={kldiv:.3f}$", # transform=ax.transAxes, # ha="center", # va="center", # fontsize=6, # bbox=props, # ) handles, labels = ax.get_legend_handles_labels() fig.legend( handles, labels, bbox_to_anchor=(0.5, -0.05), ncol=2, fontsize=7, loc="center", ) folderpath = Path.cwd() / "figures" folderpath.mkdir(parents=True, exist_ok=True) filepath = folderpath / f"histograms_{params.env_name}.pdf" plt.savefig(filepath)