# Source code for boa.plotting

"""
###################################
Plotting Utils
###################################

Plotting utility functions
"""
from __future__ import annotations

import os
from itertools import combinations
from typing import List, Union

import numpy as np
import pandas as pd
import panel as pn
import plotly.graph_objs as go
from ax.modelbridge.registry import get_model_from_generator_run
from ax.plot.contour import plot_contour_plotly
from ax.plot.helper import get_range_parameters_from_list
from ax.plot.pareto_frontier import plot_pareto_frontier as ax_plot_pareto_frontier
from ax.plot.pareto_utils import compute_posterior_pareto_frontier
from ax.plot.slice import interact_slice_plotly
from ax.plot.trace import optimization_trace_single_method_plotly
from ax.service.utils.report_utils import exp_to_df

from boa.definitions import PathLike_tup
from boa.scheduler import Scheduler
from boa.storage import scheduler_from_json_file

# Input accepted by the single-scheduler helpers below: an initialized
# ``Scheduler`` or a path (``os.PathLike`` / ``str``) to a scheduler JSON file.
SchedulerOrPath = Union[Scheduler, os.PathLike, str]
# Same as ``SchedulerOrPath``, additionally allowing a list of schedulers or a
# list of paths.
SchedulersOrPathList = Union[List[Scheduler], List[Union[os.PathLike, str]], Scheduler, os.PathLike, str]


# Default value of the ``CI_level`` argument of ``plot_pareto_frontier``.
DEFAULT_CI_LEVEL: float = 0.9
# Module-level side effect: loads panel's "plotly" extension when this module
# is imported.
pn.extension("plotly")


# Names exported as the public API of this module.
__all__ = [
    "plot_contours",
    "plot_metrics_trace",
    "plot_pareto_frontier",
    "plot_slice",
    "scheduler_to_df",
    "app_view",
]


def _maybe_load_scheduler(scheduler: SchedulerOrPath):
    """Return a scheduler, loading it from a JSON file when given a path.

    Parameters
    ----------
    scheduler
        Initialized scheduler or path to `scheduler.json file`.

    Returns
    -------
    The scheduler that was passed in, or, when a path was passed, the scheduler
    loaded from that file with a ``model`` attribute rebuilt from its last
    generator run.
    """
    # Anything that is not path-like is handed back untouched.
    if not isinstance(scheduler, PathLike_tup):
        return scheduler

    loaded = scheduler_from_json_file(scheduler)
    generation_strategy = loaded.generation_strategy
    experiment = loaded.experiment
    # Rebuild the model from the last generator run and attach it so the
    # plotting functions can read ``scheduler.model``.
    loaded.model = get_model_from_generator_run(
        generator_run=generation_strategy.last_generator_run,
        experiment=experiment,
        data=experiment.fetch_data(),
        models_enum=type(generation_strategy.current_step.model),
        after_gen=False,
    )
    return loaded


def _maybe_load_schedulers(schedulers: SchedulersOrPathList):
    """Normalize one or many schedulers/paths into a list of schedulers.

    Parameters
    ----------
    schedulers
        A single initialized scheduler or path to `scheduler.json file`, or a
        list (or tuple) of them.

    Returns
    -------
    A new list in which every entry has been passed through
    ``_maybe_load_scheduler``. The caller's sequence is never modified.
    """
    # A lone scheduler or path is treated as a one-element collection.
    if not isinstance(schedulers, (list, tuple)):
        schedulers = [schedulers]
    # Build a fresh list instead of overwriting entries of the caller's list.
    return [_maybe_load_scheduler(scheduler) for scheduler in schedulers]


def scheduler_to_df(scheduler: SchedulerOrPath, **kwargs) -> pd.DataFrame:
    """
    Transforms a scheduler's experiment to a DataFrame with rows keyed by trial_index
    and arm_name, metrics pivoted into one row. If the pivot results in more than
    one row per arm (or one row per ``arm * map_keys`` combination if ``map_keys`` are
    present), results are omitted and warning is produced.

    Parameters
    ----------
    scheduler
        Initialized scheduler or path to `scheduler.json file`.
    **kwargs
        key word arguments to pass to AXs `exp_to_df`

    Returns
    -------
    A dataframe of inputs, metadata and metrics by trial and arm (and
    ``map_keys``, if present). If no trials are available, returns an empty
    dataframe.
    """
    # Accept a path as documented, the same way the plotting functions do.
    scheduler = _maybe_load_scheduler(scheduler)
    return exp_to_df(exp=scheduler.experiment, **kwargs)


def plot_metrics_trace(
    schedulers: SchedulersOrPathList,
    metric_names: list[str] | str | None = None,
    title: str = "Metric Performance vs. # of Iterations",
    **kwargs,
):
    """Plots an optimization trace with a dropdown to choose the metric.

    Parameters
    ----------
    schedulers
        List of initialized scheduler or path to `scheduler.json file`
        or single initialized scheduler or path to `scheduler.json file`
    metric_names
        metric name or list of metric names to restrict dropdowns to. If None, will use all metric names
        of the first scheduler's experiment.
    title
        The title of plot
    **kwargs
        key word arguments to pass to AXs `optimization_trace_single_method_plotly`

    Returns
    -------
    A panel ``Column`` holding the title, the metric dropdown and the trace
    plot bound to the dropdown's value.
    """

    schedulers = _maybe_load_schedulers(schedulers)

    # A single metric name is documented as valid input; wrap it so the
    # dropdown receives a list of options rather than a bare string.
    if isinstance(metric_names, str):
        metric_names = [metric_names]
    if not metric_names:
        metric_names = list(schedulers[0].experiment.metrics.keys())
    metric_name = pn.widgets.Select(name="Metric Name", options=metric_names)

    def get_plot(metric_name):
        # One row of "mean" values per scheduler for the selected metric.
        model_transitions = set()
        ys = []
        for scheduler in schedulers:
            data = scheduler.experiment.fetch_data()
            ys.append(data.df[data.df["metric_name"] == metric_name]["mean"])
            model_transitions.update(scheduler.generation_strategy.model_transitions)
        ys = np.array(ys)
        ylabel = metric_name.title()

        # Try and use the metric's lower_is_better property, but fall back on
        # objective's minimize property if relevant
        experiment = schedulers[0].experiment
        lower_is_better = experiment.metrics[metric_name].lower_is_better
        if lower_is_better is None:
            lower_is_better = experiment.optimization_config.objective.minimize
        optimization_direction = "minimize" if lower_is_better else "maximize"

        return pn.pane.Plotly(
            optimization_trace_single_method_plotly(
                y=ys,
                ylabel=ylabel,
                model_transitions=list(model_transitions),
                optimization_direction=optimization_direction,
                plot_trial_points=True,
                **kwargs,
            ),
            sizing_mode="stretch_width",
        )

    return pn.Column("## " + title, pn.Row(metric_name), pn.bind(get_plot, metric_name), sizing_mode="stretch_width")


def plot_contours(
    scheduler: SchedulerOrPath,
    metric_names: list[str] | str | None = None,
    title: str = "Metric Contours Plot",
    **kwargs,
):
    """Plot predictions for a 2-d slice of the parameter space.

    Parameters
    ----------
    scheduler
        Initialized scheduler or path to `scheduler.json file`.
    metric_names
        metric name or list of metric names to restrict dropdowns to. If None, will use all metric names.
    title
        The title of plot
    **kwargs
        key word arguments to pass to AXs `plot_contour_plotly`

    Returns
    -------
    A panel ``Column`` holding the title, the metric / x-parameter /
    y-parameter dropdowns and the contour plot, which is redrawn whenever one
    of the dropdowns changes.
    """
    scheduler = _maybe_load_scheduler(scheduler)

    model = scheduler.model

    # A single metric name is documented as valid input; wrap it so the
    # dropdown receives a list of options rather than a bare string.
    if isinstance(metric_names, str):
        metric_names = [metric_names]
    if not metric_names:
        metric_names = list(scheduler.experiment.metrics.keys())

    range_parameters = get_range_parameters_from_list(list(scheduler.experiment.parameters.values()), min_num_values=5)
    # Initial options: x offers every parameter except the second one, y every
    # parameter except the first, so the default selections differ.
    param_names1 = [parameter.name for i, parameter in enumerate(range_parameters) if i != 1]
    param_names2 = [parameter.name for i, parameter in enumerate(range_parameters) if i != 0]

    metric_name = pn.widgets.Select(name="Metric Name", options=metric_names)
    param_x = pn.widgets.Select(name="Param X", options=param_names1)
    param_y = pn.widgets.Select(name="Param Y", options=param_names2)

    def get_plot(metric_name, param_x, param_y):
        # Prefer the metric's own lower_is_better flag; fall back on the
        # objective's minimize property when the metric does not set it.
        lower_is_better = scheduler.experiment.metrics[metric_name].lower_is_better
        if lower_is_better is None:
            lower_is_better = scheduler.experiment.optimization_config.objective.minimize
        return plot_contour_plotly(
            model=model,
            lower_is_better=lower_is_better,
            param_x=param_x,
            param_y=param_y,
            metric_name=metric_name,
            **kwargs,
        )

    col = pn.Column(
        "## " + title, pn.Row(metric_name, param_x, param_y), get_plot(metric_name.value, param_x.value, param_y.value)
    )

    def update(event):
        # Remove the parameter chosen on one axis from the other axis'
        # options, then redraw the plot held in the last slot of the column.
        param_x.options = [param.name for param in range_parameters if param.name != param_y.value]
        param_y.options = [param.name for param in range_parameters if param.name != param_x.value]
        col[-1].object = get_plot(metric_name.value, param_x.value, param_y.value)

    metric_name.param.watch(update, "value")
    param_x.param.watch(update, "value")
    param_y.param.watch(update, "value")

    return col


def plot_slice(scheduler: SchedulerOrPath, **kwargs):
    """Build an interactive plot of predictions along a 1-d slice of the
    parameter space.

    Parameters
    ----------
    scheduler
        Initialized scheduler or path to `scheduler.json file`.
    **kwargs
        key word arguments to pass to AXs `interact_slice_plotly`

    Returns
    -------
    A panel ``Plotly`` pane wrapping the figure produced by
    `interact_slice_plotly`.
    """
    loaded = _maybe_load_scheduler(scheduler)
    figure = interact_slice_plotly(model=loaded.model, **kwargs)
    return pn.pane.Plotly(figure)


def plot_pareto_frontier(
    scheduler: SchedulerOrPath,
    metric_names: list[str] | None = None,
    num_points: int = 20,
    CI_level: float = DEFAULT_CI_LEVEL,  # noqa
):
    """Plot a Pareto frontier from a scheduler.

    One frontier is computed for every pair of selected metrics; a dropdown
    switches between the pairs.

    Parameters
    ----------
    scheduler
        Initialized scheduler or path to `scheduler.json file`.
    metric_names
        list of metric names to restrict dropdowns to. If None, will use all metric names.
        At least two metrics are needed to form a pair.
    num_points
        The number of points to compute on the Pareto frontier.
    CI_level
        The confidence level, e.g. 0.95 (95%)

    Returns
    -------
    A panel ``Plotly`` pane containing the figure.

    Raises
    ------
    TypeError
        If a name in ``metric_names`` is not a metric of the experiment, or if
        fewer than two metrics are available to plot against each other.
    """
    scheduler = _maybe_load_scheduler(scheduler)
    experiment = scheduler.experiment

    # Guard against a bare string, which would otherwise be iterated
    # character by character below.
    if isinstance(metric_names, str):
        metric_names = [metric_names]

    if metric_names:
        for m in metric_names:
            if m not in experiment.metrics:
                raise TypeError(f"metric name {m} not found, check spelling of metric name")
        requested = set(metric_names)
        # Keep the experiment's own metric ordering.
        metrics = [metric for name, metric in experiment.metrics.items() if name in requested]
    else:
        metrics = list(experiment.metrics.values())

    # TypeError is used to stay consistent with the metric-name validation
    # above; without this check the function would fail later with an
    # unhelpful IndexError on an empty frontier list.
    if len(metrics) < 2:
        raise TypeError(
            "When plotting a pareto frontier, at least 2 metrics are required,"
            f" but only {len(metrics)} were available."
        )

    # Loop-invariant inputs shared by every metric pair.
    data = experiment.fetch_data()
    absolute_metrics = [m.name for m in experiment.metrics.values()]

    frontier_list = [
        compute_posterior_pareto_frontier(
            experiment=experiment,
            data=data,
            primary_objective=primary_objective,
            secondary_objective=secondary_objective,
            absolute_metrics=absolute_metrics,
            num_points=num_points,
        )
        for primary_objective, secondary_objective in combinations(metrics, 2)
    ]

    traces = []
    shapes = []
    for i, frontier in enumerate(frontier_list):
        config = ax_plot_pareto_frontier(
            frontier=frontier,
            CI_level=CI_level,
        )
        trace = config.data["data"][0]
        # Only the first trace is initially set to visible
        trace["visible"] = i == 0
        traces.append(trace)
        shapes.append(config.data["layout"].get("shapes", []))

    # TODO (jej): replace dropdown with two dropdowns, one for x one for y.
    dropdown = []
    for i, frontier in enumerate(frontier_list):
        # Only one plot trace (one per frontier) is visible at a given time.
        visible = [j == i for j in range(len(frontier_list))]
        rel_y = frontier.primary_metric not in frontier.absolute_metrics
        rel_x = frontier.secondary_metric not in frontier.absolute_metrics
        primary_metric = frontier.primary_metric
        secondary_metric = frontier.secondary_metric
        dropdown.append(
            {
                "method": "update",
                "args": [
                    {"visible": visible, "method": "restyle"},
                    {
                        "yaxis.title": primary_metric,
                        "xaxis.title": secondary_metric,
                        "yaxis.ticksuffix": "%" if rel_y else "",
                        "xaxis.ticksuffix": "%" if rel_x else "",
                        "shapes": shapes[i],
                    },
                ],
                "label": f"{primary_metric} vs {secondary_metric}",
            }
        )

    # Set initial layout arguments.
    initial_frontier = frontier_list[0]
    rel_x = initial_frontier.secondary_metric not in initial_frontier.absolute_metrics
    rel_y = initial_frontier.primary_metric not in initial_frontier.absolute_metrics
    secondary_metric = initial_frontier.secondary_metric
    primary_metric = initial_frontier.primary_metric

    layout = go.Layout(
        title="Pareto Frontier",
        xaxis={
            "title": secondary_metric,
            "ticksuffix": "%" if rel_x else "",
            "zeroline": True,
        },
        yaxis={
            "title": primary_metric,
            "ticksuffix": "%" if rel_y else "",
            "zeroline": True,
        },
        updatemenus=[
            {
                "buttons": dropdown,
                "x": 0.075,
                "xanchor": "left",
                "y": 1.1,
                "yanchor": "middle",
            }
        ],
        hovermode="closest",
        legend={"orientation": "h"},
        width=750,
        height=500,
        margin=go.layout.Margin(pad=4, l=225, b=75, t=75),  # noqa E741
        shapes=shapes[0],
    )

    fig = go.Figure(data=traces, layout=layout)
    return pn.pane.Plotly(fig)


def app_view(
    scheduler: SchedulerOrPath,
    metric_names: list[str] | None = None,
):
    """Assemble a web view made of several EDA plots for one scheduler.

    Parameters
    ----------
    scheduler
        Initialized scheduler or path to `scheduler.json file`.
    metric_names
        metric name or list of metric names to restrict dropdowns to. If None, will use all metric names.

    Returns
    -------
    The result of calling ``servable()`` on a panel ``BootstrapTemplate``
    whose main area holds the assembled plots.
    """
    scheduler = _maybe_load_scheduler(scheduler)

    # The Pareto frontier is only built for multi-objective problems.
    pareto_pane = (
        plot_pareto_frontier(scheduler=scheduler, metric_names=metric_names)
        if scheduler.experiment.is_moo_problem
        else None
    )

    # First row: metric trace, with the Pareto plot next to it when present.
    top_row = pn.Row(plot_metrics_trace(schedulers=scheduler, metric_names=metric_names))
    if pareto_pane:
        top_row.append(pareto_pane)

    # Remaining rows: slice plot, contour plot, then the trial dataframe.
    view = pn.Column(
        top_row,
        plot_slice(scheduler=scheduler),
        plot_contours(scheduler=scheduler, metric_names=metric_names),
        scheduler_to_df(scheduler),
    )

    return pn.template.BootstrapTemplate(site="BOA", main=[view]).servable()