# Source code for pltviz.comp_line

"""
Comparative Line Plot
---------------------

Contents:
    comp_line
"""

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from pltviz import utils

# Default value for the ``dsat`` argument of comp_line.
default_sat = 0.95


def _spread_to_wide(df, dependent_col, indep_col, group_col):
    """
    Reshapes a long dataframe into one row per group and one column per baseline stat.

    Parameters
    ----------
        df : pd.DataFrame
            Long-form dataframe with one row per (group, baseline stat) pair.

        dependent_col : str
            The column in df holding the values to be compared.

        indep_col : str
            The column in df holding the baseline stats.

        group_col : str
            The column in df in which groups are defined.

    Returns
    -------
        df_wide, wide_cols, sorted_stats : pd.DataFrame, list, list
            The reshaped dataframe (a "locations" column followed by wide_cols),
            the names of the generated value columns, and the ascending baseline
            stats that those columns correspond to (same order).

    Raises
    ------
        ValueError
            If a group has no row for one of the baseline stats.
    """

    def to_stat(raw):
        return utils.round_if_int(float(raw))

    # Sort the baseline stats, as they're likely years, so the objective is a
    # graph that's increasing in time.
    sorted_stats = sorted({to_stat(s) for s in df[indep_col].unique()})
    wide_cols = [str(s) + "_" + dependent_col for s in sorted_stats]

    rows = []
    for grp in df[group_col].unique():
        grp_rows = df[df[group_col] == grp]
        # Pair every value with its own baseline stat instead of relying on the
        # row order of df, so that the result is right for any input ordering.
        value_by_stat = {
            to_stat(s): v
            for s, v in zip(grp_rows[indep_col], grp_rows[dependent_col])
        }
        missing = [s for s in sorted_stats if s not in value_by_stat]
        if missing:
            raise ValueError(
                "Group {!r} has no '{}' value for the following '{}' entries: {}.".format(
                    grp, dependent_col, indep_col, missing
                )
            )
        rows.append([grp] + [value_by_stat[s] for s in sorted_stats])

    df_wide = pd.DataFrame(rows, columns=["locations"] + wide_cols)

    return df_wide, wide_cols, sorted_stats


def comp_line(
    df=None,
    dependent_cols=None,
    indep_stats=None,
    group_col=None,
    colors=None,
    stacked=False,
    percent=False,
    dsat=default_sat,
    axis=None,
):
    """
    Plots a line plot to compare statistics over a changing baseline.

    Parameters
    ----------
        df : pd.DataFrame
            Dataframe that contains statistics to be compared.

        dependent_cols : str or list (contains strs) (default=None)
            The column(s) in df which should be compared.

            Note: a str is treated as a single long-form column, which requires
            indep_stats to be a column name and group_col to be passed.

        indep_stats : str or list (contains ints or floats) (default=None)
            A df column or the baseline stats that generated the columns in dependent_cols.

        group_col : str (default=None)
            The name of the column in which groups are defined.

            Note: this allows plotting across multiple instances in a single column.

        colors : list or list of lists : optional (default=None)
            The colors of the groups as hex keys.

        stacked : bool (default=False)
            Whether the plot is a stackplot.

        percent : bool (default=False)
            Whether the y-axis should depict relative amounts or not.

        dsat : float : optional (default=default_sat)
            The degree of desaturation to be applied to the colors.

        axis : matplotlib axes : optional (default=None)
            An existing axis to draw on so that plots can be combined.

    Returns
    -------
        ax : matplotlib.pyplot.subplot
            The axis on which one line (or stacked area) per row of df is drawn.

    Raises
    ------
        ValueError
            If dependent_cols is a str that is not a column of df.
    """
    if colors is None:
        sns.set_palette("deep")  # default sns palette
        colors = [
            utils.rgb_to_hex(c) for c in sns.color_palette(n_colors=len(df), desat=1)
        ]

    if isinstance(colors, (str, tuple)):
        colors = [colors]

    # Tuples are taken to be colors that were already formatted (e.g. in a prior
    # recursive step), so only other inputs are converted and desaturated.
    if not isinstance(colors[0], tuple):
        colors = [
            utils.scale_saturation(rgb_trip=utils.hex_to_rgb(c), sat=dsat)
            for c in colors
        ]
    sns.set_palette(colors)

    df_copy = df.copy()

    if isinstance(dependent_cols, str):
        # Assume that the user is passing a single column with values
        # corresponding to another column's.
        if dependent_cols not in df_copy.columns:
            raise ValueError(
                "The 'dependent_cols' argument does not contain column labels for the provided dataframe."
            )

        assert isinstance(indep_stats, str) and isinstance(
            df_copy[indep_stats], pd.Series
        ), "A corresponding column should be passed as 'indep_stats' if 'dependent_cols' is a single df column."
        assert (
            group_col is not None
        ), "The 'group_col' argument must be passed if providing a single comparison column."

        # Create a similar form to the other path's df and recursively run this function.
        df_new, new_dep_cols, sorted_stats = _spread_to_wide(
            df=df_copy,
            dependent_col=dependent_cols,
            indep_col=indep_stats,
            group_col=group_col,
        )

        return comp_line(
            df=df_new,
            dependent_cols=new_dep_cols,
            # The sorted stats line up one-to-one with new_dep_cols.
            indep_stats=sorted_stats,
            colors=colors,
            stacked=stacked,
            percent=percent,
            dsat=dsat,
            axis=axis,
        )

    if percent:
        # Normalize each column so that its values sum to one.
        for col in dependent_cols:
            df_copy[col] = df_copy[col] / sum(df_copy[col])

    if stacked:
        # One list of values per df row, ordered like dependent_cols.
        lol_allocations = [list(row) for row in df_copy[dependent_cols].values]

        if axis is not None:
            ax = axis  # to mirror seaborn axis plotting
        else:
            ax = plt.subplots()[1]

        ax.stackplot(indep_stats, lol_allocations)

    else:
        # seaborn draws on the current axis if none is given; resolving it here
        # keeps ax defined even if df has no rows.
        ax = axis if axis is not None else plt.gca()
        for i in df_copy.index:
            ax = sns.lineplot(
                x=indep_stats, y=list(df_copy.loc[i, dependent_cols].values), ax=ax
            )

    if percent:
        ax.set_ylim([0, 1])

    ax.set_xlim([min(indep_stats), max(indep_stats)])

    return ax