Source code for pltviz.comp_line

"""
Comparative Line Plot
---------------------

Contents:
    comp_line
"""

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from pltviz import utils

default_sat = 0.95


[docs]def comp_line( df=None, dependent_cols=None, indep_stats=None, group_col=None, colors=None, stacked=False, percent=False, dsat=default_sat, axis=None, ): """ Plots a line plot to compare statistics over a changing baseline. Parameters ---------- df : pd.DataFrame Dataframe that contains statistics to be compared. dependent_cols : str or list (contains strs) (default=None) The column(s) in df which should be compared. indep_stats : str or list (contains ints or floats) (default=None) A df column or the baseline stats that generated the columns in dependent_cols. group_col : str (default=None) The name of the column in which groups are defined. Note: this allows plotting across multiple instances in a single column. colors : list or list of lists : optional (default=None) The colors of the groups as hex keys. stacked : bool (default=False) Whether the plot is a stackplot. percent : bool (default=False) Whether the y-axis should depict relative amounts or not. dsat : float : optional (default=default_sat) The degree of desaturation to be applied to the colors. axis : str : optional (default=None) Adds an axis to plots so they can be combined. Returns ------- ax : matplotlib.pyplot.subplot A line plot that shows the shifts in group allocations given seat limits. """ if colors == None: sns.set_palette("deep") # default sns palette colors = [ utils.rgb_to_hex(c) for c in sns.color_palette(n_colors=len(df), desat=1) ] if isinstance(colors, (str, tuple)): colors = [colors] # Check to see if colors haven't been formatted in a prior recursive step. if not isinstance(colors[0], tuple): colors = [ utils.scale_saturation(rgb_trip=utils.hex_to_rgb(c), sat=default_sat) for c in colors ] sns.set_palette(colors) df_copy = df.copy() if isinstance(dependent_cols, str): # Assume that the user is passing a single column with values # corresponding to another column's. if dependent_cols in df_copy.columns: assert isinstance(indep_stats, str) and isinstance( df_copy[indep_stats], pd.Series ), "A corresponding column should be passed as 'indep_stats' if 'dependent_cols' is a single df column." assert ( group_col != None ), "The 'group_col' argument must be passed if providing a single comparison column." # Create a similar form to the other path's df and recursively run this function. new_indep_stats = [ utils.round_if_int(float(s)) for s in df_copy[indep_stats].unique() ] # Sort the baseline stats, as they're likely years, so objective is a # graph that's increasing in time. sorted_nbs = sorted(new_indep_stats) # Derive whether it already was sorted to know how to order the value assignment. was_sorted = sorted_nbs == new_indep_stats if was_sorted == True: was_sorted = -1 else: was_sorted = 1 new_dep_cols = [str(s) + "_" + dependent_cols for s in sorted_nbs] df_cols = ["locations"] + new_dep_cols df_new = pd.DataFrame(columns=df_cols) df_new["locations"] = df_copy[group_col].unique() for lctn in df_new["locations"]: df_new.loc[ df_new[df_new["locations"] == lctn].index, new_dep_cols ] = df_copy.loc[ df_copy[df_copy[group_col] == lctn].index, dependent_cols ].values[ ::was_sorted ] return comp_line( df=df_new, dependent_cols=new_dep_cols, indep_stats=new_indep_stats, colors=colors, stacked=stacked, percent=percent, dsat=dsat, axis=axis, ) else: ValueError( "The 'dependent_cols' argument does not contain column labels for the provided dataframe." ) if percent == True: for col in dependent_cols: df_copy[col] = df_copy[col] / sum(df_copy[col]) if stacked: lol_allocations = [] for i in df_copy.index: list_of_allocations = [] for col in dependent_cols: list_of_allocations.append(df_copy.loc[i, col]) lol_allocations.append(list_of_allocations) if axis: ax = axis # to mirror seaborn axis plotting else: ax = plt.subplots()[1] print(indep_stats) print(lol_allocations) ax.stackplot(indep_stats, lol_allocations) else: if isinstance(dependent_cols, str): dependent_cols = [dependent_cols] for i in df_copy.index: ax = sns.lineplot( x=indep_stats, y=list(df_copy.loc[i, dependent_cols].values), ax=axis ) if percent == True: ax.set_ylim([0, 1]) ax.set_xlim([min(indep_stats), max(indep_stats)]) return ax