Source code for gerrytools.plotting.bins

from typing import List, Tuple, Union

import numpy as np
from numpy import array


[docs] def bins(scores, width=None, labels=8) -> Tuple[array, List, List, Union[float, int]]: """ Get necessary information for histograms. If we're working with only a few discrete, floating point values, then set the bin width to be relatively thin. Otherwise, adaptively set the bin width to the scale of our data. Args: scores (list): The collection of all observations. width (int, optional): The width of the bins. labels (int, optional): The number of histograms to be labeled. Returns: A tuple consisting of the histogram bins, the bins that are ticked, the labels for the bins that are ticked, and the bin width. """ # Get the minimum score and maximum score minscore, maxscore = min(scores), max(scores) # Calculate bin width using Gabe's logarithmic heuristic # TODO: Test this with real score data and see how it looks if not width: width = 10 ** (np.floor(np.log10(maxscore - minscore)) - 1) if width == 0.01: width /= 5 if width == 0.1: width = 1 if width >= 1: width = int(width) hist_bins = np.arange(minscore, maxscore + 2 * width, width) label_interval = max(int(len(hist_bins) / labels), 1) tick_bins, tick_labels = [], [] for i, x in enumerate(hist_bins[:-1]): if i % label_interval == 0: tick_labels.append(x) tick_bins.append(x + width / 2) for i, label in enumerate(tick_labels): if isinstance(label, np.float64): tick_labels[i] = round(label, 2) return hist_bins, tick_bins, tick_labels, width