Skip to content

.functionality

  • Intermediate classes based on the functionality.

    BokehDataAnnotator

    Annotate data points via callbacks on the buttons.

    Features:

    • alter values in the 'label' column through the widgets.

    plot(self)

    Re-plot all data points with the new labels.

    Overrides the parent method. Determines the label -> color mapping dynamically.

    Source code in hover/core/explorer/functionality.py
    def plot(self):
        """
        ???+ note "Re-plot all data points with the new labels."
            Overrides the parent method.
            Determines the label -> color mapping dynamically: the glyph
            color is given as `SOURCE_COLOR_FIELD` (presumably a column of
            each data source -- to be confirmed against the parent class).
        """
        for subset in self.sources:
            # one circle renderer per subset, named after the subset key
            self.figure.circle(
                "x",
                "y",
                name=subset,
                color=SOURCE_COLOR_FIELD,
                source=self.sources[subset],
                **self.glyph_kwargs[subset],
            )
            num_points = self.dfs[subset].shape[0]
            self._good(f"Plotted subset {subset} with {num_points} points")
    

    BokehDataFinder

    Plot data points in grey ('gainsboro') and highlight search positives in coral.

    Features:

    • the search widgets will highlight the results through a change of color, which gives arguably the best focus.

    plot(self)

    Plot all data points.
    Source code in hover/core/explorer/functionality.py
    def plot(self):
        """
        ???+ note "Plot all data points."
            Draws one circle renderer per subset, named after the subset key
            and styled by that subset's entry in `glyph_kwargs`.
        """
        for subset in self.sources:
            style = self.glyph_kwargs[subset]
            self.figure.circle(
                "x", "y", name=subset, source=self.sources[subset], **style
            )
            num_points = self.dfs[subset].shape[0]
            self._good(f"Plotted subset {subset} with {num_points} points")
    

    BokehMarginExplorer

    Plot data points along with two versions of labels.

    Could be useful for A/B tests.

    Features:

    • can choose to only plot the margins about specific labels.
    • currently not considering multi-label scenarios.

    __init__(self, df_dict, label_col_a, label_col_b, **kwargs) special

    Additional constructor

    On top of the requirements of the parent class, the input dataframe should contain:

    • label_col_a and label_col_b for "label margins".
    Param Type Description
    df_dict dict str -> DataFrame mapping
    label_col_a str column for label set A
    label_col_b str column for label set B
    **kwargs forwarded to bokeh.plotting.figure
    Source code in hover/core/explorer/functionality.py
    def __init__(self, df_dict, label_col_a, label_col_b, **kwargs):
        """
        ???+ note "Additional constructor"
            On top of the requirements of the parent class,
            the input dataframe should contain:

            - label_col_a and label_col_b for "label margins".

            | Param         | Type   | Description                  |
            | :------------ | :----- | :--------------------------- |
            | `df_dict`     | `dict` | `str` -> `DataFrame` mapping |
            | `label_col_a` | `str`  | column for label set A       |
            | `label_col_b` | `str`  | column for label set B       |
            | `**kwargs`    |        | forwarded to `bokeh.plotting.figure` |
        """
        # the label columns are recorded before delegating to the parent
        # constructor (presumably because the parent reads them -- TODO confirm)
        self.label_col_a, self.label_col_b = label_col_a, label_col_b
        super().__init__(df_dict, **kwargs)
    

    plot(self, label, **kwargs)

    Plot the margins about a single label.
    Param Type Description
    label the label to plot about
    **kwargs forwarded to plotting markers
    Source code in hover/core/explorer/functionality.py
    def plot(self, label, **kwargs):
        """
        ???+ note "Plot the margins about a single label."
            For every subset, three groups of points are drawn:

            - agreement: both label columns equal `label` (square marker).
            - increment: only `label_col_b` equals `label` (x marker).
            - decrement: only `label_col_a` equals `label` (cross marker).

            | Param      | Type   | Description                  |
            | :--------- | :----- | :--------------------------- |
            | `label`    |        | the label to plot about      |
            | `**kwargs` |        | forwarded to plotting markers |
        """

        def row_positions(mask):
            """Return the positional indices where `mask` is true, as a list."""
            return np.where(mask)[0].tolist()

        for subset, source in self.sources.items():
            # caller kwargs override the subset defaults;
            # the legend label is always the plotted label
            style = {**self.glyph_kwargs[subset], **kwargs, "legend_label": f"{label}"}

            # positions of the rows that do / do not carry the label, per column
            frame = self.dfs[subset]
            in_a = row_positions(frame[self.label_col_a] == label)
            out_a = row_positions(frame[self.label_col_a] != label)
            in_b = row_positions(frame[self.label_col_b] == label)
            out_b = row_positions(frame[self.label_col_b] != label)

            # agreement / increment / decrement views, each combining two filters
            views = [
                CDSView(
                    source=source, filters=[IndexFilter(rows_a), IndexFilter(rows_b)]
                )
                for rows_a, rows_b in ((in_a, in_b), (out_a, in_b), (in_a, out_b))
            ]
            markers = (self.figure.square, self.figure.x, self.figure.cross)

            # plot created subsets
            for view, draw in zip(views, markers):
                draw("x", "y", name=subset, source=source, view=view, **style)
    

    BokehSnorkelExplorer

    Plot data points along with labeling function (LF) outputs.

    Features:

    • each labeling function corresponds to its own line_color.
    • uses a different marker for each type of predictions: square for 'correct', x for 'incorrect', cross for 'missed', circle for 'hit'.
    • 'correct': the LF made a correct prediction on a point in the 'labeled' set.
    • 'incorrect': the LF made an incorrect prediction on a point in the 'labeled' set.
    • 'missed': the LF is capable of predicting the target class, but did not make such prediction on the particular point.
    • 'hit': the LF made a prediction on a point in the 'raw' set.

    __init__(self, df_dict, **kwargs) special

    Additional constructor

    Set up

    • a list to keep track of plotted labeling functions.
    • a palette for plotting labeling function predictions.
    Param Type Description
    df_dict dict str -> DataFrame mapping
    **kwargs forwarded to bokeh.plotting.figure
    Source code in hover/core/explorer/functionality.py
    def __init__(self, df_dict, **kwargs):
        """
        ???+ note "Additional constructor"
            After the parent constructor has run, set up

            -   a palette for plotting labeling function predictions.
            -   a list to keep track of plotted labeling functions.

            | Param       | Type   | Description                  |
            | :---------- | :----- | :--------------------------- |
            | `df_dict`   | `dict` | `str` -> `DataFrame` mapping |
            | `**kwargs`  |        | forwarded to `bokeh.plotting.figure` |
        """
        super().__init__(df_dict, **kwargs)

        # colors available to labeling functions: the `Category20[20]` palette
        self.palette = Category20[20]
        # labeling functions plotted so far; starts out empty
        self.lfs = []
    

    plot(self, *args, **kwargs)

    Plot the raw subset in the background.
    Source code in hover/core/explorer/functionality.py
    def plot(self, *args, **kwargs):
        """
        ???+ note "Plot the raw subset in the background."
            Only the `"raw"` subset is drawn, as circles.
            Positional and keyword arguments are accepted but not used.
        """
        raw_source = self.sources["raw"]
        raw_style = self.glyph_kwargs["raw"]
        self.figure.circle("x", "y", name="raw", source=raw_source, **raw_style)

        raw_count = self.dfs["raw"].shape[0]
        self._good(f"Plotted subset raw with {raw_count} points")
    

    plot_lf(self, lf, L_raw=None, L_labeled=None, include=('C', 'I', 'M'), **kwargs)

    Plot about a single labeling function.
    Param Type Description
    lf callable labeling function decorated by @labeling_function() from hover.utils.snorkel_helper
    L_raw np.ndarray predictions, in decoded str, on the "raw" set
    L_labeled np.ndarray predictions, in decoded str, on the "labeled" set
    include tuple of str "C" for correct, "I" for incorrect, "M" for missed, "H" for hit: types of predictions to make visible in the plot
    **kwargs forwarded to plotting markers
    • lf: labeling function decorated by @labeling_function() from hover.utils.snorkel_helper
    • L_raw: predictions, in decoded str, on the "raw" set.
    • L_labeled: predictions, in decoded str, on the "labeled" set.
    • include: subsets to show, which can be correct(C)/incorrect(I)/missed(M)/hit(H).
    Source code in hover/core/explorer/functionality.py
    def plot_lf(
        self, lf, L_raw=None, L_labeled=None, include=("C", "I", "M"), **kwargs
    ):
        """
        ???+ note "Plot about a single labeling function."
            | Param       | Type             | Description                  |
            | :---------- | :--------------- | :--------------------------- |
            | `lf`        | `callable`       | labeling function decorated by `@labeling_function()` from `hover.utils.snorkel_helper` |
            | `L_raw`     | `np.ndarray`     | predictions, in decoded `str`, on the `"raw"` set; computed by applying `lf` row-wise when `None` |
            | `L_labeled` | `np.ndarray`     | predictions, in decoded `str`, on the `"labeled"` set; computed by applying `lf` row-wise when `None` |
            | `include`   | `tuple` of `str` | "C" for correct, "I" for incorrect, "M" for missed, "H" for hit: types of predictions to make visible in the plot |
            | `**kwargs`  |                  | forwarded to plotting markers; these override the subset defaults, the legend label and the color |

            Each plotted labeling function takes the next color of
            `self.palette`; once every color has been used, the palette
            is cycled from its beginning again.
        """
        # keep track of added LF
        self.lfs.append(lf)

        # calculate predicted labels if not provided
        if L_raw is None:
            L_raw = self.dfs["raw"].apply(lf, axis=1).values
        if L_labeled is None:
            L_labeled = self.dfs["labeled"].apply(lf, axis=1).values

        # prepare plot settings
        legend_label = f"{', '.join(lf.targets)} | {lf.name}"
        # wrap around the palette so that plotting more labeling functions
        # than there are colors reuses colors instead of raising IndexError
        color = self.palette[(len(self.lfs) - 1) % len(self.palette)]

        def styled(subset):
            """Build the marker kwargs for one subset of this labeling function."""
            style = self.glyph_kwargs[subset].copy()
            style["legend_label"] = legend_label
            style["color"] = color
            # caller-supplied kwargs have the final say
            style.update(kwargs)
            return style

        raw_glyph_kwargs = styled("raw")
        labeled_glyph_kwargs = styled("labeled")

        # create correct/incorrect/missed/hit subsets
        to_plot = []
        if "C" in include:
            to_plot.append(
                {
                    "name": "labeled",
                    "view": self._view_correct(L_labeled),
                    "marker": self.figure.square,
                    "kwargs": labeled_glyph_kwargs,
                }
            )
        if "I" in include:
            to_plot.append(
                {
                    "name": "labeled",
                    "view": self._view_incorrect(L_labeled),
                    "marker": self.figure.x,
                    "kwargs": labeled_glyph_kwargs,
                }
            )
        if "M" in include:
            to_plot.append(
                {
                    "name": "labeled",
                    "view": self._view_missed(L_labeled, lf.targets),
                    "marker": self.figure.cross,
                    "kwargs": labeled_glyph_kwargs,
                }
            )
        if "H" in include:
            to_plot.append(
                {
                    "name": "raw",
                    "view": self._view_hit(L_raw),
                    "marker": self.figure.circle,
                    "kwargs": raw_glyph_kwargs,
                }
            )

        # plot created subsets; each view carries the data source it filters
        for _dict in to_plot:
            _name = _dict["name"]
            _view = _dict["view"]
            _marker = _dict["marker"]
            _kwargs = _dict["kwargs"]
            _marker("x", "y", source=_view.source, view=_view, name=_name, **_kwargs)
    

    BokehSoftLabelExplorer

    Plot data points according to their labels and confidence scores.

    Features:

    • the predicted label will correspond to fill_color.
    • the confidence score, assumed to be a float between 0.0 and 1.0, will be reflected through fill_alpha.
    • currently not considering multi-label scenarios.

    __init__(self, df_dict, label_col, score_col, **kwargs) special

    Additional constructor

    On top of the requirements of the parent class, the input dataframe should contain:

    • label_col and score_col for "soft predictions".
    Param Type Description
    df_dict dict str -> DataFrame mapping
    label_col str column for the soft label
    score_col str column for the soft score
    **kwargs forwarded to bokeh.plotting.figure
    Source code in hover/core/explorer/functionality.py
    def __init__(self, df_dict, label_col, score_col, **kwargs):
        """
        ???+ note "Additional constructor"
            On top of the requirements of the parent class,
            the input dataframe should contain:

            - label_col and score_col for "soft predictions".

            | Param       | Type   | Description                  |
            | :---------- | :----- | :--------------------------- |
            | `df_dict`   | `dict` | `str` -> `DataFrame` mapping |
            | `label_col` | `str`  | column for the soft label; must not be `"label"` |
            | `score_col` | `str`  | column for the soft score    |
            | `**kwargs`  |        | forwarded to `bokeh.plotting.figure` |

            Raises `AssertionError` when `label_col` is `"label"`,
            because that field name is reserved.
        """
        # raised explicitly rather than through `assert`, so that the check
        # is not stripped when Python runs with optimizations (`-O`)
        if label_col == "label":
            raise AssertionError("'label' field is reserved")

        # the column names are recorded before delegating to the parent constructor
        self.label_col = label_col
        self.score_col = score_col
        super().__init__(df_dict, **kwargs)
    

    plot(self, **kwargs)

    Plot all data points, setting color alpha based on the soft score.
    Param Type Description
    **kwargs forwarded to plotting markers
    Source code in hover/core/explorer/functionality.py
    def plot(self, **kwargs):
        """
        ???+ note "Plot all data points, setting color alpha based on the soft score."
            | Param      | Type   | Description                  |
            | :--------- | :----- | :--------------------------- |
            | `**kwargs` |        | forwarded to plotting markers |

            Marker settings are layered in this order, later ones winning:
            the subset's `glyph_kwargs`, the color / fill-alpha presets,
            then the caller's `**kwargs`.
        """
        # presets shared by every subset
        presets = {
            "color": SOURCE_COLOR_FIELD,
            "fill_alpha": SOURCE_ALPHA_FIELD,
        }

        for subset, source in self.sources.items():
            # prepare plot settings
            style = {**self.glyph_kwargs[subset], **presets, **kwargs}

            self.figure.circle("x", "y", name=subset, source=source, **style)
            num_points = self.dfs[subset].shape[0]
            self._good(f"Plotted subset {subset} with {num_points} points")