Skip to content


  • Intermediate classes based on the functionality.

    BokehDataAnnotator (BokehBaseExplorer)

    Annotate data points via callbacks on the buttons.


    • alter values in the 'label' column through the widgets.
    Source code in hover/core/explorer/
    class BokehDataAnnotator(BokehBaseExplorer):
        ???+ note "Annotate data points via callbacks on the buttons."
            - alter values in the 'label' column through the widgets.
            _key: {
                "constant": {"line_alpha": 0.3},
                "search": {
                    "size": ("size", 10, 5, 7),
                    "fill_alpha": ("fill_alpha", 0.5, 0.1, 0.4),
            for _key in ["raw", "train", "dev", "test"]
        def _postprocess_sources(self):
            ???+ note "Infer glyph colors from the label dynamically."
                This is during initialization or re-plotting, creating a new attribute column for each data source.
            color_dict = self.auto_color_mapping()
            for _key, _df in self.dfs.items():
                _color = (
                    .apply(lambda label: color_dict.get(label, "gainsboro"))
                self.sources[_key].add(_color, SOURCE_COLOR_FIELD)
        def _update_colors(self):
            ???+ note "Infer glyph colors from the label dynamically."
                This is during annotation callbacks, patching an existing column for the `raw` subset only.
            # infer glyph colors dynamically
            color_dict = self.auto_color_mapping()
            color_list = (
                .apply(lambda label: color_dict.get(label, "gainsboro"))
                {SOURCE_COLOR_FIELD: [(slice(len(color_list)), color_list)]}
            self._good(f"Updated annotator plot at {current_time()}")
        def _setup_widgets(self):
            ???+ note "Create annotator widgets and assign Python callbacks."
            from bokeh.models import TextInput
            self.annotator_input = TextInput(title="Label:")
            self.annotator_apply = Button(
            def callback_apply():
                A callback on clicking the 'self.annotator_apply' button.
                Update labels in the source.
                label = self.annotator_input.value
                selected_idx = self.sources["raw"].selected.indices
                if not selected_idx:
                        "attempting annotation: did not select any data points. Eligible subset is 'raw'."
                self._info(f"applying {len(selected_idx)} annotations...")
                # update label in both the df and the data source
                self.dfs["raw"].loc[selected_idx, "label"] = label
                patch_to_apply = [(_idx, label) for _idx in selected_idx]
                self.sources["raw"].patch({"label": patch_to_apply})
                self._good(f"applied {len(selected_idx)} annotations: {label}")
            # assign the callback and keep the reference
            self._callback_apply = callback_apply
        def plot(self):
            ???+ note "Re-plot all data points with the new labels."
                Overrides the parent method.
                Determines the label -> color mapping dynamically.
            xy_axes = self.find_embedding_fields()[:2]
            for _key, _source in self.sources.items():
                self._good(f"Plotted subset {_key} with {self.dfs[_key].shape[0]} points")


    Re-plot all data points with the new labels.

    Overrides the parent method. Determines the label -> color mapping dynamically.

    Source code in hover/core/explorer/
    def plot(self):
        ???+ note "Re-plot all data points with the new labels."
            Overrides the parent method.
            Determines the label -> color mapping dynamically.
        xy_axes = self.find_embedding_fields()[:2]
        for _key, _source in self.sources.items():
            self._good(f"Plotted subset {_key} with {self.dfs[_key].shape[0]} points")

    BokehDataFinder (BokehBaseExplorer)

    Plot data points in grey ('gainsboro') and highlight search positives in coral.


    • the search widgets will highlight the results through a change of color.
    • the search results can be used as a filter condition.
    Source code in hover/core/explorer/
    class BokehDataFinder(BokehBaseExplorer):
        ???+ note "Plot data points in grey ('gainsboro') and highlight search positives in coral."
            -   the search widgets will highlight the results through a change of color.
            -   the search results can be used as a filter condition.
            _key: {
                "constant": {"line_alpha": 0.4},
                "search": {
                    "size": ("size", 10, 5, 7),
                    "fill_alpha": ("fill_alpha", 0.4, 0.1, 0.2),
                    "color": ("color", "coral", "linen", "gainsboro"),
            for _key in ["raw", "train", "dev", "test"]
        def _setup_widgets(self):
            ???+ note "Create a checkbox that toggles using search results as a selection filter."
            from bokeh.models import CheckboxGroup
            self.search_filter_box = CheckboxGroup(
                labels=["use as selection filter"], active=[]
        def _subroutine_search_activate_callbacks(self):
            ???+ note "Activate search callback functions by binding them to widgets."
            def filter_flag():
                return bool(0 in
            def filter_by_search(indices, subset):
                Filter selection with search results on a subset.
                if not filter_flag():
                    return indices
                search_scores = self.sources[subset].data[SEARCH_SCORE_FIELD]
                matched = set(np.where(np.array(search_scores) > 0)[0])
                return indices.intersection(matched)
            for _key in self.sources.keys():
            # when toggled as active, search changes trigger selection filter
            for _widget in self._search_watch_widgets():
                    lambda attr, old, new: self._selection_stages_callback(
                        "load", "write", "read"
                    if filter_flag()
                    else None,
            # change of toggles always trigger selection filter
                lambda attr, old, new: self._selection_stages_callback(
                    "load", "write", "read"
        def plot(self):
            ???+ note "Plot all data points."
            xy_axes = self.find_embedding_fields()[:2]
            for _key, _source in self.sources.items():
                    *xy_axes, name=_key, source=_source, **self.glyph_kwargs[_key]
                self._good(f"Plotted subset {_key} with {self.dfs[_key].shape[0]} points")


    Plot all data points.
    Source code in hover/core/explorer/
    def plot(self):
        ???+ note "Plot all data points."
        xy_axes = self.find_embedding_fields()[:2]
        for _key, _source in self.sources.items():
                *xy_axes, name=_key, source=_source, **self.glyph_kwargs[_key]
            self._good(f"Plotted subset {_key} with {self.dfs[_key].shape[0]} points")

    BokehMarginExplorer (BokehBaseExplorer)

    Plot data points along with two versions of labels.

    Could be useful for A/B tests.


    • can choose to only plot the margins about specific labels.
    • currently not considering multi-label scenarios.
    Source code in hover/core/explorer/
    class BokehMarginExplorer(BokehBaseExplorer):
        ???+ note "Plot data points along with two versions of labels."
            Could be useful for A/B tests.
            - can choose to only plot the margins about specific labels.
            - currently not considering multi-label scenarios.
            _key: {
                "constant": {"color": "gainsboro", "line_alpha": 0.5, "fill_alpha": 0.0},
                "search": {"size": ("size", 10, 5, 7)},
            for _key in ["raw", "train", "dev"]
        DEFAULT_SUBSET_MAPPING = {_k: _k for _k in ["raw", "train", "dev"]}
        def __init__(self, df_dict, label_col_a, label_col_b, **kwargs):
            ???+ note "Additional constructor"
                On top of the requirements of the parent class,
                the input dataframe should contain:
                - label_col_a and label_col_b for "label margins".
                | Param         | Type   | Description                  |
                | :------------ | :----- | :--------------------------- |
                | `df_dict`     | `dict` | `str` -> `DataFrame` mapping |
                | `label_col_a` | `str`  | column for label set A       |
                | `label_col_b` | `str`  | column for label set B       |
                | `**kwargs`    |        | forwarded to `bokeh.plotting.figure` |
            self.label_col_a = label_col_a
            self.label_col_b = label_col_b
            super().__init__(df_dict, **kwargs)
        def _mandatory_column_defaults(self):
            ???+ note "Mandatory columns and default values."
                If default value is None, will raise exception if the column is not found.
            column_to_value = super()._mandatory_column_defaults()
                    self.label_col_a: None,
                    self.label_col_b: None,
            return column_to_value
        def plot(self, label, **kwargs):
            ???+ note "Plot the margins about a single label."
                | Param      | Type   | Description                  |
                | :--------- | :----- | :--------------------------- |
                | `label`    |        | the label to plot about      |
                | `**kwargs` |        | forwarded to plotting markers |
            xy_axes = self.find_embedding_fields()[:2]
            for _key, _source in self.sources.items():
                # prepare plot settings
                eff_kwargs = self.glyph_kwargs[_key].copy()
                eff_kwargs["legend_label"] = f"{label}"
                # create agreement/increment/decrement subsets
                col_a_pos = np.where(self.dfs[_key][self.label_col_a] == label)[0].tolist()
                col_a_neg = np.where(self.dfs[_key][self.label_col_a] != label)[0].tolist()
                col_b_pos = np.where(self.dfs[_key][self.label_col_b] == label)[0].tolist()
                col_b_neg = np.where(self.dfs[_key][self.label_col_b] != label)[0].tolist()
                agreement_view = CDSView(
                    source=_source, filters=[IndexFilter(col_a_pos), IndexFilter(col_b_pos)]
                increment_view = CDSView(
                    source=_source, filters=[IndexFilter(col_a_neg), IndexFilter(col_b_pos)]
                decrement_view = CDSView(
                    source=_source, filters=[IndexFilter(col_a_pos), IndexFilter(col_b_neg)]
                to_plot = [
                    {"view": agreement_view, "marker": self.figure.square},
                    {"view": increment_view, "marker": self.figure.x},
                    {"view": decrement_view, "marker": self.figure.cross},
                # plot created subsets
                for _dict in to_plot:
                    _view = _dict["view"]
                    _marker = _dict["marker"]
                    _marker(*xy_axes, name=_key, source=_source, view=_view, **eff_kwargs)

    __init__(self, df_dict, label_col_a, label_col_b, **kwargs) special

    Additional constructor

    On top of the requirements of the parent class, the input dataframe should contain:

    • label_col_a and label_col_b for "label margins".
    Param Type Description
    df_dict dict str -> DataFrame mapping
    label_col_a str column for label set A
    label_col_b str column for label set B
    **kwargs forwarded to bokeh.plotting.figure
    Source code in hover/core/explorer/
    def __init__(self, df_dict, label_col_a, label_col_b, **kwargs):
        ???+ note "Additional constructor"
            On top of the requirements of the parent class,
            the input dataframe should contain:
            - label_col_a and label_col_b for "label margins".
            | Param         | Type   | Description                  |
            | :------------ | :----- | :--------------------------- |
            | `df_dict`     | `dict` | `str` -> `DataFrame` mapping |
            | `label_col_a` | `str`  | column for label set A       |
            | `label_col_b` | `str`  | column for label set B       |
            | `**kwargs`    |        | forwarded to `bokeh.plotting.figure` |
        self.label_col_a = label_col_a
        self.label_col_b = label_col_b
        super().__init__(df_dict, **kwargs)

    plot(self, label, **kwargs)

    Plot the margins about a single label.
    Param Type Description
    label the label to plot about
    **kwargs forwarded to plotting markers
    Source code in hover/core/explorer/
    def plot(self, label, **kwargs):
        ???+ note "Plot the margins about a single label."
            | Param      | Type   | Description                  |
            | :--------- | :----- | :--------------------------- |
            | `label`    |        | the label to plot about      |
            | `**kwargs` |        | forwarded to plotting markers |
        xy_axes = self.find_embedding_fields()[:2]
        for _key, _source in self.sources.items():
            # prepare plot settings
            eff_kwargs = self.glyph_kwargs[_key].copy()
            eff_kwargs["legend_label"] = f"{label}"
            # create agreement/increment/decrement subsets
            col_a_pos = np.where(self.dfs[_key][self.label_col_a] == label)[0].tolist()
            col_a_neg = np.where(self.dfs[_key][self.label_col_a] != label)[0].tolist()
            col_b_pos = np.where(self.dfs[_key][self.label_col_b] == label)[0].tolist()
            col_b_neg = np.where(self.dfs[_key][self.label_col_b] != label)[0].tolist()
            agreement_view = CDSView(
                source=_source, filters=[IndexFilter(col_a_pos), IndexFilter(col_b_pos)]
            increment_view = CDSView(
                source=_source, filters=[IndexFilter(col_a_neg), IndexFilter(col_b_pos)]
            decrement_view = CDSView(
                source=_source, filters=[IndexFilter(col_a_pos), IndexFilter(col_b_neg)]
            to_plot = [
                {"view": agreement_view, "marker": self.figure.square},
                {"view": increment_view, "marker": self.figure.x},
                {"view": decrement_view, "marker": self.figure.cross},
            # plot created subsets
            for _dict in to_plot:
                _view = _dict["view"]
                _marker = _dict["marker"]
                _marker(*xy_axes, name=_key, source=_source, view=_view, **eff_kwargs)

    BokehSnorkelExplorer (BokehBaseExplorer)

    Plot data points along with labeling function (LF) outputs.


    • each labeling function corresponds to its own line_color.
    • uses a different marker for each type of predictions: square for 'correct', x for 'incorrect', cross for 'missed', circle for 'hit'.
    • 'correct': the LF made a correct prediction on a point in the 'labeled' set.
    • 'incorrect': the LF made an incorrect prediction on a point in the 'labeled' set.
    • 'missed': the LF is capable of predicting the target class, but did not make such prediction on the particular point.
    • 'hit': the LF made a prediction on a point in the 'raw' set.
    Source code in hover/core/explorer/
    class BokehSnorkelExplorer(BokehBaseExplorer):
        ???+ note "Plot data points along with labeling function (LF) outputs."
            -   each labeling function corresponds to its own line_color.
            -   uses a different marker for each type of predictions: square for 'correct', x for 'incorrect', cross for 'missed', circle for 'hit'.
              -   'correct': the LF made a correct prediction on a point in the 'labeled' set.
              -   'incorrect': the LF made an incorrect prediction on a point in the 'labeled' set.
              -   'missed': the LF is capable of predicting the target class, but did not make such prediction on the particular point.
              -   'hit': the LF made a prediction on a point in the 'raw' set.
            "raw": {
                "constant": {"line_alpha": 1.0, "color": "gainsboro"},
                "search": {
                    "size": ("size", 10, 5, 7),
                    "fill_alpha": ("fill_alpha", 0.4, 0.05, 0.2),
            "labeled": {
                "constant": {"line_alpha": 1.0, "fill_alpha": 0.0},
                "search": {"size": ("size", 10, 5, 7)},
        DEFAULT_SUBSET_MAPPING = {"raw": "raw", "dev": "labeled"}
        def __init__(self, df_dict, **kwargs):
            ???+ note "Additional constructor"
                Set up
                -   a list to keep track of plotted labeling functions.
                -   a palette for plotting labeling function predictions.
                | Param       | Type   | Description                  |
                | :---------- | :----- | :--------------------------- |
                | `df_dict`   | `dict` | `str` -> `DataFrame` mapping |
                | `**kwargs`  |        | forwarded to `bokeh.plotting.figure` |
            super().__init__(df_dict, **kwargs)
            self.palette = list(Category20[20])
            self._subscribed_lf_list = None
        def _setup_sources(self):
            ???+ note "Create data structures that source interactions will need."
            # keep track of plotted LFs and glyphs, which will interact with sources
            self.lf_data = OrderedDict()
        def subscribed_lf_list(self):
            ???+ note "A list of LFs to which the explorer can be lazily synchronized."
                Intended for recipes where the user can modify LFs without having access to the explorer.
            return self._subscribed_lf_list
        def subscribed_lf_list(self, lf_list):
            ???+ note "Subscribe to a list of LFs."
            assert isinstance(lf_list, list), f"Expected a list of LFs, got {lf_list}"
            if self.subscribed_lf_list is None:
                self._good("Subscribed to a labeling function list BY REFERENCE.")
                self._warn("Changing labeling function list subscription.")
            self._subscribed_lf_list = lf_list
        def _setup_widgets(self):
            ???+ note "Create labeling function support widgets and assign Python callbacks."
        def _subroutine_setup_lf_list_refresher(self):
            ???+ note "Create widget for refreshing LF list and replotting."
            self.lf_list_refresher = Button(
                label="Refresh Functions",
            def callback_refresh_lf_plot():
                Re-plot according to subscribed_lf_list.
                if self.subscribed_lf_list is None:
                    self._warn("cannot refresh LF plot without subscribed LF list.")
                lf_names_to_keep = set([ for _lf in self.subscribed_lf_list])
                lf_names_to_drop = set(self.lf_data.keys()).difference(lf_names_to_keep)
                for _lf_name in lf_names_to_drop:
                for _lf in self.subscribed_lf_list:
            def callback_refresh_lf_menu():
                The menu was assigned by value and needs to stay consistent with LF updates.
                To be triggered in self.plot_new_lf() and self.unplot_lf().
       = list(self.lf_data.keys())
       = list(self.lf_data.keys())
            self._callback_refresh_lf_menu = callback_refresh_lf_menu
            # self.lf_list_refresher.on_click(callback_refresh_lf_menu)
        def _subroutine_setup_lf_apply_trigger(self):
            ???+ note "Create widget for applying LFs on data."
            self.lf_apply_trigger = Dropdown(
                label="Apply Labels",
            def callback_apply(event):
                A callback on clicking the 'self.lf_apply_trigger' button.
                Update labels in the source similarly to the annotator.
                However, in this explorer, because LFs already use color, the produced labels will not.
                lf = self.lf_data[event.item]["lf"]
                assert callable(lf), f"Expected a function, got {lf}"
                selected_idx = self.sources["raw"].selected.indices
                if not selected_idx:
                        "attempting labeling by function: did not select any data points. Eligible subset is 'raw'."
                labels = self.dfs["raw"].iloc[selected_idx].apply(lf, axis=1).values
                num_nontrivial = len(
                    list(filter(lambda l: l != module_config.ABSTAIN_DECODED, labels))
                # update label in both the df and the data source
                self.dfs["raw"].loc[selected_idx, "label"] = labels
                for _idx, _label in zip(selected_idx, labels):
                    _idx = int(_idx)
                    self.sources["raw"].patch({"label": [(_idx, _label)]})
                    f"applied {num_nontrivial}/{len(labels)} annotations by func {}"
        def _subroutine_setup_lf_filter_trigger(self):
            ???+ note "Create widget for using LFs to filter data."
            self.lf_filter_trigger = Dropdown(
                label="Use as Selection Filter",
            def callback_filter(event):
                A callback on clicking the 'self.lf_filter_trigger' button.
                Update selected indices in a one-time manner.
                lf = self.lf_data[event.item]["lf"]
                assert callable(lf), f"Expected a function, got {lf}"
                for _key, _source in self.sources.items():
                    _selected = _source.selected.indices
                    _labels = self.dfs[_key].iloc[_selected].apply(lf, axis=1).values
                    _kept = [
                        for _idx, _label in zip(_selected, _labels)
                        if _label != module_config.ABSTAIN_DECODED
                    self.sources[_key].selected.indices = _kept
                # selection reduced, need to trigger readall callbacks
        def _postprocess_sources(self):
            ???+ note "Refresh all LF glyphs because data source has changed."
            for _lf_name in self.lf_data.keys():
        def plot(self, *args, **kwargs):
            ???+ note "Plot the raw subset in the background."
            xy_axes = self.find_embedding_fields()[:2]
                *xy_axes, name="raw", source=self.sources["raw"], **self.glyph_kwargs["raw"]
            self._good(f"Plotted subset raw with {self.dfs['raw'].shape[0]} points")
        def plot_lf(self, lf, **kwargs):
            ???+ note "Add or refresh a single labeling function on the plot."
                | Param       | Type             | Description                  |
                | :---------- | :--------------- | :--------------------------- |
                | `lf`        | `callable`       | labeling function decorated by `@labeling_function()` from `hover.utils.snorkel_helper` |
                | `**kwargs`  |             | forwarded to `self.plot_new_lf()` |
            # keep track of added LF
            if in self.lf_data:
                # skip if the functions are identical
                if self.lf_data[]["lf"] is lf:
                # overwrite the function and refresh glyphs
                self.lf_data[]["lf"] = lf
            self.plot_new_lf(lf, **kwargs)
        def unplot_lf(self, lf_name):
            ???+ note "Remove a single labeling function from the plot."
                | Param     | Type   | Description               |
                | :-------- | :----- | :------------------------ |
                | `lf_name` | `str`  | name of labeling function |
            assert lf_name in self.lf_data, f"trying to remove non-existing LF: {lf_name}"
            data_dict = self.lf_data.pop(lf_name)
            lf, glyph_dict = data_dict["lf"], data_dict["glyphs"]
            assert == lf_name, f"LF name mismatch: {} vs {lf_name}"
            # remove from legend, checking that there is exactly one entry
            legend_idx_to_pop = None
            for i, _item in enumerate(self.figure.legend.items):
                _label = _item.label.value
                if _label == lf_name:
                    assert legend_idx_to_pop is None, f"Legend collision: {lf_name}"
                    legend_idx_to_pop = i
            assert isinstance(legend_idx_to_pop, int), f"Missing from legend: {lf_name}"
            # remove from renderers
            # get indices to pop in ascending order
            renderer_indices_to_pop = []
            for i, _renderer in enumerate(self.figure.renderers):
                if lf_name in _renderer.glyph.tags:
            # check that the number of glyphs found matches the expected value
            num_fnd, num_exp = len(renderer_indices_to_pop), len(glyph_dict)
            assert num_fnd == num_exp, f"Glyph mismatch: {num_fnd} vs. {num_exp}"
            # process indices in descending order to avoid shifts
            for i in renderer_indices_to_pop[::-1]:
            # return color to palette so that another LF can use it
            self._good(f"Unplotted LF {lf_name}")
        def refresh_glyphs(self, lf_name):
            ???+ note "Refresh the glyph(s) of a single LF based on its name."
                | Param     | Type   | Description               |
                | :-------- | :----- | :------------------------ |
                | `lf_name` | `str`  | name of labeling function |
                Assumes that specified C/I/M/H glyphs are stored.
                1. re-compute L_raw/L_labeled and CDSViews
                2. update the view for each glyph
            assert lf_name in self.lf_data, f"trying to refresh non-existing LF: {lf_name}"
            lf = self.lf_data[lf_name]["lf"]
            L_raw = self.dfs["raw"].apply(lf, axis=1).values
            L_labeled = self.dfs["labeled"].apply(lf, axis=1).values
            glyph_codes = self.lf_data[lf_name]["glyphs"].keys()
            if "C" in glyph_codes:
                c_view = self._view_correct(L_labeled)
                self.lf_data[lf_name]["glyphs"]["C"].view = c_view
            if "I" in glyph_codes:
                i_view = self._view_incorrect(L_labeled)
                self.lf_data[lf_name]["glyphs"]["I"].view = i_view
            if "M" in glyph_codes:
                m_view = self._view_missed(L_labeled, lf.targets)
                self.lf_data[lf_name]["glyphs"]["M"].view = m_view
            if "H" in glyph_codes:
                h_view = self._view_hit(L_raw)
                self.lf_data[lf_name]["glyphs"]["H"].view = h_view
            self._good(f"Refreshed the glyphs of LF {lf_name}")
        def plot_new_lf(
            self, lf, L_raw=None, L_labeled=None, include=("C", "I", "M"), **kwargs
            ???+ note "Plot a single labeling function and keep its settings for update."
                | Param       | Type             | Description                  |
                | :---------- | :--------------- | :--------------------------- |
                | `lf`        | `callable`       | labeling function decorated by `@labeling_function()` from `hover.utils.snorkel_helper` |
                | `L_raw`     | `np.ndarray`     | predictions, in decoded `str`, on the `"raw"` set |
                | `L_labeled` | `np.ndarray`     | predictions, in decoded `str`, on the `"labeled"` set |
                | `include`   | `tuple` of `str` | "C" for correct, "I" for incorrect, "M" for missed, "H" for hit: types of predictions to make visible in the plot |
                | `**kwargs`  |                  | forwarded to plotting markers |
                - lf: labeling function decorated by `@labeling_function()` from `hover.utils.snorkel_helper`
                - L_raw: numpy.ndarray
                - L_labeled: numpy.ndarray
                - include: subsets to show, which can be correct(C)/incorrect(I)/missed(M)/hit(H).
            # existing LF should not trigger this method
            assert not in self.lf_data, f"LF collision: {}"
            # calculate predicted labels if not provided
            if L_raw is None:
                L_raw = self.dfs["raw"].apply(lf, axis=1).values
            if L_labeled is None:
                L_labeled = self.dfs["labeled"].apply(lf, axis=1).values
            # prepare plot settings
            assert self.palette, f"Palette depleted, # LFs: {len(self.lf_data)}"
            legend_label =
            color = self.palette.pop(0)
            xy_axes = self.find_embedding_fields()[:2]
            raw_glyph_kwargs = self.glyph_kwargs["raw"].copy()
            raw_glyph_kwargs["legend_label"] = legend_label
            raw_glyph_kwargs["color"] = color
            labeled_glyph_kwargs = self.glyph_kwargs["labeled"].copy()
            labeled_glyph_kwargs["legend_label"] = legend_label
            labeled_glyph_kwargs["color"] = color
            # create dictionary to prepare for dynamic lf & glyph updates
            data_dict = {"lf": lf, "color": color, "glyphs": {}}
            # add correct/incorrect/missed/hit glyphs
            if "C" in include:
                view = self._view_correct(L_labeled)
                data_dict["glyphs"]["C"] = self.figure.square(
            if "I" in include:
                view = self._view_incorrect(L_labeled)
                data_dict["glyphs"]["I"] = self.figure.x(
            if "M" in include:
                view = self._view_missed(L_labeled, lf.targets)
                data_dict["glyphs"]["M"] = self.figure.cross(
            if "H" in include:
                view = self._view_hit(L_raw)
                data_dict["glyphs"]["H"] =
            # assign the completed dictionary
            self.lf_data[] = data_dict
            # reflect LF update in widgets
            self._good(f"Plotted new LF {}")
        def _view_correct(self, L_labeled):
            ???+ note "Determine the portion correctly labeled by a labeling function."
                | Param       | Type         | Description                  |
                | :---------- | :----------- | :--------------------------- |
                | `L_labeled` | `np.ndarray` | predictions on the labeled subset |
            if L_labeled.shape[0] == 0:
                indices = []
                agreed = self.dfs["labeled"]["label"].values == L_labeled
                attempted = L_labeled != module_config.ABSTAIN_DECODED
                indices = np.where(np.multiply(agreed, attempted))[0].tolist()
            view = CDSView(source=self.sources["labeled"], filters=[IndexFilter(indices)])
            return view
        def _view_incorrect(self, L_labeled):
            ???+ note "Determine the portion incorrectly labeled by a labeling function."
                | Param       | Type         | Description                  |
                | :---------- | :----------- | :--------------------------- |
                | `L_labeled` | `np.ndarray` | predictions on the labeled subset |
            if L_labeled.shape[0] == 0:
                indices = []
                disagreed = self.dfs["labeled"]["label"].values != L_labeled
                attempted = L_labeled != module_config.ABSTAIN_DECODED
                indices = np.where(np.multiply(disagreed, attempted))[0].tolist()
            view = CDSView(source=self.sources["labeled"], filters=[IndexFilter(indices)])
            return view
        def _view_missed(self, L_labeled, targets):
            ???+ note "Determine the portion missed by a labeling function."
                | Param       | Type          | Description                  |
                | :---------- | :------------ | :--------------------------- |
                | `L_labeled` | `np.ndarray`  | predictions on the labeled subset |
                | `targets` | `list` of `str` | labels that the function aims for |
            if L_labeled.shape[0] == 0:
                indices = []
                targetable = np.isin(self.dfs["labeled"]["label"], targets)
                abstained = L_labeled == module_config.ABSTAIN_DECODED
                indices = np.where(np.multiply(targetable, abstained))[0].tolist()
            view = CDSView(source=self.sources["labeled"], filters=[IndexFilter(indices)])
            return view
        def _view_hit(self, L_raw):
            ???+ note "Determine the portion hit by a labeling function."
                | Param       | Type         | Description                  |
                | :---------- | :----------- | :--------------------------- |
                | `L_raw`     | `np.ndarray` | predictions on the raw subset |
            if L_raw.shape[0] == 0:
                indices = []
                indices = np.where(L_raw != module_config.ABSTAIN_DECODED)[0].tolist()
            view = CDSView(source=self.sources["raw"], filters=[IndexFilter(indices)])
            return view

    subscribed_lf_list property writable

    A list of LFs to which the explorer can be lazily synchronized.

    Intended for recipes where the user can modify LFs without having access to the explorer.

    __init__(self, df_dict, **kwargs) special

    Additional constructor

    Set up

    • a list to keep track of plotted labeling functions.
    • a palette for plotting labeling function predictions.
    Param Type Description
    df_dict dict str -> DataFrame mapping
    **kwargs forwarded to bokeh.plotting.figure
    Source code in hover/core/explorer/
    def __init__(self, df_dict, **kwargs):
        ???+ note "Additional constructor"
            Set up
            -   a list to keep track of plotted labeling functions.
            -   a palette for plotting labeling function predictions.
            | Param       | Type   | Description                  |
            | :---------- | :----- | :--------------------------- |
            | `df_dict`   | `dict` | `str` -> `DataFrame` mapping |
            | `**kwargs`  |        | forwarded to `bokeh.plotting.figure` |
        super().__init__(df_dict, **kwargs)
        self.palette = list(Category20[20])
        self._subscribed_lf_list = None

    plot(self, *args, **kwargs)

    Plot the raw subset in the background.
    Source code in hover/core/explorer/
    def plot(self, *args, **kwargs):
        ???+ note "Plot the raw subset in the background."
        xy_axes = self.find_embedding_fields()[:2]
            *xy_axes, name="raw", source=self.sources["raw"], **self.glyph_kwargs["raw"]
        self._good(f"Plotted subset raw with {self.dfs['raw'].shape[0]} points")

    plot_lf(self, lf, **kwargs)

    Add or refresh a single labeling function on the plot.
    Param Type Description
    lf callable labeling function decorated by @labeling_function() from hover.utils.snorkel_helper
    **kwargs forwarded to self.plot_new_lf()
    Source code in hover/core/explorer/
    def plot_lf(self, lf, **kwargs):
        ???+ note "Add or refresh a single labeling function on the plot."
            | Param       | Type             | Description                  |
            | :---------- | :--------------- | :--------------------------- |
            | `lf`        | `callable`       | labeling function decorated by `@labeling_function()` from `hover.utils.snorkel_helper` |
            | `**kwargs`  |             | forwarded to `self.plot_new_lf()` |
        # keep track of added LF
        if in self.lf_data:
            # skip if the functions are identical
            if self.lf_data[]["lf"] is lf:
            # overwrite the function and refresh glyphs
            self.lf_data[]["lf"] = lf
        self.plot_new_lf(lf, **kwargs)

    plot_new_lf(self, lf, L_raw=None, L_labeled=None, include=('C', 'I', 'M'), **kwargs)

    Plot a single labeling function and keep its settings for update.
    Param Type Description
    lf callable labeling function decorated by @labeling_function() from hover.utils.snorkel_helper
    L_raw np.ndarray predictions, in decoded str, on the "raw" set
    L_labeled np.ndarray predictions, in decoded str, on the "labeled" set
    include tuple of str "C" for correct, "I" for incorrect, "M" for missed, "H" for hit: types of predictions to make visible in the plot
    **kwargs forwarded to plotting markers
    • lf: labeling function decorated by @labeling_function() from hover.utils.snorkel_helper
    • L_raw: numpy.ndarray
    • L_labeled: numpy.ndarray
    • include: subsets to show, which can be correct(C)/incorrect(I)/missed(M)/hit(H).
    Source code in hover/core/explorer/
    def plot_new_lf(
        self, lf, L_raw=None, L_labeled=None, include=("C", "I", "M"), **kwargs
        ???+ note "Plot a single labeling function and keep its settings for update."
            | Param       | Type             | Description                  |
            | :---------- | :--------------- | :--------------------------- |
            | `lf`        | `callable`       | labeling function decorated by `@labeling_function()` from `hover.utils.snorkel_helper` |
            | `L_raw`     | `np.ndarray`     | predictions, in decoded `str`, on the `"raw"` set |
            | `L_labeled` | `np.ndarray`     | predictions, in decoded `str`, on the `"labeled"` set |
            | `include`   | `tuple` of `str` | "C" for correct, "I" for incorrect, "M" for missed, "H" for hit: types of predictions to make visible in the plot |
            | `**kwargs`  |                  | forwarded to plotting markers |
            - lf: labeling function decorated by `@labeling_function()` from `hover.utils.snorkel_helper`
            - L_raw: numpy.ndarray
            - L_labeled: numpy.ndarray
            - include: subsets to show, which can be correct(C)/incorrect(I)/missed(M)/hit(H).
        # existing LF should not trigger this method
        assert not in self.lf_data, f"LF collision: {}"
        # calculate predicted labels if not provided
        if L_raw is None:
            L_raw = self.dfs["raw"].apply(lf, axis=1).values
        if L_labeled is None:
            L_labeled = self.dfs["labeled"].apply(lf, axis=1).values
        # prepare plot settings
        assert self.palette, f"Palette depleted, # LFs: {len(self.lf_data)}"
        legend_label =
        color = self.palette.pop(0)
        xy_axes = self.find_embedding_fields()[:2]
        raw_glyph_kwargs = self.glyph_kwargs["raw"].copy()
        raw_glyph_kwargs["legend_label"] = legend_label
        raw_glyph_kwargs["color"] = color
        labeled_glyph_kwargs = self.glyph_kwargs["labeled"].copy()
        labeled_glyph_kwargs["legend_label"] = legend_label
        labeled_glyph_kwargs["color"] = color
        # create dictionary to prepare for dynamic lf & glyph updates
        data_dict = {"lf": lf, "color": color, "glyphs": {}}
        # add correct/incorrect/missed/hit glyphs
        if "C" in include:
            view = self._view_correct(L_labeled)
            data_dict["glyphs"]["C"] = self.figure.square(
        if "I" in include:
            view = self._view_incorrect(L_labeled)
            data_dict["glyphs"]["I"] = self.figure.x(
        if "M" in include:
            view = self._view_missed(L_labeled, lf.targets)
            data_dict["glyphs"]["M"] = self.figure.cross(
        if "H" in include:
            view = self._view_hit(L_raw)
            data_dict["glyphs"]["H"] =
        # assign the completed dictionary
        self.lf_data[] = data_dict
        # reflect LF update in widgets
        self._good(f"Plotted new LF {}")

    refresh_glyphs(self, lf_name)

    Refresh the glyph(s) of a single LF based on its name.
    Param Type Description
    lf_name str name of labeling function

    Assumes that specified C/I/M/H glyphs are stored. Steps: (1) re-compute L_raw/L_labeled and CDSViews; (2) update the view for each glyph.

    Source code in hover/core/explorer/
    def refresh_glyphs(self, lf_name):
        ???+ note "Refresh the glyph(s) of a single LF based on its name."
            | Param     | Type   | Description               |
            | :-------- | :----- | :------------------------ |
            | `lf_name` | `str`  | name of labeling function |
            Assumes that specified C/I/M/H glyphs are stored.
            1. re-compute L_raw/L_labeled and CDSViews
            2. update the view for each glyph
        assert lf_name in self.lf_data, f"trying to refresh non-existing LF: {lf_name}"
        lf = self.lf_data[lf_name]["lf"]
        L_raw = self.dfs["raw"].apply(lf, axis=1).values
        L_labeled = self.dfs["labeled"].apply(lf, axis=1).values
        glyph_codes = self.lf_data[lf_name]["glyphs"].keys()
        if "C" in glyph_codes:
            c_view = self._view_correct(L_labeled)
            self.lf_data[lf_name]["glyphs"]["C"].view = c_view
        if "I" in glyph_codes:
            i_view = self._view_incorrect(L_labeled)
            self.lf_data[lf_name]["glyphs"]["I"].view = i_view
        if "M" in glyph_codes:
            m_view = self._view_missed(L_labeled, lf.targets)
            self.lf_data[lf_name]["glyphs"]["M"].view = m_view
        if "H" in glyph_codes:
            h_view = self._view_hit(L_raw)
            self.lf_data[lf_name]["glyphs"]["H"].view = h_view
        self._good(f"Refreshed the glyphs of LF {lf_name}")

    unplot_lf(self, lf_name)

    Remove a single labeling function from the plot.
    Param Type Description
    lf_name str name of labeling function
    Source code in hover/core/explorer/
    def unplot_lf(self, lf_name):
        ???+ note "Remove a single labeling function from the plot."
            | Param     | Type   | Description               |
            | :-------- | :----- | :------------------------ |
            | `lf_name` | `str`  | name of labeling function |
        assert lf_name in self.lf_data, f"trying to remove non-existing LF: {lf_name}"
        data_dict = self.lf_data.pop(lf_name)
        lf, glyph_dict = data_dict["lf"], data_dict["glyphs"]
        assert == lf_name, f"LF name mismatch: {} vs {lf_name}"
        # remove from legend, checking that there is exactly one entry
        legend_idx_to_pop = None
        for i, _item in enumerate(self.figure.legend.items):
            _label = _item.label.value
            if _label == lf_name:
                assert legend_idx_to_pop is None, f"Legend collision: {lf_name}"
                legend_idx_to_pop = i
        assert isinstance(legend_idx_to_pop, int), f"Missing from legend: {lf_name}"
        # remove from renderers
        # get indices to pop in ascending order
        renderer_indices_to_pop = []
        for i, _renderer in enumerate(self.figure.renderers):
            if lf_name in _renderer.glyph.tags:
        # check that the number of glyphs found matches the expected value
        num_fnd, num_exp = len(renderer_indices_to_pop), len(glyph_dict)
        assert num_fnd == num_exp, f"Glyph mismatch: {num_fnd} vs. {num_exp}"
        # process indices in descending order to avoid shifts
        for i in renderer_indices_to_pop[::-1]:
        # return color to palette so that another LF can use it
        self._good(f"Unplotted LF {lf_name}")

    BokehSoftLabelExplorer (BokehBaseExplorer)

    Plot data points according to their labels and confidence scores.


    • the predicted label will correspond to fill_color.
    • the confidence score, assumed to be a float between 0.0 and 1.0, will be reflected through fill_alpha.
    • currently not considering multi-label scenarios.
    Source code in hover/core/explorer/
    class BokehSoftLabelExplorer(BokehBaseExplorer):
        ???+ note "Plot data points according to their labels and confidence scores."
            - the predicted label will correspond to fill_color.
            - the confidence score, assumed to be a float between 0.0 and 1.0, will be reflected through fill_alpha.
            - currently not considering multi-label scenarios.
            _key: {"constant": {"line_alpha": 0.5}, "search": {"size": ("size", 10, 5, 7)}}
            for _key in ["raw", "train", "dev"]
        DEFAULT_SUBSET_MAPPING = {_k: _k for _k in ["raw", "train", "dev"]}
        def __init__(self, df_dict, label_col, score_col, **kwargs):
            ???+ note "Additional constructor"
                On top of the requirements of the parent class,
                the input dataframe should contain:
                - label_col and score_col for "soft predictions".
                | Param       | Type   | Description                  |
                | :---------- | :----- | :--------------------------- |
                | `df_dict`   | `dict` | `str` -> `DataFrame` mapping |
                | `label_col` | `str`  | column for the soft label    |
                | `score_col` | `str`  | column for the soft score    |
                | `**kwargs`  |        | forwarded to `bokeh.plotting.figure` |
            assert label_col != "label", "'label' field is reserved"
            self.label_col = label_col
            self.score_col = score_col
            super().__init__(df_dict, **kwargs)
        def _build_tooltip(self, specified):
            ???+ note "On top of the parent method, add the soft label fields to the tooltip."
                | Param            | Type   | Description                  |
                | :--------------- | :----- | :--------------------------- |
                | `specified`      | `str`  | user-specified HTML          |
                Note that this is a method rather than a class attribute because
                child classes may involve instance attributes in the tooltip.
            if not specified:
                return bokeh_hover_tooltip(
                    custom={self.label_col: "Soft Label", self.score_col: "Soft Score"},
            return specified
        def _mandatory_column_defaults(self):
            ???+ note "Mandatory columns and default values."
                If default value is None, will raise exception if the column is not found.
            column_to_value = super()._mandatory_column_defaults()
                    self.label_col: module_config.ABSTAIN_DECODED,
                    self.score_col: 0.5,
            return column_to_value
        def _postprocess_sources(self):
            ???+ note "Infer glyph colors from the label dynamically."
            # infer glyph color from labels
            color_dict = self.auto_color_mapping()
            def get_color(label):
                # labels that have no entry in the color mapping fall back to a neutral gray
                fallback = "gainsboro"
                return color_dict.get(label, fallback)
            # infer glyph alpha from pseudo-percentile of soft label scores
            scores = np.concatenate(
                [_df[self.score_col].tolist() for _df in self.dfs.values()]
            scores_mean = scores.mean()
            scores_std = scores.std() + 1e-4
            def pseudo_percentile(confidence, lower=0.1, upper=0.9):
                # Map a confidence score to an alpha value clipped to [lower, upper],
                # based on its z-score against scores_mean / scores_std taken from
                # the enclosing scope.
                #
                # pretend that 2*std on each side covers everything, i.e. the whole
                # [lower, upper] range spans 4 standard deviations; the parentheses
                # matter: `upper - lower / 4` would divide only `lower` by 4
                unit_shift = (upper - lower) / 4
                # shift = unit_shift * z_score
                shift = unit_shift * (confidence - scores_mean) / scores_std
                # a score equal to the mean lands on 0.5
                percentile = 0.5 + shift
                # clip so that outliers never go below `lower` or above `upper`
                return min(upper, max(lower, percentile))
            # infer alpha from score percentiles
            for _key, _df in self.dfs.items():
                _color = _df[self.label_col].apply(get_color).tolist()
                _alpha = _df[self.score_col].apply(pseudo_percentile).tolist()
                self.sources[_key].add(_color, SOURCE_COLOR_FIELD)
                self.sources[_key].add(_alpha, SOURCE_ALPHA_FIELD)
        def _setup_widgets(self):
            ???+ note "Create score range slider that filters selections."
            from bokeh.models import RangeSlider, CheckboxGroup
            self.score_range = RangeSlider(
                value=(0.0, 1.0),
                title="Score range",
            self.score_filter_box = CheckboxGroup(
                labels=["use as selection filter"], active=[]
            self.score_filter = row(self.score_range, self.score_filter_box)
            def filter_flag():
                return bool(0 in
            def subroutine(df, lower, upper):
                Calculate indices with score between lower/upper bounds.
                keep_l = set(np.where(df[self.score_col] >= lower)[0])
                keep_u = set(np.where(df[self.score_col] <= upper)[0])
                kept = keep_l.intersection(keep_u)
                return kept
            def filter_by_score(indices, subset):
                Filter selection with slider range on a subset.
                if not filter_flag():
                    return indices
                in_range = subroutine(self.dfs[subset], *self.score_range.value)
                return indices.intersection(in_range)
            # selection change triggers score filter on the changed subset IFF filter box is toggled
            for _key in self.sources.keys():
            # when toggled as active, score range change triggers selection filter
                lambda attr, old, new: self._selection_stages_callback(
                    "load", "write", "read"
                if filter_flag()
                else None,
            # changing toggles always re-evaluate selection filter
                lambda attr, old, new: self._selection_stages_callback(
                    "load", "write", "read"
        def plot(self, **kwargs):
            ???+ note "Plot all data points, setting color alpha based on the soft score."
                | Param      | Type   | Description                  |
                | :--------- | :----- | :--------------------------- |
                | `**kwargs` |        | forwarded to plotting markers |
            xy_axes = self.find_embedding_fields()[:2]
            for _key, _source in self.sources.items():
                # prepare plot settings
                preset_kwargs = {
                    "color": SOURCE_COLOR_FIELD,
                    "fill_alpha": SOURCE_ALPHA_FIELD,
                eff_kwargs = self.glyph_kwargs[_key].copy()
      *xy_axes, name=_key, source=_source, **eff_kwargs)
                self._good(f"Plotted subset {_key} with {self.dfs[_key].shape[0]} points")

    __init__(self, df_dict, label_col, score_col, **kwargs) special

    Additional constructor

    On top of the requirements of the parent class, the input dataframe should contain:

    • label_col and score_col for "soft predictions".
    Param Type Description
    df_dict dict str -> DataFrame mapping
    label_col str column for the soft label
    score_col str column for the soft score
    **kwargs forwarded to bokeh.plotting.figure
    Source code in hover/core/explorer/
    def __init__(self, df_dict, label_col, score_col, **kwargs):
        ???+ note "Additional constructor"
            On top of the requirements of the parent class,
            the input dataframe should contain:
            - label_col and score_col for "soft predictions".
            | Param       | Type   | Description                  |
            | :---------- | :----- | :--------------------------- |
            | `df_dict`   | `dict` | `str` -> `DataFrame` mapping |
            | `label_col` | `str`  | column for the soft label    |
            | `score_col` | `str`  | column for the soft score    |
            | `**kwargs`  |        | forwarded to `bokeh.plotting.figure` |
        assert label_col != "label", "'label' field is reserved"
        self.label_col = label_col
        self.score_col = score_col
        super().__init__(df_dict, **kwargs)

    plot(self, **kwargs)

    Plot all data points, setting color alpha based on the soft score.
    Param Type Description
    **kwargs forwarded to plotting markers
    Source code in hover/core/explorer/
    def plot(self, **kwargs):
        ???+ note "Plot all data points, setting color alpha based on the soft score."
            | Param      | Type   | Description                  |
            | :--------- | :----- | :--------------------------- |
            | `**kwargs` |        | forwarded to plotting markers |
        xy_axes = self.find_embedding_fields()[:2]
        for _key, _source in self.sources.items():
            # prepare plot settings
            preset_kwargs = {
                "color": SOURCE_COLOR_FIELD,
                "fill_alpha": SOURCE_ALPHA_FIELD,
            eff_kwargs = self.glyph_kwargs[_key].copy()
  *xy_axes, name=_key, source=_source, **eff_kwargs)
            self._good(f"Plotted subset {_key} with {self.dfs[_key].shape[0]} points")