explanations

desdeo.explanations.explainer

Explainers are defined here.

ShapExplainer

Defines a SHAP explainer for reference point based methods.

Source code in desdeo/explanations/explainer.py

class ShapExplainer:
    """Defines a SHAP explainer for reference point based methods."""

    def __init__(self, problem_data: pl.DataFrame, input_symbols: list[str], output_symbols: list[str]):
        """Initialize the explainer.

        Initializes the explainer with given data, and input and output symbols.
        The data should contain the columns listed in the input and output symbols.
        This data is then used to simulate the inputs and outputs of an (interactive)
        multiobjective optimization method, which is used to explain the relation of its
        inputs and outputs using SHAP values.

        Note:
            The `data` can be generated by for a reference point based based by, e.g.,
            randomly sampling the input space and then evaluating the methods with the
            sampled inputs to generate outputs.

        Args:
            problem_data (pl.DataFrame): the data to simulate the input and
                outputs of a multiobjective optimization method.
            input_symbols (list[str]): the input symbols present in `data`.
                These symbols represent the inputs to the method.
            output_symbols (list[str]): the output symbols present in `data`.
                These symbols represent the outputs of the method.
        """
        self.data = problem_data
        self.input_symbols = input_symbols
        self.output_symbols = output_symbols
        self.input_array = self.data[self.input_symbols].to_numpy()
        self.output_array = self.data[self.output_symbols].to_numpy()
        self.to_output_tree = cKDTree(self.input_array)
        self.explainer = None

    def setup(self, background_data: pl.DataFrame):
        """Setup the explainer.

        Setups the SHAP explainer with  the given background data. The
        background data should have the columns `self.input_symbols`. The
        background data is used as the background (or missing data) when
        computing SHAP values.  The mean (or expected values) of the background
        data's output (`self.output_symbols`) will determine the baseline of the
        SHAP values.

        Note:
            To generate a dataset with meaningful expected values, e.g., in case
            the SHAP values are better understood by relating them to a specific baseline,
            see `desdeo.explanations.generate_biased_mean_data`.

        Args:
            background_data (pl.DataFrame): the background data.
        """
        self.explainer = shap.Explainer(
            self.evaluate,
            masker=background_data[self.input_symbols].to_numpy(),
        )

    def evaluate(self, evaluate_array: np.ndarray) -> np.ndarray:
        """Evaluates the multiobjective optimization method represented by the data.

        Note:
            Evaluation happens by finding the closest matching input array in the
            `self.input_array` and then using that value's corresponding output
            as the evaluation result. Closest means lowest Euclidean distance.

        Args:
            evaluate_array (np.ndarray): the inputs to the method represented by the data.
                Can be either a single input, or an array of multiple inputs. Used mainly by
                `self.explain_input`.

        Returns:
            np.ndarray: the evaluated output(s) corresponding to the input data.
        """
        _, indices = self.to_output_tree.query(evaluate_array)

        return self.output_array[indices]

    def explain_input(self, to_be_explained: pl.DataFrame) -> dict:
        """Explain an input and produces SHAP values.

        Args:
            to_be_explained (pl.DataFrame): the input to be explained. The
                dataframe must have the columns defined in `self.input_symbols`.

        Returns:
            dict: the key 'shaps' corresponds to the computed SHAP values for
                the input, the key 'base_values' is the baseline the SHAP values
                were computed against, and the key 'data' is the input the SHAP
                values were computed for.
        """
        _to_be_explained = to_be_explained[self.input_symbols].to_numpy()

        return self.explainer(_to_be_explained)

init

__init__(
    problem_data: DataFrame,
    input_symbols: list[str],
    output_symbols: list[str],
)

Initialize the explainer.

Initializes the explainer with given data, and input and output symbols. The data should contain the columns listed in the input and output symbols. This data is then used to simulate the inputs and outputs of an (interactive) multiobjective optimization method, which is used to explain the relation of its inputs and outputs using SHAP values.

Note

The data can be generated by for a reference point based based by, e.g., randomly sampling the input space and then evaluating the methods with the sampled inputs to generate outputs.

Parameters:

Name	Type	Description	Default
`problem_data`	`DataFrame`	the data to simulate the input and outputs of a multiobjective optimization method.	required
`input_symbols`	`list[str]`	the input symbols present in `data`. These symbols represent the inputs to the method.	required
`output_symbols`	`list[str]`	the output symbols present in `data`. These symbols represent the outputs of the method.	required

Source code in desdeo/explanations/explainer.py

def __init__(self, problem_data: pl.DataFrame, input_symbols: list[str], output_symbols: list[str]):
    """Initialize the explainer.

    Initializes the explainer with given data, and input and output symbols.
    The data should contain the columns listed in the input and output symbols.
    This data is then used to simulate the inputs and outputs of an (interactive)
    multiobjective optimization method, which is used to explain the relation of its
    inputs and outputs using SHAP values.

    Note:
        The `data` can be generated by for a reference point based based by, e.g.,
        randomly sampling the input space and then evaluating the methods with the
        sampled inputs to generate outputs.

    Args:
        problem_data (pl.DataFrame): the data to simulate the input and
            outputs of a multiobjective optimization method.
        input_symbols (list[str]): the input symbols present in `data`.
            These symbols represent the inputs to the method.
        output_symbols (list[str]): the output symbols present in `data`.
            These symbols represent the outputs of the method.
    """
    self.data = problem_data
    self.input_symbols = input_symbols
    self.output_symbols = output_symbols
    self.input_array = self.data[self.input_symbols].to_numpy()
    self.output_array = self.data[self.output_symbols].to_numpy()
    self.to_output_tree = cKDTree(self.input_array)
    self.explainer = None

evaluate

evaluate(evaluate_array: ndarray) -> np.ndarray

Evaluates the multiobjective optimization method represented by the data.

Note

Evaluation happens by finding the closest matching input array in the self.input_array and then using that value's corresponding output as the evaluation result. Closest means lowest Euclidean distance.

Parameters:

Name	Type	Description	Default
`evaluate_array`	`ndarray`	the inputs to the method represented by the data. Can be either a single input, or an array of multiple inputs. Used mainly by `self.explain_input`.	required

Returns:

Type	Description
`ndarray`	np.ndarray: the evaluated output(s) corresponding to the input data.

Source code in desdeo/explanations/explainer.py

def evaluate(self, evaluate_array: np.ndarray) -> np.ndarray:
    """Evaluates the multiobjective optimization method represented by the data.

    Note:
        Evaluation happens by finding the closest matching input array in the
        `self.input_array` and then using that value's corresponding output
        as the evaluation result. Closest means lowest Euclidean distance.

    Args:
        evaluate_array (np.ndarray): the inputs to the method represented by the data.
            Can be either a single input, or an array of multiple inputs. Used mainly by
            `self.explain_input`.

    Returns:
        np.ndarray: the evaluated output(s) corresponding to the input data.
    """
    _, indices = self.to_output_tree.query(evaluate_array)

    return self.output_array[indices]

explain_input

explain_input(to_be_explained: DataFrame) -> dict

Explain an input and produces SHAP values.

Parameters:

Name	Type	Description	Default
`to_be_explained`	`DataFrame`	the input to be explained. The dataframe must have the columns defined in `self.input_symbols`.	required

Returns:

Name	Type	Description
`dict`	`dict`	the key 'shaps' corresponds to the computed SHAP values for the input, the key 'base_values' is the baseline the SHAP values were computed against, and the key 'data' is the input the SHAP values were computed for.

Source code in desdeo/explanations/explainer.py

def explain_input(self, to_be_explained: pl.DataFrame) -> dict:
    """Explain an input and produces SHAP values.

    Args:
        to_be_explained (pl.DataFrame): the input to be explained. The
            dataframe must have the columns defined in `self.input_symbols`.

    Returns:
        dict: the key 'shaps' corresponds to the computed SHAP values for
            the input, the key 'base_values' is the baseline the SHAP values
            were computed against, and the key 'data' is the input the SHAP
            values were computed for.
    """
    _to_be_explained = to_be_explained[self.input_symbols].to_numpy()

    return self.explainer(_to_be_explained)

setup

setup(background_data: DataFrame)

Setup the explainer.

Setups the SHAP explainer with the given background data. The background data should have the columns self.input_symbols. The background data is used as the background (or missing data) when computing SHAP values. The mean (or expected values) of the background data's output (self.output_symbols) will determine the baseline of the SHAP values.

Note

To generate a dataset with meaningful expected values, e.g., in case the SHAP values are better understood by relating them to a specific baseline, see desdeo.explanations.generate_biased_mean_data.

Parameters:

Name	Type	Description	Default
`background_data`	`DataFrame`	the background data.	required

Source code in desdeo/explanations/explainer.py

def setup(self, background_data: pl.DataFrame):
    """Setup the explainer.

    Setups the SHAP explainer with  the given background data. The
    background data should have the columns `self.input_symbols`. The
    background data is used as the background (or missing data) when
    computing SHAP values.  The mean (or expected values) of the background
    data's output (`self.output_symbols`) will determine the baseline of the
    SHAP values.

    Note:
        To generate a dataset with meaningful expected values, e.g., in case
        the SHAP values are better understood by relating them to a specific baseline,
        see `desdeo.explanations.generate_biased_mean_data`.

    Args:
        background_data (pl.DataFrame): the background data.
    """
    self.explainer = shap.Explainer(
        self.evaluate,
        masker=background_data[self.input_symbols].to_numpy(),
    )

desdeo.explanations.lagrange

Utilities for working with Lagrange multipliers in explainable multiobjective optimization.

compute_tradeoffs

compute_tradeoffs(
    filtered_multipliers: dict[str, float] | None,
) -> dict[str, dict[str, float]] | None

Compute a tradeoff matrix from filtered Lagrange multipliers.

Tradeoffs represent marginal rates of substitution between objectives. For objectives i and j: tradeoff[i][j] = -lambda_j / lambda_i.

Parameters:

Name	Type	Description	Default
`filtered_multipliers`	`dict[str, float] \| None`	Dict mapping objective keys to their multiplier values. Typically the output of :func:`filter_lagrange_multipliers`.	required

Returns:

Type	Description
`dict[str, dict[str, float]] \| None`	A nested dict where `result[i][j]` is the tradeoff of improving
`dict[str, dict[str, float]] \| None`	objective i on objective j. Diagonal entries are 1.0.
`dict[str, dict[str, float]] \| None`	Returns `None` if input is `None` or empty.

Source code in desdeo/explanations/lagrange.py

def compute_tradeoffs(
    filtered_multipliers: dict[str, float] | None,
) -> dict[str, dict[str, float]] | None:
    """Compute a tradeoff matrix from filtered Lagrange multipliers.

    Tradeoffs represent marginal rates of substitution between objectives.
    For objectives *i* and *j*: ``tradeoff[i][j] = -lambda_j / lambda_i``.

    Args:
        filtered_multipliers: Dict mapping objective keys to their multiplier
            values. Typically the output of :func:`filter_lagrange_multipliers`.

    Returns:
        A nested dict where ``result[i][j]`` is the tradeoff of improving
        objective *i* on objective *j*. Diagonal entries are 1.0.
        Returns ``None`` if input is ``None`` or empty.
    """
    if not filtered_multipliers:
        return None

    objectives = list(filtered_multipliers.keys())
    tradeoffs: dict[str, dict[str, float]] = {}

    for obj_i in objectives:
        tradeoffs[obj_i] = {}
        lambda_i = filtered_multipliers[obj_i]

        if lambda_i == 0:
            for obj_j in objectives:
                tradeoffs[obj_i][obj_j] = 0.0 if obj_i != obj_j else 1.0
            continue

        for obj_j in objectives:
            if obj_i == obj_j:
                tradeoffs[obj_i][obj_j] = 1.0
            else:
                lambda_j = filtered_multipliers[obj_j]
                tradeoffs[obj_i][obj_j] = -lambda_j / lambda_i

    return tradeoffs

determine_active_objectives

determine_active_objectives(
    lagrange_multipliers: list[dict[str, float] | None],
    constraint_values: list[dict[str, float] | None]
    | None = None,
    objective_symbols: list[str] | None = None,
    threshold: float = 1e-05,
) -> list[list[str]]

Determine which objectives are active (binding) for each solution.

An objective is considered active if its corresponding constraint is binding (constraint value >= 0) or, when constraint values are not available, if its multiplier magnitude exceeds a threshold.

Parameters:

Name	Type	Description	Default
`lagrange_multipliers`	`list[dict[str, float] \| None]`	List of filtered multiplier dicts, one per solution.	required
`constraint_values`	`list[dict[str, float] \| None] \| None`	Optional list of filtered constraint value dicts.	`None`
`objective_symbols`	`list[str] \| None`	Optional list of known objective symbols.	`None`
`threshold`	`float`	Multiplier magnitude threshold for the fallback heuristic.	`1e-05`

Returns:

Type	Description
`list[list[str]]`	A list of lists, where each inner list contains the symbols of the
`list[list[str]]`	active objectives for the corresponding solution.

Source code in desdeo/explanations/lagrange.py

def determine_active_objectives(
    lagrange_multipliers: list[dict[str, float] | None],
    constraint_values: list[dict[str, float] | None] | None = None,
    objective_symbols: list[str] | None = None,
    threshold: float = 1e-5,
) -> list[list[str]]:
    """Determine which objectives are active (binding) for each solution.

    An objective is considered active if its corresponding constraint is
    binding (constraint value >= 0) or, when constraint values are not
    available, if its multiplier magnitude exceeds a threshold.

    Args:
        lagrange_multipliers: List of filtered multiplier dicts, one per solution.
        constraint_values: Optional list of filtered constraint value dicts.
        objective_symbols: Optional list of known objective symbols.
        threshold: Multiplier magnitude threshold for the fallback heuristic.

    Returns:
        A list of lists, where each inner list contains the symbols of the
        active objectives for the corresponding solution.
    """
    active_objectives: list[list[str]] = []

    if constraint_values and objective_symbols:
        for constraint_dict in constraint_values:
            if constraint_dict is None:
                active_objectives.append([])
                continue
            active = [obj for obj, value in constraint_dict.items() if obj in objective_symbols and value >= 0]
            active_objectives.append(active)
    elif objective_symbols:
        for multiplier_dict in lagrange_multipliers:
            if multiplier_dict is None:
                active_objectives.append([])
                continue
            active = [obj for obj in objective_symbols if abs(multiplier_dict.get(obj, 0.0)) > threshold]
            active_objectives.append(active)
    else:
        for multiplier_dict in lagrange_multipliers:
            if multiplier_dict is None:
                active_objectives.append([])
                continue
            active = [obj for obj, value in multiplier_dict.items() if abs(value) > threshold]
            active_objectives.append(active)

    return active_objectives

filter_constraint_values

filter_constraint_values(
    constraint_values: dict[str, float],
    objective_symbols: list[str] | None = None,
) -> dict[str, float]

Filter constraint values to keep one representative per objective.

Same grouping logic as :func:filter_lagrange_multipliers but applied to constraint values. Used to determine which constraints are active.

Parameters:

Name	Type	Description	Default
`constraint_values`	`dict[str, float]`	Raw constraint value dict from SolverResults.	required
`objective_symbols`	`list[str] \| None`	Optional list of objective symbols for grouping.	`None`

Returns:

Type	Description
`dict[str, float]`	A dict mapping each objective key to its filtered constraint value.

Source code in desdeo/explanations/lagrange.py

def filter_constraint_values(
    constraint_values: dict[str, float],
    objective_symbols: list[str] | None = None,
) -> dict[str, float]:
    """Filter constraint values to keep one representative per objective.

    Same grouping logic as :func:`filter_lagrange_multipliers` but applied
    to constraint values. Used to determine which constraints are active.

    Args:
        constraint_values: Raw constraint value dict from SolverResults.
        objective_symbols: Optional list of objective symbols for grouping.

    Returns:
        A dict mapping each objective key to its filtered constraint value.
    """
    grouped: dict[str, list[tuple[str, float]]] = defaultdict(list)

    for key, value in constraint_values.items():
        if objective_symbols is None:
            match = re.search(r"f_(\d+)", key)
            if match:
                f_i = match.group(1)
                grouped[f_i].append((key, value))
        else:
            for symbol in objective_symbols:
                if symbol in key:
                    grouped[symbol].append((key, value))
                    break

    filtered: dict[str, float] = {}
    for obj_key, entries in grouped.items():
        preferred = next((entry for entry in entries if not entry[0].endswith("eq")), None)
        if preferred is None and entries:
            preferred = entries[0]

        if preferred:
            key = obj_key if (objective_symbols is not None and obj_key in objective_symbols) else preferred[0]
            filtered[key] = preferred[1]

    return filtered

filter_lagrange_multipliers

filter_lagrange_multipliers(
    lagrange_multipliers: dict[str, float],
    objective_symbols: list[str] | None = None,
) -> dict[str, float]

Filter raw Lagrange multipliers to keep one representative per objective.

Solver results may contain multiple multiplier entries per objective (e.g., from equality and inequality constraints). This function groups them and selects a preferred one (non-equality constraint preferred).

Parameters:

Name	Type	Description	Default
`lagrange_multipliers`	`dict[str, float]`	Raw multiplier dict from SolverResults.	required
`objective_symbols`	`list[str] \| None`	If provided, use these to group multipliers by objective symbol. Otherwise, group by `f_{index}` pattern.	`None`

Returns:

Type	Description
`dict[str, float]`	A dict mapping each objective key to its filtered multiplier value.
`dict[str, float]`	Missing objectives are assigned 0.0.

Source code in desdeo/explanations/lagrange.py

def filter_lagrange_multipliers(  # noqa: C901
    lagrange_multipliers: dict[str, float],
    objective_symbols: list[str] | None = None,
) -> dict[str, float]:
    """Filter raw Lagrange multipliers to keep one representative per objective.

    Solver results may contain multiple multiplier entries per objective
    (e.g., from equality and inequality constraints). This function groups
    them and selects a preferred one (non-equality constraint preferred).

    Args:
        lagrange_multipliers: Raw multiplier dict from SolverResults.
        objective_symbols: If provided, use these to group multipliers by
            objective symbol. Otherwise, group by ``f_{index}`` pattern.

    Returns:
        A dict mapping each objective key to its filtered multiplier value.
        Missing objectives are assigned 0.0.
    """
    grouped: dict[str, list[tuple[str, float]]] = defaultdict(list)

    for key, value in lagrange_multipliers.items():
        if objective_symbols is None:
            match = re.search(r"f_(\d+)", key)
            if match:
                f_i = match.group(1)
                grouped[f_i].append((key, value))
        else:
            for symbol in objective_symbols:
                if symbol in key:
                    grouped[symbol].append((key, value))
                    break

    filtered: dict[str, float] = {}
    for obj_key, entries in grouped.items():
        # Prefer non-"eq" constraints
        preferred = next((entry for entry in entries if not entry[0].endswith("eq")), None)
        if preferred is None and entries:
            preferred = entries[0]

        if preferred:
            # Use the symbol as key if available
            key = obj_key if (objective_symbols is not None and obj_key in objective_symbols) else preferred[0]
            filtered[key] = preferred[1]

    # Ensure every objective has an entry (default 0.0)
    if objective_symbols is not None:
        for symbol in objective_symbols:
            if symbol not in filtered:
                filtered[symbol] = 0.0
    else:
        max_index = -1
        for key in filtered:
            match = re.search(r"f_(\d+)", key)
            if match:
                max_index = max(max_index, int(match.group(1)))
        for i in range(max_index + 1):
            key = f"f_{i}"
            if key not in filtered:
                filtered[key] = 0.0

    return filtered

desdeo.explanations.utils

Utilities specific to explainable multiobjective optimization.

generate_biased_mean_data

generate_biased_mean_data(
    data: ndarray,
    target_means: ndarray,
    min_size: int = 2,
    max_size: int | None = None,
    solver: str = "SCIP",
) -> list | None

Finds a subset of the provided data that has a mean value close to provided target values.

Finds a subset of the provided data that has a mean value close to the provided target values. Formulates a mixed-integer quadratic programming problem to find a subset of data with a mean as close as possible to target_values and a size between min_size and max_size. In other words, the following problems is solved:

$\begin{align} &\min_{\mathbf{x}} & f(\mathbf{x}) & = \sum_{i=1}^m \left[ \left(\frac{1}{k} \sum_{j=1}^n x_j \times \text{data}_j\right)_i - \text{target}_i \right]^2 \\ &\text{s.t.,} & k & = \sum_{i=1}^n x_i,\\ & & k & \leq \text{max_size},\\ & & k & \geq \text{min_size},\\ \end{align}$ , where $n$ is the number of rows in data, $m$ is the number of columns in data, and $k$ is the size of the subset. Notice that the closeness to the target means is based on the Euclidean distance.

Note

Be mindful that this functions can take a long time with a very large data set and large upper bound for the desired subset.

Parameters:

Name	Type	Description	Default
`data`	`ndarray`	the data from which to generate the subset with a biased mean. Should be a 2D array with each row representing a sample and each column the value of the variables in the sample.	required
`target_means`	`ndarray`	the target mean values for each column the generated subset should have values close to.	required
`min_size`	`int`	the minimum size of the generated subset. Defaults to 2.	`2`
`max_size`	`int \| None`	the maximum size of the generated subset. If None, then the maximum size is bound by the numbers of rows in `data`. Defaults to None.	`None`
`solver`	`str`	the selected solver to be used by cvxpy. The solver should support mixed-integer quadratic programming. Defaults to "SCIP".	`'SCIP'`

Returns:

Type	Description
`list \| None`	list \| None: the indices of the samples of the generated subset respect to `data`, i.e., the generated subset is `data[indices]`. If optimization is not successful, returns None instead.

Source code in desdeo/explanations/utils.py

def generate_biased_mean_data(
    data: np.ndarray, target_means: np.ndarray, min_size: int = 2, max_size: int | None = None, solver: str = "SCIP"
) -> list | None:
    r"""Finds a subset of the provided data that has a mean value close to provided target values.

    Finds a subset of the provided data that has a mean value close to the
    provided target values.  Formulates a mixed-integer quadratic programming problem to
    find a subset of `data` with a mean as close as possible to `target_values`
    and a size between `min_size` and `max_size`. In other words, the following problems is solved:

    \begin{align}
        &\min_{\mathbf{x}} & f(\mathbf{x}) & = \sum_{i=1}^m \left[ \left(\frac{1}{k}
            \sum_{j=1}^n x_j \times \text{data}_j\right)_i - \text{target}_i \right]^2 \\
        &\text{s.t.,}   & k & = \sum_{i=1}^n x_i,\\
        &               & k & \leq \text{max_size},\\
        &               & k & \geq \text{min_size},\\
    \end{align},
    where $n$ is the number of rows in `data`, $m$ is the number of columns in
    `data`, and $k$ is the size of the subset. Notice that the closeness to the
    target means is based on the Euclidean distance.

    Note:
        Be mindful that this functions can take a long time with a very large
            data set and large upper bound for the desired subset.

    Args:
        data (np.ndarray): the data from which to generate the subset with a
            biased mean. Should be a 2D array with each row representing a sample
            and each column the value of the variables in the sample.
        target_means (np.ndarray): the target mean values for each column the
            generated subset should have values close to.
        min_size (int, optional): the minimum size of the generated subset. Defaults to 2.
        max_size (int | None, optional): the maximum size of the generated
            subset. If None, then the maximum size is bound by the numbers of rows
            in `data`. Defaults to None.
        solver (str, optional): the selected solver to be used by cvxpy. The
            solver should support mixed-integer quadratic programming. Defaults to
            "SCIP".

    Returns:
        list | None: the indices of the samples of the generated subset respect to
            `data`, i.e., the generated subset is `data[indices]`. If optimization is not successful,
            returns None instead.
    """
    # Number of rows and columns
    n_rows, n_cols = data.shape
    max_size = n_rows if max_size is None else max_size

    # Big M used to penalize the auxiliary variable z
    big_m = data.max(axis=0)

    # Binary variables to select rows from the data
    x = cp.Variable(n_rows, boolean=True)

    # Auxiliary variables, z represents the mean. phi is the weighted sum of the data (weighted by x)
    z = cp.Variable(n_cols)
    phi = cp.Variable((n_rows, n_cols))

    # The objective function, squared values of the difference between the mean
    # of the currently selected subset and the target values.
    objective = cp.sum_squares(z - target_means)

    # Define the constraints
    constraints = [
        # Sets the value of phi
        *[cp.sum(phi[:, col]) == cp.sum(cp.multiply(x, data[:, col])) for col in range(n_cols)],
        # Constraints the values of phi using big M, in practice setting z to be the mean values
        *[phi[:, col] <= cp.multiply(big_m[col], x) for col in range(n_cols)],
        *[phi[:, col] <= z[col] for col in range(n_cols)],
        *[phi[:, col] >= z[col] - cp.multiply(big_m[col], 1 - x) for col in range(n_cols)],
        phi >= 0,
        # Bounds the size of the set: min_size <= k <= max_size
        cp.sum(x) >= min_size,
        cp.sum(x) <= max_size,
    ]

    # Create the problem model
    problem = cp.Problem(cp.Minimize(objective), constraints)

    # Solve the problem
    problem.solve(solver=solver)

    # Return the indices of the found subset
    return [i for i in range(n_rows) if x.value[i] == 1]