Advanced usage of surrogates

Here, we present an advanced use case of a data-driven problem. There are four objectives in total: three are expensive to evaluate, and one is cheap. Each of the three expensive objectives is approximated by its own surrogate model, and each surrogate is trained on a different dataset. The cheap objective is evaluated directly using an analytical function.

In [1]:

Copied!





# Filter out warnings
from warnings import filterwarnings

filterwarnings("ignore")

# Importing necessary modules
from pathlib import Path

import numpy as np
import plotly.express as ex
import polars as pl
from joblib import dump
from sklearn.ensemble import ExtraTreesRegressor, GradientBoostingRegressor

from desdeo.emo import algorithms
from desdeo.problem import Objective, ObjectiveTypeEnum, Problem, Variable
# Filter out warnings
from warnings import filterwarnings

filterwarnings("ignore")

# Importing necessary modules
from pathlib import Path

import numpy as np
import plotly.express as ex
import polars as pl
from joblib import dump
from sklearn.ensemble import ExtraTreesRegressor, GradientBoostingRegressor

from desdeo.emo import algorithms
from desdeo.problem import Objective, ObjectiveTypeEnum, Problem, Variable

In [2]:

Copied!





# Import the three datasets and derive bounds for the decision variables.
# The bounds are the intersection of the ranges seen in the datasets: the
# largest of the per-dataset minima and the smallest of the per-dataset maxima.
root = Path.cwd().parent.parent
data_dir = root / "datasets" / "MetallApplication"
ysdata = pl.read_csv(data_dir / "ysdata.csv", infer_schema_length=10000)
utsdata = pl.read_csv(data_dir / "utsdata.csv", infer_schema_length=10000)
elondata = pl.read_csv(data_dir / "elondata.csv", infer_schema_length=10000)

# Columns used as decision variables (and as inputs to the surrogates).
use_cols = ["C", "Si", "Mn", "P", "S", "Mo", "Ni", "Al", "N", "Nb", "V", "B", "Ti", "Cr", "Ce", "Cu", "Zr"]

# Summarize each dataset once; the "min" and "max" rows are both read from the same summary.
summaries = tuple(frame.describe() for frame in (ysdata, utsdata, elondata))

lower_bounds = np.max(
    tuple(summary.filter(pl.col("statistic") == "min")[use_cols] for summary in summaries),
    axis=0,
)

# strict=True makes a length mismatch between the names and the values an error instead of a silent truncation.
lower_bounds = dict(zip(use_cols, lower_bounds.flatten(), strict=True))

upper_bounds = np.min(
    tuple(summary.filter(pl.col("statistic") == "max")[use_cols] for summary in summaries),
    axis=0,
)

upper_bounds = dict(zip(use_cols, upper_bounds.flatten(), strict=True))
# Import the three datasets and derive bounds for the decision variables.
# The bounds are the intersection of the ranges seen in the datasets: the
# largest of the per-dataset minima and the smallest of the per-dataset maxima.
root = Path.cwd().parent.parent
data_dir = root / "datasets" / "MetallApplication"
ysdata = pl.read_csv(data_dir / "ysdata.csv", infer_schema_length=10000)
utsdata = pl.read_csv(data_dir / "utsdata.csv", infer_schema_length=10000)
elondata = pl.read_csv(data_dir / "elondata.csv", infer_schema_length=10000)

# Columns used as decision variables (and as inputs to the surrogates).
use_cols = ["C", "Si", "Mn", "P", "S", "Mo", "Ni", "Al", "N", "Nb", "V", "B", "Ti", "Cr", "Ce", "Cu", "Zr"]

# Summarize each dataset once; the "min" and "max" rows are both read from the same summary.
summaries = tuple(frame.describe() for frame in (ysdata, utsdata, elondata))

lower_bounds = np.max(
    tuple(summary.filter(pl.col("statistic") == "min")[use_cols] for summary in summaries),
    axis=0,
)

# strict=True makes a length mismatch between the names and the values an error instead of a silent truncation.
lower_bounds = dict(zip(use_cols, lower_bounds.flatten(), strict=True))

upper_bounds = np.min(
    tuple(summary.filter(pl.col("statistic") == "max")[use_cols] for summary in summaries),
    axis=0,
)

upper_bounds = dict(zip(use_cols, upper_bounds.flatten(), strict=True))

In [3]:

Copied!





# Train one surrogate per expensive objective and save each of them to disk.

# Create the output directory once, up front. exist_ok=True avoids a separate
# existence check, and parents=True also creates missing parent directories.
(root / "surrogatemodels").mkdir(parents=True, exist_ok=True)

# Each objective is paired with its own dataset and its own regressor class.
for data, obj, technique in zip(
    (ysdata, utsdata, elondata),
    ("YS", "UTS", "ELON"),
    (ExtraTreesRegressor, GradientBoostingRegressor, ExtraTreesRegressor),
    strict=True,
):
    # Inputs are the decision-variable columns; the target is the objective's own column.
    X = data[use_cols]
    y = data[obj]

    model = technique(n_estimators=100)
    model.fit(X, y)

    # The file is named after the objective symbol.
    dump(model, root / "surrogatemodels" / f"{obj}.joblib")
# Train one surrogate per expensive objective and save each of them to disk.

# Create the output directory once, up front. exist_ok=True avoids a separate
# existence check, and parents=True also creates missing parent directories.
(root / "surrogatemodels").mkdir(parents=True, exist_ok=True)

# Each objective is paired with its own dataset and its own regressor class.
for data, obj, technique in zip(
    (ysdata, utsdata, elondata),
    ("YS", "UTS", "ELON"),
    (ExtraTreesRegressor, GradientBoostingRegressor, ExtraTreesRegressor),
    strict=True,
):
    # Inputs are the decision-variable columns; the target is the objective's own column.
    X = data[use_cols]
    y = data[obj]

    model = technique(n_estimators=100)
    model.fit(X, y)

    # The file is named after the objective symbol.
    dump(model, root / "surrogatemodels" / f"{obj}.joblib")

In [4]:

Copied!





# Model the problem: one real-valued variable per input column, three surrogate
# objectives that are maximized, and an analytical carbon equivalent that is minimized.

Variables = [
    Variable(
        name=element,
        symbol=element,
        lowerbound=lower_bounds[element],
        upperbound=upper_bounds[element],
        variable_type="real",
    )
    for element in use_cols
]

# Each surrogate objective points to the model file saved under its own symbol.
surrogate_objectives = [
    Objective(
        name=target,
        symbol=target,
        maximize=True,
        surrogates=[root / "surrogatemodels" / f"{target}.joblib"],
        objective_type=ObjectiveTypeEnum.surrogate,
    )
    for target in ("YS", "UTS", "ELON")
]

# The cheap objective is given as an expression over the variable symbols.
carbon_eqv = "C + Mn/6 + (Cr + Mo + V)/5 + (Ni + Cu)/15"
carbon_objective = Objective(name="Carbon Equivalent", symbol="CE", maximize=False, func=carbon_eqv)

# Surrogate objectives come first, the analytical objective last.
Objectives = [*surrogate_objectives, carbon_objective]

problem = Problem(
    name="Metallurgical Application",
    description="A problem from the metallurgical domain.",
    variables=Variables,
    objectives=Objectives,
)
# Model the problem: one real-valued variable per input column, three surrogate
# objectives that are maximized, and an analytical carbon equivalent that is minimized.

Variables = [
    Variable(
        name=element,
        symbol=element,
        lowerbound=lower_bounds[element],
        upperbound=upper_bounds[element],
        variable_type="real",
    )
    for element in use_cols
]

# Each surrogate objective points to the model file saved under its own symbol.
surrogate_objectives = [
    Objective(
        name=target,
        symbol=target,
        maximize=True,
        surrogates=[root / "surrogatemodels" / f"{target}.joblib"],
        objective_type=ObjectiveTypeEnum.surrogate,
    )
    for target in ("YS", "UTS", "ELON")
]

# The cheap objective is given as an expression over the variable symbols.
carbon_eqv = "C + Mn/6 + (Cr + Mo + V)/5 + (Ni + Cu)/15"
carbon_objective = Objective(name="Carbon Equivalent", symbol="CE", maximize=False, func=carbon_eqv)

# Surrogate objectives come first, the analytical objective last.
Objectives = [*surrogate_objectives, carbon_objective]

problem = Problem(
    name="Metallurgical Application",
    description="A problem from the metallurgical domain.",
    variables=Variables,
    objectives=Objectives,
)

In [5]:

Copied!





# Set up NSGA-III with its default options. A non-dominated archive is set up
# automatically and is available via the returned extras object.
nsga3_settings = algorithms.nsga3_options()
solver, extras = algorithms.emo_constructor(emo_options=nsga3_settings, problem=problem)

# Run the algorithm
results = solver()

# Visualize the archived solutions, one parallel axis per objective symbol
objective_columns = ["CE", "YS", "UTS", "ELON"]
archive_frame = extras.archive.solutions.to_pandas()[objective_columns]
figure = ex.parallel_coordinates(archive_frame)
figure.show(renderer="notebook", include_plotlyjs="cdn")
# Set up NSGA-III with its default options. A non-dominated archive is set up
# automatically and is available via the returned extras object.
nsga3_settings = algorithms.nsga3_options()
solver, extras = algorithms.emo_constructor(emo_options=nsga3_settings, problem=problem)

# Run the algorithm
results = solver()

# Visualize the archived solutions, one parallel axis per objective symbol
objective_columns = ["CE", "YS", "UTS", "ELON"]
archive_frame = extras.archive.solutions.to_pandas()[objective_columns]
figure = ex.parallel_coordinates(archive_frame)
figure.show(renderer="notebook", include_plotlyjs="cdn")