Advanced usage of surrogates
Here, we present an advanced use case of a data-driven problem. There are four objectives in total: three are expensive to evaluate and one is cheap. Each of the three expensive objectives is approximated by its own surrogate model, and each surrogate is trained on a different dataset. The cheap objective is evaluated using an analytical function.
In [1]:
Copied!
# Filter out warnings
from warnings import filterwarnings
filterwarnings("ignore")
# Importing necessary modules
from pathlib import Path
import numpy as np
import plotly.express as ex
import polars as pl
from joblib import dump
from sklearn.ensemble import ExtraTreesRegressor, GradientBoostingRegressor
from desdeo.emo.hooks.archivers import NonDominatedArchive
from desdeo.emo.methods.EAs import nsga3
from desdeo.problem import Objective, ObjectiveTypeEnum, Problem, Variable
# Filter out warnings
from warnings import filterwarnings
filterwarnings("ignore")
# Importing necessary modules
from pathlib import Path
import numpy as np
import plotly.express as ex
import polars as pl
from joblib import dump
from sklearn.ensemble import ExtraTreesRegressor, GradientBoostingRegressor
from desdeo.emo.hooks.archivers import NonDominatedArchive
from desdeo.emo.methods.EAs import nsga3
from desdeo.problem import Objective, ObjectiveTypeEnum, Problem, Variable
In [2]:
Copied!
# Importing the data and calculating the bounds for the decision variables
root = Path.cwd().parent.parent
data_dir = root / "datasets" / "MetallApplication"
ysdata = pl.read_csv(data_dir / "ysdata.csv", infer_schema_length=10000)
utsdata = pl.read_csv(data_dir / "utsdata.csv", infer_schema_length=10000)
elondata = pl.read_csv(data_dir / "elondata.csv", infer_schema_length=10000)

use_cols = ["C", "Si", "Mn", "P", "S", "Mo", "Ni", "Al", "N", "Nb", "V", "B", "Ti", "Cr", "Ce", "Cu", "Zr"]

# Summarise every dataset exactly once; both the "min" and the "max" rows are read from the same summary.
summaries = tuple(dataset.describe() for dataset in (ysdata, utsdata, elondata))
min_rows = tuple(summary.filter(pl.col("statistic") == "min")[use_cols] for summary in summaries)
max_rows = tuple(summary.filter(pl.col("statistic") == "max")[use_cols] for summary in summaries)

# The feasible box is the intersection of the ranges seen in the three datasets:
# the largest of the per-dataset minima and the smallest of the per-dataset maxima.
lower_limits = np.max(min_rows, axis=0).flatten()
upper_limits = np.min(max_rows, axis=0).flatten()

# strict=True makes a mismatch between the column list and the computed limits fail loudly.
lower_bounds = dict(zip(use_cols, lower_limits, strict=True))
upper_bounds = dict(zip(use_cols, upper_limits, strict=True))
# Importing the data and calculating the bounds for the decision variables
root = Path.cwd().parent.parent
data_dir = root / "datasets" / "MetallApplication"
ysdata = pl.read_csv(data_dir / "ysdata.csv", infer_schema_length=10000)
utsdata = pl.read_csv(data_dir / "utsdata.csv", infer_schema_length=10000)
elondata = pl.read_csv(data_dir / "elondata.csv", infer_schema_length=10000)

use_cols = ["C", "Si", "Mn", "P", "S", "Mo", "Ni", "Al", "N", "Nb", "V", "B", "Ti", "Cr", "Ce", "Cu", "Zr"]

# Summarise every dataset exactly once; both the "min" and the "max" rows are read from the same summary.
summaries = tuple(dataset.describe() for dataset in (ysdata, utsdata, elondata))
min_rows = tuple(summary.filter(pl.col("statistic") == "min")[use_cols] for summary in summaries)
max_rows = tuple(summary.filter(pl.col("statistic") == "max")[use_cols] for summary in summaries)

# The feasible box is the intersection of the ranges seen in the three datasets:
# the largest of the per-dataset minima and the smallest of the per-dataset maxima.
lower_limits = np.max(min_rows, axis=0).flatten()
upper_limits = np.min(max_rows, axis=0).flatten()

# strict=True makes a mismatch between the column list and the computed limits fail loudly.
lower_bounds = dict(zip(use_cols, lower_limits, strict=True))
upper_bounds = dict(zip(use_cols, upper_limits, strict=True))
In [3]:
Copied!
# Train surrogates and save them to disk
# Fixed seed so that a Restart & Run All reproduces the same surrogate models
# (both tree ensembles are randomised when no random_state is given).
SURROGATE_SEED = 42

# Create the output directory once, before the loop; exist_ok replaces the
# separate "does it exist?" check that used to run on every iteration.
surrogate_dir = root / "surrogatemodels"
surrogate_dir.mkdir(parents=True, exist_ok=True)

# One (dataset, target column, estimator class) triple per expensive objective.
for data, obj, technique in zip(
    (ysdata, utsdata, elondata),
    ("YS", "UTS", "ELON"),
    (ExtraTreesRegressor, GradientBoostingRegressor, ExtraTreesRegressor),
    strict=True,
):
    X = data[use_cols]
    y = data[obj]
    model = technique(n_estimators=100, random_state=SURROGATE_SEED)
    model.fit(X, y)
    # The file name is the objective symbol, e.g. "YS.joblib".
    dump(model, surrogate_dir / f"{obj}.joblib")
# Train surrogates and save them to disk
# Fixed seed so that a Restart & Run All reproduces the same surrogate models
# (both tree ensembles are randomised when no random_state is given).
SURROGATE_SEED = 42

# Create the output directory once, before the loop; exist_ok replaces the
# separate "does it exist?" check that used to run on every iteration.
surrogate_dir = root / "surrogatemodels"
surrogate_dir.mkdir(parents=True, exist_ok=True)

# One (dataset, target column, estimator class) triple per expensive objective.
for data, obj, technique in zip(
    (ysdata, utsdata, elondata),
    ("YS", "UTS", "ELON"),
    (ExtraTreesRegressor, GradientBoostingRegressor, ExtraTreesRegressor),
    strict=True,
):
    X = data[use_cols]
    y = data[obj]
    model = technique(n_estimators=100, random_state=SURROGATE_SEED)
    model.fit(X, y)
    # The file name is the objective symbol, e.g. "YS.joblib".
    dump(model, surrogate_dir / f"{obj}.joblib")
In [4]:
Copied!
# Model the problem.
# Note that the three surrogate objectives are to be maximized, while the carbon equivalent is to be minimized.

# One real-valued decision variable per column in use_cols, bounded by lower_bounds / upper_bounds.
Variables = []
for element in use_cols:
    Variables.append(
        Variable(
            name=element,
            symbol=element,
            lowerbound=lower_bounds[element],
            upperbound=upper_bounds[element],
            variable_type="real",
        )
    )

# Surrogate objectives: each one points at the model file saved under "surrogatemodels".
surrogate_objectives = []
for target in ("YS", "UTS", "ELON"):
    model_path = root / "surrogatemodels" / f"{target}.joblib"
    surrogate_objectives.append(
        Objective(
            name=target,
            symbol=target,
            maximize=True,
            surrogates=[model_path],
            objective_type=ObjectiveTypeEnum.surrogate,
        )
    )

# Analytical objective: the carbon equivalent, written as an expression over the variable symbols.
carbon_eqv = "C + Mn/6 + (Cr + Mo + V)/5 + (Ni + Cu)/15"
carbon_objective = Objective(name="Carbon Equivalent", symbol="CE", maximize=False, func=carbon_eqv)

# Surrogate objectives first, carbon equivalent last.
Objectives = [*surrogate_objectives, carbon_objective]

problem = Problem(
    name="Metallurgical Application",
    description="A problem from the metallurgical domain.",
    variables=Variables,
    objectives=Objectives,
)
# Model the problem.
# Note that the three surrogate objectives are to be maximized, while the carbon equivalent is to be minimized.

# One real-valued decision variable per column in use_cols, bounded by lower_bounds / upper_bounds.
Variables = []
for element in use_cols:
    Variables.append(
        Variable(
            name=element,
            symbol=element,
            lowerbound=lower_bounds[element],
            upperbound=upper_bounds[element],
            variable_type="real",
        )
    )

# Surrogate objectives: each one points at the model file saved under "surrogatemodels".
surrogate_objectives = []
for target in ("YS", "UTS", "ELON"):
    model_path = root / "surrogatemodels" / f"{target}.joblib"
    surrogate_objectives.append(
        Objective(
            name=target,
            symbol=target,
            maximize=True,
            surrogates=[model_path],
            objective_type=ObjectiveTypeEnum.surrogate,
        )
    )

# Analytical objective: the carbon equivalent, written as an expression over the variable symbols.
carbon_eqv = "C + Mn/6 + (Cr + Mo + V)/5 + (Ni + Cu)/15"
carbon_objective = Objective(name="Carbon Equivalent", symbol="CE", maximize=False, func=carbon_eqv)

# Surrogate objectives first, carbon equivalent last.
Objectives = [*surrogate_objectives, carbon_objective]

problem = Problem(
    name="Metallurgical Application",
    description="A problem from the metallurgical domain.",
    variables=Variables,
    objectives=Objectives,
)
In [5]:
Copied!
# Set up NSGA-III for the problem; the factory returns the solver callable and its publisher.
solver, pub = nsga3(problem=problem)

# Build the archive on the same publisher and subscribe it, so it is fed while the solver runs.
archive = NonDominatedArchive(problem=problem, publisher=pub)
pub.auto_subscribe(archive)

# Run the optimisation.
results = solver()

# Plot the archived solutions on parallel coordinates, one axis per objective symbol.
objective_symbols = ["CE", "YS", "UTS", "ELON"]
archived_objectives = archive.solutions.to_pandas()[objective_symbols]
parallel_plot = ex.parallel_coordinates(archived_objectives)
parallel_plot.show(renderer="notebook", include_plotlyjs="cdn")
# Set up NSGA-III for the problem; the factory returns the solver callable and its publisher.
solver, pub = nsga3(problem=problem)

# Build the archive on the same publisher and subscribe it, so it is fed while the solver runs.
archive = NonDominatedArchive(problem=problem, publisher=pub)
pub.auto_subscribe(archive)

# Run the optimisation.
results = solver()

# Plot the archived solutions on parallel coordinates, one axis per objective symbol.
objective_symbols = ["CE", "YS", "UTS", "ELON"]
archived_objectives = archive.solutions.to_pandas()[objective_symbols]
parallel_plot = ex.parallel_coordinates(archived_objectives)
parallel_plot.show(renderer="notebook", include_plotlyjs="cdn")