Tree-based Models
Concrete-ML
scikit-learn
Concrete-ML
XGBoost
Example
from sklearn.datasets import load_breast_cancer
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from concrete.ml.sklearn.xgb import XGBClassifier
# Load the breast cancer dataset as a (features, labels) pair
X, y = load_breast_cancer(return_X_y=True)
# Hold out 20% of the samples as a test set, using a fixed random_state
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=8,
)
# Concrete-ML XGBoost classifier, configured with n_jobs=1 and n_bits=3
model = XGBClassifier(n_jobs=1, n_bits=3)
# Chain the preprocessing and the classifier:
# standardize the features, project them with a PCA, then fit the model
pipeline = Pipeline(
    steps=[
        ("standard_scaler", StandardScaler()),
        ("pca", PCA()),
        ("model", model),
    ]
)
# Hyper-parameters to explore; keys follow the "<step name>__<parameter>" convention
param_grid = {
    "pca__n_components": [2, 5, 10, 15],
    "model__max_depth": [2, 3, 5],
    "model__n_estimators": [5, 10, 20],
}
# Exhaustive search over the grid, scored by accuracy with 5-fold cross validation
# and run on all available cores (n_jobs=-1)
grid = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    scoring="accuracy",
)
# Run the search on the training set
grid.fit(X_train, y_train)
# Report the best combination of parameters
print(f"Best parameters found: {grid.best_params_}")
# Example output:
# Best parameters found: {'model__max_depth': 5, 'model__n_estimators': 10, 'pca__n_components': 5}
# Only the model inference is run in FHE for now.
# The data transformation is done in clear (on the client machine),
# while the model inference is done in FHE on a server.
# The best pipeline is therefore split into 2 parts:
# 1. data transformation (every step except the last one)
# 2. estimator (the last step)
best_pipeline = grid.best_estimator_
data_transformation_pipeline = best_pipeline[:-1]
model = best_pipeline[-1]
# Apply the fitted data transformation to both the train and the test sets
X_train_transformed = data_transformation_pipeline.transform(X_train)
X_test_transformed = data_transformation_pipeline.transform(X_test)
# Predict on the transformed test set in clear and report the accuracy
y_pred_clear = model.predict(X_test_transformed)
clear_accuracy = (y_pred_clear == y_test).mean()
print(f"Test accuracy in clear: {clear_accuracy:0.2f}")
# Example output:
# Test accuracy in clear: 0.98
# Compile the model to FHE, using the transformed training set as input
model.compile(X_train_transformed)
# Run the inference in FHE.
# Warning: this will take a while. It is recommended to start with a very small batch of
# examples (e.g. N_TEST_FHE = 1).
# Note that the encryption and decryption are done behind the scenes here.
N_TEST_FHE = 1
X_fhe_batch = X_test_transformed[:N_TEST_FHE]
y_pred_fhe = model.predict(X_fhe_batch, execute_in_fhe=True)
# Count and report how many FHE predictions match the clear predictions
n_matching = (y_pred_fhe == y_pred_clear[:N_TEST_FHE]).sum()
print(
    f"{n_matching} "
    f"examples over {N_TEST_FHE} have a FHE inference equal to the clear inference."
)
# Example output:
# 1 examples over 1 have a FHE inference equal to the clear inference.
Last updated
Was this helpful?