Pipeline
Basic preprocessing pipeline for numeric and categorical variables
Library: scikit-learn
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer, make_column_transformer, make_column_selector
# Categorical branch: fill missing entries with the column's most frequent
# value, then one-hot encode (categories unseen during fit are ignored
# rather than raising).
_cat_steps = (
    SimpleImputer(strategy="most_frequent"),
    OneHotEncoder(handle_unknown="ignore"),
)
cat_pipeline = make_pipeline(*_cat_steps)
# Numeric branch: fill missing entries with the column median, then
# standardize with StandardScaler.
_num_steps = (
    SimpleImputer(strategy="median"),
    StandardScaler(),
)
num_pipeline = make_pipeline(*_num_steps)
# Combine the two pipelines: each column is routed to a branch by its dtype.
# Columns matched by neither selector are forwarded unchanged
# (remainder="passthrough").
preprocessing = ColumnTransformer(
    transformers=[
        # "number" is the pandas select_dtypes alias for np.number; using the
        # string avoids depending on a numpy import this file does not have.
        ("numeric", num_pipeline, make_column_selector(dtype_include="number")),
        ("categorical", cat_pipeline, make_column_selector(dtype_include=object)),
    ],
    remainder="passthrough",
)