Source code for pycredits.data_preprocessing

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import pandas as pd

def preprocess_data(df, numeric_features, categorical_features):
    """Scale numeric features and one-hot encode categorical features.

    The column ``"Credit_risk"`` is treated as the target: it is split off
    from ``df`` and returned untouched, and is never passed through the
    transformers.

    Parameters
    ----------
    df : pandas.DataFrame
        Input DataFrame. Must contain a ``"Credit_risk"`` column as well as
        every column named in ``numeric_features`` and
        ``categorical_features``.
    numeric_features : list
        Names of the columns to standardize with ``StandardScaler``.
    categorical_features : list
        Names of the columns to encode with ``OneHotEncoder``.

    Returns
    -------
    tuple
        ``(X_transformed, y, preprocessor)`` where

        * ``X_transformed`` is the output of
          ``ColumnTransformer.fit_transform`` (a NumPy array or a SciPy
          sparse matrix, depending on how sparse the encoded data is),
        * ``y`` is the ``"Credit_risk"`` column of ``df``,
        * ``preprocessor`` is the fitted ``ColumnTransformer``, which can be
          reused to transform new data consistently.

    Raises
    ------
    KeyError
        If ``df`` has no ``"Credit_risk"`` column.

    Examples
    --------
    >>> X_t, y, pre = preprocess_data(  # doctest: +SKIP
    ...     df, ["Age", "Credit amount"], ["Status", "Credit history"]
    ... )
    """
    # "Credit_risk" is the target variable and is always removed from the
    # feature matrix.
    y = df["Credit_risk"]
    X = df.drop("Credit_risk", axis=1)

    # Columns not listed in either feature list are dropped by
    # ColumnTransformer's default ``remainder="drop"``.
    preprocessor = ColumnTransformer(
        transformers=[
            ("num", StandardScaler(), numeric_features),
            # handle_unknown="ignore": categories unseen during fit are
            # encoded as all zeros at transform time instead of raising.
            ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
        ]
    )

    # Fit on the features and transform them in one step.
    X_transformed = preprocessor.fit_transform(X)

    return X_transformed, y, preprocessor