# Copyright (c) 2025 Oracle and/or its affiliates.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License, version 2.0, as
# published by the Free Software Foundation.
#
# This program is designed to work with certain software (including
# but not limited to OpenSSL) that is licensed under separate terms,
# as designated in a particular file or component or in included license
# documentation. The authors of MySQL hereby grant you an
# additional permission to link the program and your derivative works
# with the separately licensed software that they have either included with
# the program or referenced in the documentation.
#
# Without limiting anything contained in the foregoing, this file,
# which is part of MySQL Connector/Python, is also subject to the
# Universal FOSS Exception, version 1.0, a copy of which can be found at
# http://oss.oracle.com/licenses/universal-foss-exception.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License, version 2.0, for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

"""Generic transformer utilities for MySQL Connector/Python.

Defines a scikit-learn compatible transformer whose transform and scoring
operations are delegated to a HeatWave-backed model object.
"""

from typing import Optional, Union

import numpy as np
import pandas as pd
from sklearn.base import TransformerMixin

from mysql.ai.ml.base import MyBaseMLModel
from mysql.ai.ml.model import ML_TASK
from mysql.ai.utils import copy_dict
from mysql.connector.abstracts import MySQLConnectionAbstract


class MyGenericTransformer(MyBaseMLModel, TransformerMixin):
    """
    Generic MySQL HeatWave transformer with a scikit-learn style interface.

    Intended to be usable as the transformation step of an sklearn pipeline.
    This class itself only defines ``transform`` and ``score``; both forward
    to ``self._model`` together with the option dictionaries captured at
    construction time. ``fit`` is not defined here and is expected to be
    supplied by ``MyBaseMLModel``.

    Args:
        db_connection (MySQLConnectionAbstract): Active MySQL connector
            database connection.
        task (str): ML task type for the transformer
            (default: ``ML_TASK.CLASSIFICATION``).
        score_metric (str): Metric requested from the backend when scoring
            (default: "balanced_accuracy").
        model_name (str, optional): Custom name for the deployed model.
        fit_extra_options (dict, optional): Extra options for fitting.
        transform_extra_options (dict, optional): Extra options for
            transformations.
        score_extra_options (dict, optional): Extra options for scoring.

    Attributes:
        score_metric (str): Name of the backend metric used by ``score``.
        score_extra_options (dict): Result of ``copy_dict`` applied to the
            scoring options; passed to the backend score call.
        transform_extra_options (dict): Result of ``copy_dict`` applied to
            the transform options; passed to the backend predict call.
        fit_extra_options (dict): See MyBaseMLModel.
        _model (MyModel): Underlying interface for database model
            operations (not assigned in this class).

    Methods:
        transform(X): Run the backend prediction for the given features.
        score(X, y): Score data using the configured metric and options.
    """

    def __init__(
        self,
        db_connection: MySQLConnectionAbstract,
        task: Union[str, ML_TASK] = ML_TASK.CLASSIFICATION,
        score_metric: str = "balanced_accuracy",
        model_name: Optional[str] = None,
        fit_extra_options: Optional[dict] = None,
        transform_extra_options: Optional[dict] = None,
        score_extra_options: Optional[dict] = None,
    ):
        """
        Set up the base model state and remember transform/score settings.

        Args:
            db_connection: Active MySQL backend database connection.
            task: ML task type for the transformer.
            score_metric: Backend scoring metric to request.
            model_name: Optional name under which the model is stored.
            fit_extra_options: Optional extra options for fitting.
            transform_extra_options: Optional extra options for
                transformation/inference.
            score_extra_options: Optional extra options for scoring.

        Raises:
            DatabaseError:
                If a database connection issue occurs.
                If an operational error occurs during execution.
        """
        # The base initializer is invoked explicitly (not through super());
        # it receives the connection, task, model name and fit options.
        MyBaseMLModel.__init__(
            self,
            db_connection,
            task,
            model_name=model_name,
            fit_extra_options=fit_extra_options,
        )

        self.score_metric = score_metric
        # Option dictionaries are routed through copy_dict before being kept.
        self.score_extra_options = copy_dict(score_extra_options)
        self.transform_extra_options = copy_dict(transform_extra_options)

    def transform(
        self,
        X: pd.DataFrame,  # pylint: disable=invalid-name
    ) -> pd.DataFrame:
        """
        Produce backend predictions for the supplied features.

        Args:
            X: DataFrame of features to predict/transform.

        Returns:
            pd.DataFrame: Whatever the backend predict call returns.

        Raises:
            DatabaseError:
                If provided options are invalid or unsupported,
                or if the model is not initialized, i.e., fit or import has
                not been called.
                If a database connection issue occurs.
                If an operational error occurs during execution.
        """
        backend_predict = self._model.predict
        return backend_predict(X, options=self.transform_extra_options)

    def score(
        self,
        X: Union[pd.DataFrame, np.ndarray],  # pylint: disable=invalid-name
        y: Union[pd.DataFrame, np.ndarray],
    ) -> float:
        """
        Score the given data through the backend scoring interface.

        Args:
            X: Features forwarded to the backend scorer.
            y: Target labels or data for scoring.

        Returns:
            float: Score computed by the backend for ``self.score_metric``.

        Raises:
            DatabaseError:
                If provided options are invalid or unsupported,
                or if the model is not initialized, i.e., fit or import has
                not been called.
                If a database connection issue occurs.
                If an operational error occurs during execution.
        """
        backend_score = self._model.score
        return backend_score(
            X,
            y,
            self.score_metric,
            options=self.score_extra_options,
        )