Source code for paddlets.models.classify.dl.cnn

#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import pandas as pd
from typing import List, Dict, Any, Callable, Optional

from paddle.optimizer import Optimizer
import paddle.nn.functional as F
import numpy as np
import paddle

from paddlets.models.classify.dl.paddle_base import PaddleBaseClassifier
from paddlets.models.common.callbacks import Callback
from paddlets.datasets import TSDataset
from paddlets.logger import raise_if, raise_if_not

       
class _CNNBlock(paddle.nn.Layer):
    """CNN Network structure.

    Args:
        in_chunk_dim(int) : The numer of feature.
        in_chunk_lens(int) : The lens of input data.
        n_class(int) : number of unique labels
        hidden_config(List[int]) : The ith element represents the number of neurons in the ith hidden layer for cnn.
        activation(Callable[..., paddle.Tensor]) : The activation function for the hidden layers.
        last_activation(Callable[..., paddle.Tensor]) : The activation function for the last hidden layers.
        kernel_size(int) : Kernel size for Conv1D.
        avg_pool_size(int) : size of avg pool
        use_bn(bool) : Whether to use batch normalization.
        use_drop(bool) : Whether to use dropout
        dropout_rate(float): Dropout regularization parameter.
        
    Attributes:
        _nn(paddle.nn.Sequential): Dynamic graph LayerList.
    """
    def __init__(
        self,
        in_chunk_dim: int,
        in_chunk_lens: int,
        n_classes: int,
        hidden_config: List[int]=[6, 12],
        activation: Callable[..., paddle.Tensor]=paddle.nn.Sigmoid,
        last_activation: Callable[..., paddle.Tensor] = paddle.nn.Softmax,
        kernel_size=7,
        avg_pool_size=3,
        use_bn: bool = False,
        use_drop: bool = False,
        dropout_rate: float = 0.5,
    ):
        super(_CNNBlock, self).__init__()
        self._n_classes = n_classes
        self.padding = "valid"
        self.data_format = 'NCL'
        dims, layers = [in_chunk_dim] + hidden_config, []
        for i in range(1, len(dims)):
            layers.append(paddle.nn.Conv1D(dims[i - 1], dims[i], kernel_size, padding=self.padding, data_format=self.data_format))
            if use_bn:
                layers.append(paddle.nn.BatchNorm1D(dims[i], data_format=self.data_format))
            layers.append(activation())
            layers.append(paddle.nn.AvgPool1D(avg_pool_size))
            if use_drop:
                layers.append(paddle.nn.Dropout(dropout_rate))
            in_chunk_lens = (in_chunk_lens - kernel_size + 1) // avg_pool_size
        raise_if(
            in_chunk_lens < 1,
            "Conv1d output size must be greater than or equal to 1, "
            "Please choose a smaller `kernel_size` or bigger `in_chunk_len`"
        )
        layers.append(paddle.nn.Flatten())
        layers.append(
            paddle.nn.Linear(in_chunk_lens * dims[-1], n_classes)
        )
        layers.append(last_activation())
        self._nn = paddle.nn.Sequential(*layers)

    def forward(
        self, 
        X: Dict[str, paddle.Tensor]
    ) -> paddle.Tensor:
        """Forward.

        Args: 
            X(paddle.Tensor): Dict of feature tensor.

        Returns:
            paddle.Tensor: Output of model.
        """
        out = X['features']
        out = paddle.transpose(out, perm=[0, 2, 1])
        out = self._nn(out)
        return out
        

[docs]class CNNClassifier(PaddleBaseClassifier): """CNNClassifier. Args: loss_fn(Callable[..., paddle.Tensor]): Loss function. optimizer_fn(Callable[..., Optimizer]): Optimizer algorithm. optimizer_params(Dict[str, Any]): Optimizer parameters. eval_metrics(List[str]): Evaluation metrics of model. callbacks(List[Callback]): Customized callback functions. batch_size(int): Number of samples per batch. max_epochs(int): Max epochs during training. verbose(int): Verbosity mode. patience(int): Number of epochs to wait for improvement before terminating. seed(int|None): Global random seed. activation(Callable[..., paddle.Tensor]): The activation function for the hidden layers. last_activation(Callable[..., paddle.Tensor]) : The activation function for the last hidden layers. hidden_config(List[int]|None): The ith element represents the number of neurons in the ith hidden layer. kernel_size(int): Kernel size for Conv1D. dropout_rate(float): Dropout regularization parameter. use_bn(bool): Whether to use batch normalization. Attributes: _loss_fn(Callable[..., paddle.Tensor]): Loss function. _optimizer_fn(Callable[..., Optimizer]): Optimizer algorithm. _optimizer_params(Dict[str, Any]): Optimizer parameters. _eval_metrics(List[str]): Evaluation metrics of model. _callbacks(List[Callback]): Customized callback functions. _batch_size(int): Number of samples per batch. _max_epochs(int): Max epochs during training. _verbose(int): Verbosity mode. _patience(int): Number of epochs to wait for improvement before terminating. _seed(int|None): Global random seed. _stop_training(bool): Training status. _activation(Callable[..., paddle.Tensor]): The activation function for the hidden layers. _last_activation(Callable[..., paddle.Tensor]) : The activation function for the last hidden layers. _hidden_config(List[int]|None): The ith element represents the number of neurons in the ith hidden layer. _kernel_size(int): Kernel size for Conv1D. _dropout_rate(float): Dropout regularization parameter. _use_bn(bool): Whether to use batch normalization. """ def __init__( self, loss_fn: Callable[..., paddle.Tensor] = F.cross_entropy, optimizer_fn: Callable[..., Optimizer] = paddle.optimizer.Adam, optimizer_params: Dict[str, Any] = dict(learning_rate=1e-3), eval_metrics: List[str] = [], callbacks: List[Callback] = [], batch_size: int = 32, max_epochs: int = 100, verbose: int = 1, patience: int = 10, seed: Optional[int] = None, activation: Callable[..., paddle.Tensor] = paddle.nn.Sigmoid, last_activation: Callable[..., paddle.Tensor] = paddle.nn.Softmax, use_bn: bool = False, hidden_config: List[int] = [6, 12], kernel_size: int = 7, avg_pool_size=3, dropout_rate: float = 0.2, use_drop: bool = False, ): self._hidden_config = hidden_config self._use_bn = use_bn self._kernel_size = kernel_size self._avg_pool_size = avg_pool_size self._activation = activation self._last_activation = last_activation self._dropout_rate = dropout_rate self._use_drop = use_drop self._dropout_rate = dropout_rate super(CNNClassifier, self).__init__( loss_fn=loss_fn, optimizer_fn=optimizer_fn, optimizer_params=optimizer_params, eval_metrics=eval_metrics, callbacks=callbacks, batch_size=batch_size, max_epochs=max_epochs, verbose=verbose, patience=patience, seed=seed, ) def _update_fit_params( self, train_tsdatasets: List[TSDataset], train_labels: np.ndarray, valid_tsdatasets: List[TSDataset], valid_labels: np.ndarray ) -> Dict[str, Any]: """Infer parameters by TSdataset automatically. Args: train_tsdataset(TSDataset): Train set. train_labels:(np.ndarray) : The train data class labels valid_tsdataset(TSDataset|None): Eval set, used for early stopping. valid_labels:(np.ndarray) : The valid data class labels Returns: Dict[str, Any]: model parameters. """ fit_params = { "feature_dim": train_tsdatasets[0].get_target().data.shape[1], "input_lens" : train_tsdatasets[0].get_target().data.shape[0] } return fit_params def _init_network(self) -> paddle.nn.Layer: """Setup the network. Returns: paddle.nn.Layer. """ return _CNNBlock( self._fit_params["feature_dim"], self._fit_params["input_lens"], self._n_classes, self._hidden_config, self._activation, self._last_activation, self._kernel_size, self._avg_pool_size, self._use_bn, self._use_drop, self._dropout_rate, )