# Source code for paddlets.analysis.analysis_report

# !/usr/bin/env python3
# -*- coding:utf-8 -*-

from io import BytesIO
import inspect
import json
import os
from typing import Any, Callable, List, Optional, Sequence, Tuple, Union, Dict

import pandas as pd
from docx import Document
from docx.shared import Inches

from paddlets import TimeSeries, TSDataset, analysis
from paddlets.analysis.base import Analyzer
from paddlets.logger import Logger, raise_if_not, raise_if, raise_log

# Logger shared by everything in this module, created with the module's __name__.
logger = Logger(__name__)

# Analyzer names AnalysisReport falls back to when the caller passes no `names`.
DEFAULT_ANALYZERS = ["summary", "max"]


class AnalysisReport(object):
    """
    AnalysisReport
    Aggregate the results of analyzers, show them in format of tables and charts in reports.
    Currently supports docx and json analysis reports.

    Args:
        dataset[TSDataset]: TSDataset to be analyzed
        names[str|List[str]]: Analyzer names, set to DEFAULT_ANALYZERS by default.
            A single name may be passed as a plain string.
        params[Dict]: analyzers params, keyed by analyzer name
        columns[str|List[str]]: columns to be analyzed

    Examples:
        .. code-block:: python

            # example for names.
            names = ["max", "outlier"]

            # example for params:
            params = {
                "max": {
                    "param1": 1
                },
                "outlier": {
                    "param1": 1,
                    "param2": 2
                }
            }

    """

    def __init__(
            self,
            dataset: TSDataset,
            names: Optional[Union[str, List[str]]] = None,
            params: Optional[Dict] = None,
            columns: Optional[Union[str, List[str]]] = None
    ) -> None:

        if names is None:
            # Copy, so the module-level default list is never shared with (or
            # mutated through) an instance.
            names = list(DEFAULT_ANALYZERS)
        else:
            # A bare string is one analyzer name, not a sequence of characters.
            names = self._normalize_names(names)
            self._validate_analyzers_names(names)

        self._dataset = dataset
        self._names = names
        self._columns = columns
        self._analyzers = self._get_analyzers(names, params)

    def export_docx_report(self, path: str = ".", file_name: str = "analysis_report.docx") -> None:
        """
        Export a report in the docx format

        The report contains an introduction, a plot of the dataset and, for every
        configured analyzer, its heading, description, result (as a table when the
        result is a pandas Series/DataFrame, as plain text otherwise) and chart.

        Args:
            path[str]: directory to save the exported report, set to the current path by default
            file_name[str]: file name, default set to "analysis_report.docx"

        Returns:
            None

        Raise:
            ValueError: if `path` does not exist

        """
        # Validate export path before doing any analysis work
        if not os.path.exists(path):
            raise_log(ValueError("export path do not exist, please check"))

        # Create new Document
        document = Document()

        self._report_formating(document)

        figure = self._dataset.plot().get_figure()
        if figure:
            picture = self._figure_to_stream(figure)
            document.add_heading("Data View", level=2)
            document.add_picture(picture, width=Inches(5.0))

        for analyzer in self._analyzers:
            properties = analyzer.get_properties()
            document.add_heading(properties.get("report_heading"), level=2)
            document.add_paragraph(properties.get("report_description"))

            # Display the analysis result
            document.add_paragraph("Analysis Results", style='ListBullet')
            analysis_result = analyzer(self._dataset, self._columns)
            if isinstance(analysis_result, pd.Series):
                analysis_result = pd.DataFrame(analysis_result)
            if isinstance(analysis_result, pd.DataFrame):
                self._add_dataframe_table(document, analysis_result)
            else:
                document.add_paragraph(str(analysis_result))

            # Add figures
            figure = analyzer.plot()
            if figure:
                picture = self._figure_to_stream(figure)
                document.add_paragraph("Charts", style='ListBullet')
                document.add_picture(picture, width=Inches(5.0))

        document.add_page_break()
        report_path = os.path.join(path, file_name)
        document.save(report_path)
        logger.info(f"save report succcess, save at {report_path}")

    def export_json_report(self, log: bool = True) -> Dict:
        """
        Export a report in the Json format

        Each analyzer contributes one entry, keyed by the analyzer name, holding its
        "heading", "description" and "analysis_results". pandas Series/DataFrame
        results are stored as the JSON string produced by `to_json()`; any other
        result is stored unchanged.

        Args:
            log[bool]: print log or not, default set to True

        Returns:
            Dict

        """
        json_report = {}

        for analyzer in self._analyzers:
            properties = analyzer.get_properties()
            analysis_res = analyzer(self._dataset, self._columns)
            if isinstance(analysis_res, (pd.DataFrame, pd.Series)):
                analysis_res = analysis_res.to_json()

            json_report[properties.get("name")] = {
                "heading": properties.get("report_heading"),
                "description": properties.get("report_description"),
                "analysis_results": analysis_res,
            }

        if log:
            logger.info(json_report)

        return json_report

    def _report_formating(self, document: Document) -> None:
        """
        Initialize analysis report: add the title and the introduction paragraphs

        Args:
            document[Document]: document need to be initialized

        Returns:
            None

        """
        document.add_heading(u' Data Analysis Report ', 0)
        # Introduction paragraphs
        document.add_paragraph(u'This report shows some analysis results in the form of tables and charts')
        document.add_paragraph(u'It is designed to give users a brief overview about the dataset')
        document.add_paragraph(u'Currently, the following analysis methods are supported, including:')
        # ListBullet
        document.add_paragraph(
            u'summary, max, fft, stft, cwt', style='ListBullet')

    @staticmethod
    def _figure_to_stream(figure) -> BytesIO:
        """
        Render a figure into an in-memory stream positioned at its start

        Args:
            figure: object exposing `savefig(stream)`

        Returns:
            BytesIO
        """
        stream = BytesIO()
        figure.savefig(stream)
        # Rewind so any consumer reads the image from the first byte.
        stream.seek(0)
        return stream

    @staticmethod
    def _add_dataframe_table(document: Document, frame: pd.DataFrame) -> None:
        """
        Append `frame` to the document as a grid table

        The first row holds the column labels and the first column holds the
        index labels; every value is written with `str()`.

        Args:
            document[Document]: document to append the table to
            frame[pd.DataFrame]: data to render

        Returns:
            None
        """
        n_rows, n_cols = frame.shape
        table = document.add_table(n_rows + 1, n_cols + 1)
        # The top-left corner (above the index column) stays empty. Cell (0, 0)
        # always exists, even for a frame without columns.
        table.cell(0, 0).text = ""
        # Header row
        for j, column in enumerate(frame.columns):
            table.cell(0, j + 1).text = str(column)
        # Body: materialise index and values once instead of once per cell
        index_labels = frame.index.to_list()
        values = frame.values
        for i in range(n_rows):
            table.cell(i + 1, 0).text = str(index_labels[i])
            for j in range(n_cols):
                table.cell(i + 1, j + 1).text = str(values[i, j])
        table.style = "Table Grid"

    @staticmethod
    def _normalize_names(names: Optional[Union[str, List[str]]]) -> List[str]:
        """
        Turn the accepted forms of `names` into a collection of names

        Args:
            names[str|List[str]|None]: a single name, a collection of names, or None

        Returns:
            List[str]: `[names]` for a string, `[]` for None, otherwise `names` itself
        """
        if names is None:
            return []
        if isinstance(names, str):
            return [names]
        return names

    def _get_analyzers(self, names: Union[str, List[str]], params: Optional[Dict] = None) -> List[Analyzer]:
        """
        Get analyzer objects

        Analyzers are instantiated in the order of the analyzer mapping, once per
        requested name, with the matching entry of `params` as keyword arguments.

        Args:
            names[str|List(str)]: analyzers name list
            params: the parameters of each analyzer, keyed by analyzer name

        Returns:
            List[Analyzer]

        Examples:
            .. code-block:: python

                # example for params:
                {
                    "max": {
                        "param1": 1
                    },
                    "outlier": {
                        "param1": 1,
                        "param2": 2
                    }
                }

        """
        requested = set(self._normalize_names(names))
        analyzers = []
        for name, analyzer_cls in self._get_analyzers_mapping().items():
            if name not in requested:
                continue
            param = params.get(name, None) if params is not None else None
            # Missing or empty params fall back to the analyzer's own defaults.
            analyzers.append(analyzer_cls(**param) if param else analyzer_cls())

        return analyzers

    def get_all_analyzers_names(self, log: bool = True) -> List[str]:
        """
        Get the names of analyzers
        This method can be called internally or externally, and the parameter log is set to False or True accordingly.

        Args:
            log(bool) : Whether to print the log,
                        the default is True when used externally, and set to False when called internally

        Returns:
            List[str]
        """
        analyzers_names = list(self._get_analyzers_mapping())
        if log:
            logger.info("current support analyzers:" + ','.join(analyzers_names))

        return analyzers_names

    def _get_analyzers_mapping(self) -> Dict[str, Analyzer]:
        """
        Get the mapping dict between the name and the class of the analyzer

        Every class exposed by the `paddlets.analysis` package is registered under
        the "name" entry of its `get_properties()` result, except the report class.

        Args:
            None

        Returns:
            Dict[str, Analyzer]: A map containing name-analyzer pairs.

        """
        # Imported here, as in the original implementation, rather than relying on
        # the module-level import.
        from paddlets import analysis
        analyzers_mapping = {}
        for _, obj in inspect.getmembers(analysis, inspect.isclass):
            # Skip the report class itself. The isinstance test (rather than a
            # comparison of class names) also works when `self` is an instance
            # of a subclass of the report class.
            if isinstance(self, obj):
                continue
            analyzers_mapping[obj.get_properties()['name']] = obj

        return analyzers_mapping

    def _validate_analyzers_names(self, names: Union[str, List[str]] = None) -> None:
        """
        Validate the names of analyzer input by the user
        If the analyzer names entered by the user do not exist in the library, an error will be reported

        Args:
            names(str|List(str)): Names of analyzers; a single string is treated as one name

        Returns:
            None

        Raise:
            ValueError

        """
        analyzer_names = self.get_all_analyzers_names(log=False)

        missing_names = set(self._normalize_names(names)) - set(analyzer_names)
        raise_if_not(not missing_names,
            f"Invalid analyzer names, analyzer {missing_names} do not exist, please use get_all_analyzers_names() method to get currently supported analyzers!")