# Source code for paddlets.analysis.analysis_report

# !/usr/bin/env python3
# -*- coding:utf-8 -*-

from io import BytesIO
import inspect
import json
import os
from typing import Any, Callable, List, Optional, Sequence, Tuple, Union, Dict

import pandas as pd
from docx import Document
from docx.shared import Inches

from paddlets import TimeSeries, TSDataset, analysis
from paddlets.analysis.base import Analyzer
from paddlets.logger import Logger, raise_if_not, raise_if, raise_log

# Module-level logger, named after this module.
logger = Logger(__name__)

# Analyzer names AnalysisReport falls back to when the caller passes names=None.
DEFAULT_ANALYZERS = ["summary", "max"]


class AnalysisReport(object):
    """
    Aggregate the results of several analyzers into a single report.

    The results are rendered as tables and charts. Two output formats are
    currently supported: a docx document and a JSON-like dict.

    Args:
        dataset[TSDataset]: TSDataset to be analyzed.
        names[str|List[str]]: Analyzer name(s). DEFAULT_ANALYZERS is used when None.
            A single string is treated as a one-element list.
        params[Dict]: Per-analyzer constructor keyword arguments, keyed by analyzer name.
        columns[str|List[str]]: Columns to be analyzed; handed to every analyzer call as-is.

    Examples:
        .. code-block:: python

            # example for names
            names = ["max", "outlier"]

            # example for params
            params = {
                "max": {"param1": 1},
                "outlier": {"param1": 1, "param2": 2},
            }
    """

    def __init__(
        self,
        dataset: TSDataset,
        names: Optional[Union[str, List[str]]] = None,
        params: Optional[Dict] = None,
        columns: Optional[Union[str, List[str]]] = None
    ) -> None:
        if names is None:
            # Copy so the instance never aliases the shared module-level default list.
            names = list(DEFAULT_ANALYZERS)
        else:
            # A bare string must become a list first; otherwise set(names) and
            # `name in names` would operate on individual characters/substrings.
            names = self._normalize_names(names)
            self._validate_analyzers_names(names)
        self._dataset = dataset
        self._names = names
        self._columns = columns
        self._analyzers = self._get_analyzers(names, params)

    def export_docx_report(self, path: str = ".", file_name: str = "analysis_report.docx") -> None:
        """
        Export the report as a docx document.

        Args:
            path[str]: Existing directory to save the report in, the current directory by default.
            file_name[str]: Name of the report file, "analysis_report.docx" by default.

        Returns:
            None

        Raises:
            ValueError: Raised through raise_log when `path` does not exist.
        """
        # Validate export path
        if not os.path.exists(path):
            raise_log(ValueError("export path do not exist, please check"))

        # Create new Document with the fixed introduction section
        document = Document()
        self._report_formating(document)

        # Overview plot of the raw dataset
        figure = self._dataset.plot().get_figure()
        if figure:
            stream = self._figure_to_stream(figure)
            document.add_heading("Data View", level=2)
            document.add_picture(stream, width=Inches(5.0))

        # One section per analyzer: heading, description, result, optional chart
        for analyzer in self._analyzers:
            properties = analyzer.get_properties()
            document.add_heading(properties.get("report_heading"), level=2)
            document.add_paragraph(properties.get("report_description"))

            # Display the analysis result
            document.add_paragraph("Analysis Results", style="List Bullet")
            analysis_result = analyzer(self._dataset, self._columns)
            if isinstance(analysis_result, pd.Series):
                analysis_result = pd.DataFrame(analysis_result)
            if isinstance(analysis_result, pd.DataFrame):
                self._add_dataframe_table(document, analysis_result)
            else:
                document.add_paragraph(str(analysis_result))

            # Add figures; plot() is called after the analyzer itself has been run
            figure = analyzer.plot()
            if figure:
                stream = self._figure_to_stream(figure)
                document.add_paragraph("Charts", style="List Bullet")
                document.add_picture(stream, width=Inches(5.0))
            document.add_page_break()

        report_path = os.path.join(path, file_name)
        document.save(report_path)
        logger.info(f"save report succcess, save at {report_path}")

    def export_json_report(self, log: bool = True) -> Dict:
        """
        Export the report as a dict keyed by analyzer name.

        Each entry holds the analyzer's "heading", "description" and
        "analysis_results". pandas results are serialised with `to_json()`;
        any other result type is stored unchanged.

        Args:
            log[bool]: Whether to log the report, True by default.

        Returns:
            Dict
        """
        json_report = {}
        for analyzer in self._analyzers:
            properties = analyzer.get_properties()
            analysis_res = analyzer(self._dataset, self._columns)
            if isinstance(analysis_res, (pd.DataFrame, pd.Series)):
                analysis_res = analysis_res.to_json()
            json_report[properties.get("name")] = {
                "heading": properties.get("report_heading"),
                "description": properties.get("report_description"),
                "analysis_results": analysis_res,
            }
        if log:
            logger.info(json_report)
        return json_report

    @staticmethod
    def _normalize_names(names: Optional[Union[str, Sequence[str]]]) -> List[str]:
        """
        Return the analyzer names as a new list.

        Args:
            names[str|Sequence[str]]: One name, several names, or None.

        Returns:
            List[str]: [names] for a single string, [] for None, otherwise a list copy.
        """
        if names is None:
            return []
        if isinstance(names, str):
            return [names]
        return list(names)

    @staticmethod
    def _figure_to_stream(figure: Any) -> BytesIO:
        """
        Render a figure into an in-memory stream.

        Args:
            figure: Object exposing `savefig(file)`.

        Returns:
            BytesIO: Stream holding the rendered image.
        """
        stream = BytesIO()
        figure.savefig(stream)
        return stream

    @staticmethod
    def _add_dataframe_table(document: Document, frame: pd.DataFrame) -> None:
        """
        Append a DataFrame to the document as a grid table.

        The first row holds the column labels and the first column holds the
        index labels; every value is written as `str(value)`.

        Args:
            document[Document]: Document to append the table to.
            frame[pd.DataFrame]: Data to render.

        Returns:
            None
        """
        n_rows, n_cols = frame.shape
        index_labels = frame.index.to_list()
        # Materialise the value array once instead of once per cell.
        values = frame.values
        table = document.add_table(n_rows + 1, n_cols + 1)

        # Header row; the top-left corner cell stays empty.
        table.cell(0, 0).text = ""
        for j in range(n_cols):
            table.cell(0, j + 1).text = str(frame.columns[j])

        # Body rows
        for i in range(n_rows):
            table.cell(i + 1, 0).text = str(index_labels[i])
            for j in range(n_cols):
                table.cell(i + 1, j + 1).text = str(values[i, j])
        table.style = "Table Grid"

    def _report_formating(self, document: Document) -> None:
        """
        Write the title and the introduction paragraphs of the report.

        Args:
            document[Document]: Document to be initialized.

        Returns:
            None
        """
        document.add_heading(u' Data Analysis Report ', 0)
        # Introduction paragraphs
        document.add_paragraph(u'This report shows some analysis results in the form of tables and charts')
        document.add_paragraph(u'It is designed to give users a brief overview about the dataset')
        document.add_paragraph(u'Currently, the following analysis methods are supported, including:')
        # Bulleted list of the supported methods
        document.add_paragraph(u'summary, max, fft, stft, cwt', style="List Bullet")

    def _get_analyzers(self, names: Union[str, List[str]], params: Optional[Dict] = None) -> List[Analyzer]:
        """
        Instantiate the requested analyzers.

        Analyzers are returned in the iteration order of the registry built by
        `_get_analyzers_mapping`, not in the order given in `names`.

        Args:
            names[str|List[str]]: Analyzer name(s) to instantiate.
            params[Dict]: Constructor keyword arguments per analyzer name. An analyzer
                with no (or an empty) entry is built without arguments.

        Returns:
            List[Analyzer]

        Examples:
            .. code-block:: python

                # example for params
                params = {
                    "max": {"param1": 1},
                    "outlier": {"param1": 1, "param2": 2},
                }
        """
        wanted = self._normalize_names(names)
        params = {} if params is None else params
        analyzers = []
        for name, analyzer_cls in self._get_analyzers_mapping().items():
            if name not in wanted:
                continue
            kwargs = params.get(name, None)
            analyzers.append(analyzer_cls(**kwargs) if kwargs else analyzer_cls())
        return analyzers

    def get_all_analyzers_names(self, log: bool = True) -> List[str]:
        """
        Get the names of all available analyzers.

        Args:
            log[bool]: Whether to log the names. True by default for external
                callers; internal callers pass False.

        Returns:
            List[str]
        """
        analyzers_names = list(self._get_analyzers_mapping())
        if log:
            logger.info("current support analyzers:" + ','.join(analyzers_names))
        return analyzers_names

    def _get_analyzers_mapping(self) -> Dict[str, Analyzer]:
        """
        Build the mapping from analyzer name to analyzer class.

        Every class exposed by `paddlets.analysis`, except the one named like
        this class, is registered under `get_properties()['name']`.

        Returns:
            Dict[str, Analyzer]: A map containing name-analyzer pairs.
        """
        # NOTE(review): local import presumably guards against an import cycle with
        # the paddlets.analysis package -- confirm before moving it to module level.
        from paddlets import analysis

        own_name = self.__class__.__name__
        analyzers_mapping = {}
        for cls_name, cls in inspect.getmembers(analysis, inspect.isclass):
            if cls_name == own_name:
                continue
            analyzers_mapping[cls.get_properties()['name']] = cls
        return analyzers_mapping

    def _validate_analyzers_names(self, names: Union[str, List[str]] = None) -> None:
        """
        Check that every requested analyzer name is available.

        Args:
            names[str|List[str]]: Analyzer name(s) given by the user.

        Returns:
            None

        Raises:
            ValueError: Presumably raised by raise_if_not when at least one name is
                unknown (exception type as stated by the original docstring).
        """
        analyzer_names = self.get_all_analyzers_names(log=False)
        missing_names = set(self._normalize_names(names)) - set(analyzer_names)
        raise_if_not(len(missing_names) == 0, f"Invalid analyzer names, analyzer {missing_names} do not exist, please use get_all_analyzers_names() method to get currently supported analyzers!")