
Source code for mmcv.runner.hooks.logger.text

# Copyright (c) OpenMMLab. All rights reserved.
import datetime
import os
import os.path as osp
from collections import OrderedDict

import torch
import torch.distributed as dist

import mmcv
from mmcv.fileio.file_client import FileClient
from mmcv.utils import is_tuple_of, scandir
from ..hook import HOOKS
from .base import LoggerHook


@HOOKS.register_module()
class TextLoggerHook(LoggerHook):
    """Logger hook in text.

    In this logger hook, the information will be printed on terminal and
    saved in json file.

    Args:
        by_epoch (bool, optional): Whether EpochBasedRunner is used.
            Default: True.
        interval (int, optional): Logging interval (every k iterations).
            Default: 10.
        ignore_last (bool, optional): Ignore the log of last iterations in
            each epoch if less than :attr:`interval`. Default: True.
        reset_flag (bool, optional): Whether to clear the output buffer after
            logging. Default: False.
        interval_exp_name (int, optional): Logging interval for experiment
            name. This feature is to help users conveniently get the
            experiment information from screen or log file. Default: 1000.
        out_dir (str, optional): Logs are saved in ``runner.work_dir`` by
            default. If ``out_dir`` is specified, logs will be copied to a
            new directory which is the concatenation of ``out_dir`` and the
            last level directory of ``runner.work_dir``. Default: None.
            `New in version 1.3.16.`
        out_suffix (str or tuple[str], optional): Those filenames ending with
            ``out_suffix`` will be copied to ``out_dir``.
            Default: ('.log.json', '.log', '.py').
            `New in version 1.3.16.`
        keep_local (bool, optional): Whether to keep local log when
            :attr:`out_dir` is specified. If False, the local log will be
            removed. Default: True.
            `New in version 1.3.16.`
        file_client_args (dict, optional): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.
            `New in version 1.3.16.`
    """

    def __init__(self,
                 by_epoch=True,
                 interval=10,
                 ignore_last=True,
                 reset_flag=False,
                 interval_exp_name=1000,
                 out_dir=None,
                 out_suffix=('.log.json', '.log', '.py'),
                 keep_local=True,
                 file_client_args=None):
        super(TextLoggerHook, self).__init__(interval, ignore_last,
                                             reset_flag, by_epoch)
        self.by_epoch = by_epoch
        self.time_sec_tot = 0
        self.interval_exp_name = interval_exp_name

        if out_dir is None and file_client_args is not None:
            raise ValueError(
                'file_client_args should be "None" when `out_dir` is not '
                'specified.')
        self.out_dir = out_dir

        if not (out_dir is None or isinstance(out_dir, str)
                or is_tuple_of(out_dir, str)):
            raise TypeError('out_dir should be "None" or string or tuple of '
                            f'string, but got {out_dir}')
        self.out_suffix = out_suffix

        self.keep_local = keep_local
        self.file_client_args = file_client_args
        if self.out_dir is not None:
            self.file_client = FileClient.infer_client(file_client_args,
                                                       self.out_dir)

    def before_run(self, runner):
        super(TextLoggerHook, self).before_run(runner)

        if self.out_dir is not None:
            self.file_client = FileClient.infer_client(self.file_client_args,
                                                       self.out_dir)
            # The final `self.out_dir` is the concatenation of `self.out_dir`
            # and the last level directory of `runner.work_dir`
            basename = osp.basename(runner.work_dir.rstrip(osp.sep))
            self.out_dir = self.file_client.join_path(self.out_dir, basename)
            runner.logger.info(
                (f'Text logs will be saved to {self.out_dir} by '
                 f'{self.file_client.name} after the training process.'))

        self.start_iter = runner.iter
        self.json_log_path = osp.join(runner.work_dir,
                                      f'{runner.timestamp}.log.json')
        if runner.meta is not None:
            self._dump_log(runner.meta, runner)

    def _get_max_memory(self, runner):
        device = getattr(runner.model, 'output_device', None)
        mem = torch.cuda.max_memory_allocated(device=device)
        mem_mb = torch.tensor([mem / (1024 * 1024)],
                              dtype=torch.int,
                              device=device)
        if runner.world_size > 1:
            dist.reduce(mem_mb, 0, op=dist.ReduceOp.MAX)
        return mem_mb.item()

    def _log_info(self, log_dict, runner):
        # print exp name for users to distinguish experiments
        # at every ``interval_exp_name`` iterations and the end of each epoch
        if runner.meta is not None and 'exp_name' in runner.meta:
            if (self.every_n_iters(runner, self.interval_exp_name)) or (
                    self.by_epoch and self.end_of_epoch(runner)):
                exp_info = f'Exp name: {runner.meta["exp_name"]}'
                runner.logger.info(exp_info)

        if log_dict['mode'] == 'train':
            if isinstance(log_dict['lr'], dict):
                lr_str = []
                for k, val in log_dict['lr'].items():
                    lr_str.append(f'lr_{k}: {val:.3e}')
                lr_str = ' '.join(lr_str)
            else:
                lr_str = f'lr: {log_dict["lr"]:.3e}'

            # by epoch: Epoch [4][100/1000]
            # by iter:  Iter [100/100000]
            if self.by_epoch:
                log_str = f'Epoch [{log_dict["epoch"]}]' \
                          f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t'
            else:
                log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t'
            log_str += f'{lr_str}, '

            if 'time' in log_dict.keys():
                self.time_sec_tot += (log_dict['time'] * self.interval)
                time_sec_avg = self.time_sec_tot / (
                    runner.iter - self.start_iter + 1)
                eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1)
                eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
                log_str += f'eta: {eta_str}, '
                log_str += f'time: {log_dict["time"]:.3f}, ' \
                           f'data_time: {log_dict["data_time"]:.3f}, '
                # statistic memory
                if torch.cuda.is_available():
                    log_str += f'memory: {log_dict["memory"]}, '
        else:
            # val/test time
            # here 1000 is the length of the val dataloader
            # by epoch: Epoch[val] [4][1000]
            # by iter:  Iter[val] [1000]
            if self.by_epoch:
                log_str = f'Epoch({log_dict["mode"]}) ' \
                          f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t'
            else:
                log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t'

        log_items = []
        for name, val in log_dict.items():
            # TODO: resolve this hack
            # these items have been in log_str
            if name in [
                    'mode', 'Epoch', 'iter', 'lr', 'time', 'data_time',
                    'memory', 'epoch'
            ]:
                continue
            if isinstance(val, float):
                val = f'{val:.4f}'
            log_items.append(f'{name}: {val}')
        log_str += ', '.join(log_items)

        runner.logger.info(log_str)

    def _dump_log(self, log_dict, runner):
        # dump log in json format
        json_log = OrderedDict()
        for k, v in log_dict.items():
            json_log[k] = self._round_float(v)
        # only append log at last line
        if runner.rank == 0:
            with open(self.json_log_path, 'a+') as f:
                mmcv.dump(json_log, f, file_format='json')
                f.write('\n')

    def _round_float(self, items):
        if isinstance(items, list):
            return [self._round_float(item) for item in items]
        elif isinstance(items, float):
            return round(items, 5)
        else:
            return items

    def log(self, runner):
        if 'eval_iter_num' in runner.log_buffer.output:
            # this doesn't modify runner.iter and is regardless of by_epoch
            cur_iter = runner.log_buffer.output.pop('eval_iter_num')
        else:
            cur_iter = self.get_iter(runner, inner_iter=True)

        log_dict = OrderedDict(
            mode=self.get_mode(runner),
            epoch=self.get_epoch(runner),
            iter=cur_iter)

        # only record lr of the first param group
        cur_lr = runner.current_lr()
        if isinstance(cur_lr, list):
            log_dict['lr'] = cur_lr[0]
        else:
            assert isinstance(cur_lr, dict)
            log_dict['lr'] = {}
            for k, lr_ in cur_lr.items():
                assert isinstance(lr_, list)
                log_dict['lr'].update({k: lr_[0]})

        if 'time' in runner.log_buffer.output:
            # statistic memory
            if torch.cuda.is_available():
                log_dict['memory'] = self._get_max_memory(runner)

        log_dict = dict(log_dict, **runner.log_buffer.output)

        self._log_info(log_dict, runner)
        self._dump_log(log_dict, runner)
        return log_dict

    def after_run(self, runner):
        # copy or upload logs to self.out_dir
        if self.out_dir is not None:
            for filename in scandir(runner.work_dir, self.out_suffix, True):
                local_filepath = osp.join(runner.work_dir, filename)
                out_filepath = self.file_client.join_path(
                    self.out_dir, filename)
                with open(local_filepath, 'r') as f:
                    self.file_client.put_text(f.read(), out_filepath)

                runner.logger.info(
                    (f'The file {local_filepath} has been uploaded to '
                     f'{out_filepath}.'))

                if not self.keep_local:
                    os.remove(local_filepath)
                    runner.logger.info(
                        (f'{local_filepath} was removed due to the '
                         '`self.keep_local=False`'))
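
For orientation, below is a minimal usage sketch, not part of the module above. The interval, the `out_dir` path, and the sample JSON values are illustrative assumptions, not defaults of this hook.

# Minimal usage sketch (assumptions noted in comments).
from mmcv.runner import TextLoggerHook

# Instantiate directly; `out_dir` here is a hypothetical local path, so the
# inferred FileClient is the local-disk backend (no file_client_args needed).
hook = TextLoggerHook(
    interval=50,            # log every 50 iterations (illustrative)
    by_epoch=True,
    out_dir='/tmp/archived_logs',  # logs copied here in `after_run`
    keep_local=True)

# More commonly the hook is declared in a config dict and registered by the
# runner, e.g. via `runner.register_logger_hooks(log_config)`:
log_config = dict(
    interval=50,
    hooks=[dict(type='TextLoggerHook', by_epoch=True)])

# Each `log()` call appends one JSON line to `{runner.timestamp}.log.json`;
# a typical line looks like this (values are illustrative):
# {"mode": "train", "epoch": 1, "iter": 50, "lr": 0.02, "time": 0.214,
#  "data_time": 0.047, "memory": 3101, "loss": 0.693}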