mmcv.runner.hooks.logger.text 源代码
# Copyright (c) OpenMMLab. All rights reserved.
import datetime
import os
import os.path as osp
from collections import OrderedDict
from typing import Dict, Optional, Union
import torch
import torch.distributed as dist
import mmcv
from mmcv.fileio.file_client import FileClient
from mmcv.utils import is_tuple_of, scandir
from ..hook import HOOKS
from .base import LoggerHook
[文档]@HOOKS.register_module()
class TextLoggerHook(LoggerHook):
"""Logger hook in text.
In this logger hook, the information will be printed on terminal and
saved in json file.
Args:
by_epoch (bool, optional): Whether EpochBasedRunner is used.
Default: True.
interval (int, optional): Logging interval (every k iterations).
Default: 10.
ignore_last (bool, optional): Ignore the log of last iterations in each
epoch if less than :attr:`interval`. Default: True.
reset_flag (bool, optional): Whether to clear the output buffer after
logging. Default: False.
interval_exp_name (int, optional): Logging interval for experiment
name. This feature is to help users conveniently get the experiment
information from screen or log file. Default: 1000.
out_dir (str, optional): Logs are saved in ``runner.work_dir`` default.
If ``out_dir`` is specified, logs will be copied to a new directory
which is the concatenation of ``out_dir`` and the last level
directory of ``runner.work_dir``. Default: None.
`New in version 1.3.16.`
out_suffix (str or tuple[str], optional): Those filenames ending with
``out_suffix`` will be copied to ``out_dir``.
Default: ('.log.json', '.log', '.py').
`New in version 1.3.16.`
keep_local (bool, optional): Whether to keep local log when
:attr:`out_dir` is specified. If False, the local log will be
removed. Default: True.
`New in version 1.3.16.`
file_client_args (dict, optional): Arguments to instantiate a
FileClient. See :class:`mmcv.fileio.FileClient` for details.
Default: None.
`New in version 1.3.16.`
"""
def __init__(self,
by_epoch: bool = True,
interval: int = 10,
ignore_last: bool = True,
reset_flag: bool = False,
interval_exp_name: int = 1000,
out_dir: Optional[str] = None,
out_suffix: Union[str, tuple] = ('.log.json', '.log', '.py'),
keep_local: bool = True,
file_client_args: Optional[Dict] = None):
super().__init__(interval, ignore_last, reset_flag, by_epoch)
self.by_epoch = by_epoch
self.time_sec_tot = 0
self.interval_exp_name = interval_exp_name
if out_dir is None and file_client_args is not None:
raise ValueError(
'file_client_args should be "None" when `out_dir` is not'
'specified.')
self.out_dir = out_dir
if not (out_dir is None or isinstance(out_dir, str)
or is_tuple_of(out_dir, str)):
raise TypeError('out_dir should be "None" or string or tuple of '
'string, but got {out_dir}')
self.out_suffix = out_suffix
self.keep_local = keep_local
self.file_client_args = file_client_args
if self.out_dir is not None:
self.file_client = FileClient.infer_client(file_client_args,
self.out_dir)
def before_run(self, runner) -> None:
super().before_run(runner)
if self.out_dir is not None:
self.file_client = FileClient.infer_client(self.file_client_args,
self.out_dir)
# The final `self.out_dir` is the concatenation of `self.out_dir`
# and the last level directory of `runner.work_dir`
basename = osp.basename(runner.work_dir.rstrip(osp.sep))
self.out_dir = self.file_client.join_path(self.out_dir, basename)
runner.logger.info(
f'Text logs will be saved to {self.out_dir} by '
f'{self.file_client.name} after the training process.')
self.start_iter = runner.iter
self.json_log_path = osp.join(runner.work_dir,
f'{runner.timestamp}.log.json')
if runner.meta is not None:
self._dump_log(runner.meta, runner)
def _get_max_memory(self, runner) -> int:
device = getattr(runner.model, 'output_device', None)
mem = torch.cuda.max_memory_allocated(device=device)
mem_mb = torch.tensor([int(mem) // (1024 * 1024)],
dtype=torch.int,
device=device)
if runner.world_size > 1:
dist.reduce(mem_mb, 0, op=dist.ReduceOp.MAX)
return mem_mb.item()
def _log_info(self, log_dict: Dict, runner) -> None:
# print exp name for users to distinguish experiments
# at every ``interval_exp_name`` iterations and the end of each epoch
if runner.meta is not None and 'exp_name' in runner.meta:
if (self.every_n_iters(runner, self.interval_exp_name)) or (
self.by_epoch and self.end_of_epoch(runner)):
exp_info = f'Exp name: {runner.meta["exp_name"]}'
runner.logger.info(exp_info)
if log_dict['mode'] == 'train':
if isinstance(log_dict['lr'], dict):
lr_str = []
for k, val in log_dict['lr'].items():
lr_str.append(f'lr_{k}: {val:.3e}')
lr_str = ' '.join(lr_str) # type: ignore
else:
lr_str = f'lr: {log_dict["lr"]:.3e}' # type: ignore
# by epoch: Epoch [4][100/1000]
# by iter: Iter [100/100000]
if self.by_epoch:
log_str = f'Epoch [{log_dict["epoch"]}]' \
f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t'
else:
log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t'
log_str += f'{lr_str}, '
if 'time' in log_dict.keys():
self.time_sec_tot += (log_dict['time'] * self.interval)
time_sec_avg = self.time_sec_tot / (
runner.iter - self.start_iter + 1)
eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1)
eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
log_str += f'eta: {eta_str}, '
log_str += f'time: {log_dict["time"]:.3f}, ' \
f'data_time: {log_dict["data_time"]:.3f}, '
# statistic memory
if torch.cuda.is_available():
log_str += f'memory: {log_dict["memory"]}, '
else:
# val/test time
# here 1000 is the length of the val dataloader
# by epoch: Epoch[val] [4][1000]
# by iter: Iter[val] [1000]
if self.by_epoch:
log_str = f'Epoch({log_dict["mode"]}) ' \
f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t'
else:
log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t'
log_items = []
for name, val in log_dict.items():
# TODO: resolve this hack
# these items have been in log_str
if name in [
'mode', 'Epoch', 'iter', 'lr', 'time', 'data_time',
'memory', 'epoch'
]:
continue
if isinstance(val, float):
val = f'{val:.4f}'
log_items.append(f'{name}: {val}')
log_str += ', '.join(log_items)
runner.logger.info(log_str)
def _dump_log(self, log_dict: Dict, runner) -> None:
# dump log in json format
json_log = OrderedDict()
for k, v in log_dict.items():
json_log[k] = self._round_float(v)
# only append log at last line
if runner.rank == 0:
with open(self.json_log_path, 'a+') as f:
mmcv.dump(json_log, f, file_format='json')
f.write('\n')
def _round_float(self, items):
if isinstance(items, list):
return [self._round_float(item) for item in items]
elif isinstance(items, float):
return round(items, 5)
else:
return items
def log(self, runner) -> OrderedDict:
if 'eval_iter_num' in runner.log_buffer.output:
# this doesn't modify runner.iter and is regardless of by_epoch
cur_iter = runner.log_buffer.output.pop('eval_iter_num')
else:
cur_iter = self.get_iter(runner, inner_iter=True)
log_dict = OrderedDict(
mode=self.get_mode(runner),
epoch=self.get_epoch(runner),
iter=cur_iter)
# only record lr of the first param group
cur_lr = runner.current_lr()
if isinstance(cur_lr, list):
log_dict['lr'] = cur_lr[0]
else:
assert isinstance(cur_lr, dict)
log_dict['lr'] = {}
for k, lr_ in cur_lr.items():
assert isinstance(lr_, list)
log_dict['lr'].update({k: lr_[0]})
if 'time' in runner.log_buffer.output:
# statistic memory
if torch.cuda.is_available():
log_dict['memory'] = self._get_max_memory(runner)
log_dict = dict(log_dict, **runner.log_buffer.output) # type: ignore
self._log_info(log_dict, runner)
self._dump_log(log_dict, runner)
return log_dict
def after_run(self, runner) -> None:
# copy or upload logs to self.out_dir
if self.out_dir is not None:
for filename in scandir(runner.work_dir, self.out_suffix, True):
local_filepath = osp.join(runner.work_dir, filename)
out_filepath = self.file_client.join_path(
self.out_dir, filename)
with open(local_filepath) as f:
self.file_client.put_text(f.read(), out_filepath)
runner.logger.info(
f'The file {local_filepath} has been uploaded to '
f'{out_filepath}.')
if not self.keep_local:
os.remove(local_filepath)
runner.logger.info(
f'{local_filepath} was removed due to the '
'`self.keep_local=False`')