Source code for digideep.environment.common.monitor

"""
The MIT License

Copyright (c) 2017 OpenAI (http://openai.com)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""


__all__ = ['Monitor', 'get_monitor_files', 'load_results']

import gym
from gym.core import Wrapper
import time
from glob import glob
import csv
import os.path as osp
import json
import numpy as np

class Monitor(Wrapper):
    """Gym wrapper that records per-episode reward, length and elapsed time.

    Each finished episode is appended as one row to a CSV file (through
    ``ResultsWriter``) and also exposed to the caller as ``info['episode']``.

    Args:
        env: The environment to wrap.
        filename: Destination passed to ``ResultsWriter``; ``None`` disables
            file logging.
        allow_early_resets: If false, calling ``reset()`` before the current
            episode reported ``done`` raises ``RuntimeError``.
        reset_keywords: Names of keyword arguments that must be passed to
            ``reset()``; their values are logged with each episode.
        info_keywords: Keys copied from the final step's ``info`` dict into
            the logged episode row.
    """

    EXT = "monitor.csv"
    # Kept for backward compatibility; the log file itself is owned by
    # ``self.results_writer``.
    f = None

    def __init__(self, env, filename, allow_early_resets=False, reset_keywords=(), info_keywords=()):
        Wrapper.__init__(self, env=env)
        self.tstart = time.time()
        # Coerce to tuples so a mix of lists and tuples can be concatenated.
        self.reset_keywords = tuple(reset_keywords)
        self.info_keywords = tuple(info_keywords)
        self.results_writer = ResultsWriter(
            filename,
            # Use the same timestamp episode times are measured against, so
            # that header ``t_start`` + row ``t`` is the true wall-clock time.
            header={"t_start": self.tstart, 'env_id': env.spec and env.spec.id},
            extra_keys=self.reset_keywords + self.info_keywords
        )
        self.allow_early_resets = allow_early_resets
        self.rewards = None
        self.needs_reset = True
        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_times = []
        self.total_steps = 0
        # Extra info about the current episode, that was passed in during reset()
        self.current_reset_info = {}

    def reset(self, **kwargs):
        """Reset the wrapped environment and start tracking a new episode.

        Raises:
            RuntimeError: On an early reset when ``allow_early_resets`` is false.
            ValueError: If a keyword listed in ``reset_keywords`` is missing
                (or passed as ``None``).
        """
        self.reset_state()
        for k in self.reset_keywords:
            v = kwargs.get(k)
            if v is None:
                raise ValueError('Expected you to pass kwarg %s into reset' % k)
            self.current_reset_info[k] = v
        return self.env.reset(**kwargs)

    def reset_state(self):
        """Clear the per-episode reward buffer, enforcing the early-reset policy."""
        if not self.allow_early_resets and not self.needs_reset:
            raise RuntimeError("Tried to reset an environment before done. If you want to allow early resets, wrap your env with Monitor(env, path, allow_early_resets=True)")
        self.rewards = []
        self.needs_reset = False

    def step(self, action):
        """Step the wrapped environment and record the transition.

        Raises:
            RuntimeError: If the episode has ended and ``reset()`` was not called.
        """
        if self.needs_reset:
            raise RuntimeError("Tried to step environment that needs reset")
        ob, rew, done, info = self.env.step(action)
        self.update(ob, rew, done, info)
        return (ob, rew, done, info)

    def update(self, ob, rew, done, info):
        """Record one transition; on ``done`` log the finished episode."""
        self.rewards.append(rew)
        if done:
            self.needs_reset = True
            eprew = sum(self.rewards)
            eplen = len(self.rewards)
            # Sample the clock once so the logged row and episode_times agree.
            elapsed = time.time() - self.tstart
            epinfo = {"r": round(eprew, 6), "l": eplen, "t": round(elapsed, 6)}
            for k in self.info_keywords:
                epinfo[k] = info[k]
            self.episode_rewards.append(eprew)
            self.episode_lengths.append(eplen)
            self.episode_times.append(elapsed)
            epinfo.update(self.current_reset_info)
            self.results_writer.write_row(epinfo)
            if isinstance(info, dict):
                info['episode'] = epinfo
        self.total_steps += 1

    def close(self):
        """Close the monitor's log file.

        The file handle lives on ``self.results_writer``; the class-level
        ``self.f`` is never assigned by this class, so checking only ``self.f``
        left the log file open.
        """
        # NOTE(review): as before, the wrapped env's close() is not invoked
        # here -- confirm whether callers expect it to be.
        log_file = getattr(self.results_writer, "f", None)
        if log_file is not None and not log_file.closed:
            log_file.close()
        if self.f is not None:
            self.f.close()

    def get_total_steps(self):
        """Return the number of transitions recorded so far."""
        return self.total_steps

    def get_episode_rewards(self):
        """Return the list of total rewards, one entry per finished episode."""
        return self.episode_rewards

    def get_episode_lengths(self):
        """Return the list of episode lengths, one entry per finished episode."""
        return self.episode_lengths

    def get_episode_times(self):
        """Return the elapsed time since construction at each episode's end."""
        return self.episode_times
class LoadMonitorResultsError(Exception):
    """Raised by ``load_results`` when monitor files cannot be loaded."""
    pass


class ResultsWriter(object):
    """Append episode rows to a monitor CSV file.

    Args:
        filename: Target path. ``None`` disables writing. A directory gets
            ``Monitor.EXT`` as the file name; any other path that does not
            already end with ``Monitor.EXT`` gets ``"." + Monitor.EXT`` appended.
        header: First line of the file. A dict is serialized as
            ``'# <json> \\n'``; a string is written verbatim.
        extra_keys: Additional CSV columns after ``r``, ``l`` and ``t``.
    """

    def __init__(self, filename=None, header='', extra_keys=()):
        self.extra_keys = extra_keys
        if filename is None:
            self.f = None
            self.logger = None
        else:
            if not filename.endswith(Monitor.EXT):
                if osp.isdir(filename):
                    filename = osp.join(filename, Monitor.EXT)
                else:
                    filename = filename + "." + Monitor.EXT
            self.f = open(filename, "wt")
            try:
                if isinstance(header, dict):
                    header = '# {} \n'.format(json.dumps(header))
                self.f.write(header)
                self.logger = csv.DictWriter(self.f, fieldnames=('r', 'l', 't') + tuple(extra_keys))
                self.logger.writeheader()
                self.f.flush()
            except Exception:
                # Do not leak the handle if the header cannot be written.
                self.f.close()
                raise

    def write_row(self, epinfo):
        """Write one episode dict as a CSV row and flush it to disk.

        Does nothing when the writer was created with ``filename=None``.
        """
        if self.logger:
            self.logger.writerow(epinfo)
            self.f.flush()

    def close(self):
        """Close the underlying file, if any. Safe to call more than once."""
        if self.f is not None and not self.f.closed:
            self.f.close()
def get_monitor_files(dir):
    """Return the paths in ``dir`` whose names end with ``Monitor.EXT``."""
    pattern = osp.join(dir, "*" + Monitor.EXT)
    matches = glob(pattern)
    return matches
def load_results(dir):
    """Load every monitor log in ``dir`` into a single pandas DataFrame.

    Both the current ``*monitor.csv`` files and the deprecated
    ``*monitor.json`` files are read. Each file's ``t`` column is shifted by
    that file's ``t_start`` so rows from different files can be sorted on a
    common clock; the result is then re-based to the earliest ``t_start``.

    Args:
        dir: Directory to search.

    Returns:
        A DataFrame sorted by ``t``. The parsed header dicts are attached as
        ``df.headers``.

    Raises:
        LoadMonitorResultsError: If no monitor files are found, if all of
            them are empty, or if a csv file does not start with a ``#``
            header line.
    """
    import pandas

    # Get both csv and (old) json files.
    monitor_files = (
        glob(osp.join(dir, "*monitor.json")) +
        glob(osp.join(dir, "*monitor.csv")))
    if not monitor_files:
        raise LoadMonitorResultsError("no monitor files of the form *%s found in %s" % (Monitor.EXT, dir))

    dfs = []
    headers = []
    for fname in monitor_files:
        with open(fname, 'rt') as fh:
            if fname.endswith('csv'):
                firstline = fh.readline()
                if not firstline:
                    # Empty file: nothing was logged.
                    continue
                if firstline[0] != '#':
                    # Explicit raise instead of assert, which -O strips.
                    raise LoadMonitorResultsError(
                        "monitor file %s does not start with a '#' header line" % fname)
                header = json.loads(firstline[1:])
                df = pandas.read_csv(fh, index_col=None)
            elif fname.endswith('json'):
                # Deprecated json format: header line, then one episode per line.
                lines = fh.readlines()
                if not lines:
                    # Treat an empty json log like an empty csv log.
                    continue
                header = json.loads(lines[0])
                df = pandas.DataFrame([json.loads(line) for line in lines[1:]])
            else:
                assert 0, 'unreachable'
            headers.append(header)
            # Convert per-file relative times to absolute timestamps.
            df['t'] += header['t_start']
        dfs.append(df)

    if not dfs:
        raise LoadMonitorResultsError("all monitor files found in %s are empty" % dir)

    df = pandas.concat(dfs)
    df.sort_values('t', inplace=True)
    df.reset_index(inplace=True)
    df['t'] -= min(header['t_start'] for header in headers)
    df.headers = headers  # HACK to preserve backwards compatibility
    return df
def test_monitor():
    """Smoke-test Monitor: run CartPole and check the written log's format."""
    # Imported locally: the module header does not import these names.
    import os
    import tempfile
    import uuid

    import pandas

    env = gym.make("CartPole-v1")
    env.seed(0)
    mon_file = os.path.join(
        tempfile.gettempdir(),
        "baselines-test-%s.monitor.csv" % uuid.uuid4())
    menv = Monitor(env, mon_file)
    try:
        menv.reset()
        for _ in range(1000):
            _, _, done, _ = menv.step(0)
            if done:
                menv.reset()

        with open(mon_file, 'rt') as f:
            firstline = f.readline()
            assert firstline.startswith('#')
            metadata = json.loads(firstline[1:])
            assert metadata['env_id'] == "CartPole-v1"
            # The header written by Monitor holds exactly these two keys.
            assert set(metadata.keys()) == {'env_id', 't_start'}, "Incorrect keys in monitor metadata"

            last_logline = pandas.read_csv(f, index_col=None)
            assert set(last_logline.keys()) == {'l', 't', 'r'}, "Incorrect keys in monitor logline"
    finally:
        # Clean up even when an assertion above fails.
        menv.close()
        os.remove(mon_file)