diff --git a/.gitignore b/.gitignore index 4681f8b57..42ce0e849 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ __pycache__/ *.egg-info/ -*.pyc \ No newline at end of file +*.pyc +.vscode/ \ No newline at end of file diff --git a/bin/interactive.py b/bin/interactive.py index d726fb8a0..7cc4fe0fb 100755 --- a/bin/interactive.py +++ b/bin/interactive.py @@ -3,7 +3,7 @@ sys.path.insert(1, os.path.join(sys.path[0], '..')) import argparse -from multiagent.environment import MultiAgentEnv +from multiagent.envs.environment import MultiAgentEnv from multiagent.policy import InteractivePolicy import multiagent.scenarios as scenarios diff --git a/make_env.py b/make_env.py deleted file mode 100644 index 12c1ae946..000000000 --- a/make_env.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -Code for creating a multiagent environment with one of the scenarios listed -in ./scenarios/. -Can be called by using, for example: - env = make_env('simple_speaker_listener') -After producing the env object, can be used similarly to an OpenAI gym -environment. - -A policy using this environment must output actions in the form of a list -for all agents. Each element of the list should be a numpy array, -of size (env.world.dim_p + env.world.dim_c, 1). Physical actions precede -communication actions in this array. See environment.py for more details. -""" - -def make_env(scenario_name, benchmark=False): - ''' - Creates a MultiAgentEnv object as env. This can be used similar to a gym - environment by calling env.reset() and env.step(). - Use env.render() to view the environment on the screen. - - Input: - scenario_name : name of the scenario from ./scenarios/ to be Returns - (without the .py extension) - benchmark : whether you want to produce benchmarking data - (usually only done during evaluation) - - Some useful env properties (see environment.py): - .observation_space : Returns the observation space for each agent - .action_space : Returns the action space for each agent - .n : Returns the number of Agents - ''' - from multiagent.environment import MultiAgentEnv - import multiagent.scenarios as scenarios - - # load scenario from script - scenario = scenarios.load(scenario_name + ".py").Scenario() - # create world - world = scenario.make_world() - # create multiagent environment - if benchmark: - env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, scenario.benchmark_data) - else: - env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation) - return env diff --git a/multiagent/__init__.py b/multiagent/__init__.py index 90b60076c..ce3592d9b 100644 --- a/multiagent/__init__.py +++ b/multiagent/__init__.py @@ -5,14 +5,5 @@ register( id='MultiagentSimple-v0', - entry_point='multiagent.envs:SimpleEnv', - # FIXME(cathywu) currently has to be exactly max_path_length parameters in - # rllab run script - max_episode_steps=100, -) - -register( - id='MultiagentSimpleSpeakerListener-v0', - entry_point='multiagent.envs:SimpleSpeakerListenerEnv', - max_episode_steps=100, + entry_point='multiagent.envs:CustomMultiAgentEnv', ) diff --git a/multiagent/core.py b/multiagent/core.py deleted file mode 100644 index c0662ca0c..000000000 --- a/multiagent/core.py +++ /dev/null @@ -1,196 +0,0 @@ -import numpy as np - -# physical/external base state of all entites -class EntityState(object): - def __init__(self): - # physical position - self.p_pos = None - # physical velocity - self.p_vel = None - -# state of agents (including communication and internal/mental state) -class 
AgentState(EntityState): - def __init__(self): - super(AgentState, self).__init__() - # communication utterance - self.c = None - -# action of the agent -class Action(object): - def __init__(self): - # physical action - self.u = None - # communication action - self.c = None - -# properties and state of physical world entity -class Entity(object): - def __init__(self): - # name - self.name = '' - # properties: - self.size = 0.050 - # entity can move / be pushed - self.movable = False - # entity collides with others - self.collide = True - # material density (affects mass) - self.density = 25.0 - # color - self.color = None - # max speed and accel - self.max_speed = None - self.accel = None - # state - self.state = EntityState() - # mass - self.initial_mass = 1.0 - - @property - def mass(self): - return self.initial_mass - -# properties of landmark entities -class Landmark(Entity): - def __init__(self): - super(Landmark, self).__init__() - -# properties of agent entities -class Agent(Entity): - def __init__(self): - super(Agent, self).__init__() - # agents are movable by default - self.movable = True - # cannot send communication signals - self.silent = False - # cannot observe the world - self.blind = False - # physical motor noise amount - self.u_noise = None - # communication noise amount - self.c_noise = None - # control range - self.u_range = 1.0 - # state - self.state = AgentState() - # action - self.action = Action() - # script behavior to execute - self.action_callback = None - -# multi-agent world -class World(object): - def __init__(self): - # list of agents and entities (can change at execution-time!) - self.agents = [] - self.landmarks = [] - # communication channel dimensionality - self.dim_c = 0 - # position dimensionality - self.dim_p = 2 - # color dimensionality - self.dim_color = 3 - # simulation timestep - self.dt = 0.1 - # physical damping - self.damping = 0.25 - # contact response parameters - self.contact_force = 1e+2 - self.contact_margin = 1e-3 - - # return all entities in the world - @property - def entities(self): - return self.agents + self.landmarks - - # return all agents controllable by external policies - @property - def policy_agents(self): - return [agent for agent in self.agents if agent.action_callback is None] - - # return all agents controlled by world scripts - @property - def scripted_agents(self): - return [agent for agent in self.agents if agent.action_callback is not None] - - # update state of the world - def step(self): - # set actions for scripted agents - for agent in self.scripted_agents: - agent.action = agent.action_callback(agent, self) - # gather forces applied to entities - p_force = [None] * len(self.entities) - # apply agent physical controls - p_force = self.apply_action_force(p_force) - # apply environment forces - p_force = self.apply_environment_force(p_force) - # integrate physical state - self.integrate_state(p_force) - # update agent state - for agent in self.agents: - self.update_agent_state(agent) - - # gather agent action forces - def apply_action_force(self, p_force): - # set applied forces - for i,agent in enumerate(self.agents): - if agent.movable: - noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0 - p_force[i] = agent.action.u + noise - return p_force - - # gather physical forces acting on entities - def apply_environment_force(self, p_force): - # simple (but inefficient) collision response - for a,entity_a in enumerate(self.entities): - for b,entity_b in enumerate(self.entities): - 
if(b <= a): continue - [f_a, f_b] = self.get_collision_force(entity_a, entity_b) - if(f_a is not None): - if(p_force[a] is None): p_force[a] = 0.0 - p_force[a] = f_a + p_force[a] - if(f_b is not None): - if(p_force[b] is None): p_force[b] = 0.0 - p_force[b] = f_b + p_force[b] - return p_force - - # integrate physical state - def integrate_state(self, p_force): - for i,entity in enumerate(self.entities): - if not entity.movable: continue - entity.state.p_vel = entity.state.p_vel * (1 - self.damping) - if (p_force[i] is not None): - entity.state.p_vel += (p_force[i] / entity.mass) * self.dt - if entity.max_speed is not None: - speed = np.sqrt(np.square(entity.state.p_vel[0]) + np.square(entity.state.p_vel[1])) - if speed > entity.max_speed: - entity.state.p_vel = entity.state.p_vel / np.sqrt(np.square(entity.state.p_vel[0]) + - np.square(entity.state.p_vel[1])) * entity.max_speed - entity.state.p_pos += entity.state.p_vel * self.dt - - def update_agent_state(self, agent): - # set communication state (directly for now) - if agent.silent: - agent.state.c = np.zeros(self.dim_c) - else: - noise = np.random.randn(*agent.action.c.shape) * agent.c_noise if agent.c_noise else 0.0 - agent.state.c = agent.action.c + noise - - # get collision forces for any contact between two entities - def get_collision_force(self, entity_a, entity_b): - if (not entity_a.collide) or (not entity_b.collide): - return [None, None] # not a collider - if (entity_a is entity_b): - return [None, None] # don't collide against itself - # compute actual distance between entities - delta_pos = entity_a.state.p_pos - entity_b.state.p_pos - dist = np.sqrt(np.sum(np.square(delta_pos))) - # minimum allowable distance - dist_min = entity_a.size + entity_b.size - # softmax penetration - k = self.contact_margin - penetration = np.logaddexp(0, -(dist - dist_min)/k)*k - force = self.contact_force * delta_pos / dist * penetration - force_a = +force if entity_a.movable else None - force_b = -force if entity_b.movable else None - return [force_a, force_b] \ No newline at end of file diff --git a/multiagent/environment.py b/multiagent/environment.py deleted file mode 100644 index d2e8d3278..000000000 --- a/multiagent/environment.py +++ /dev/null @@ -1,335 +0,0 @@ -import gym -from gym import spaces -from gym.envs.registration import EnvSpec -import numpy as np -from multiagent.multi_discrete import MultiDiscrete - -# environment for all agents in the multiagent world -# currently code assumes that no agents will be created/destroyed at runtime! 
-class MultiAgentEnv(gym.Env): - metadata = { - 'render.modes' : ['human', 'rgb_array'] - } - - def __init__(self, world, reset_callback=None, reward_callback=None, - observation_callback=None, info_callback=None, - done_callback=None, shared_viewer=True): - - self.world = world - self.agents = self.world.policy_agents - # set required vectorized gym env property - self.n = len(world.policy_agents) - # scenario callbacks - self.reset_callback = reset_callback - self.reward_callback = reward_callback - self.observation_callback = observation_callback - self.info_callback = info_callback - self.done_callback = done_callback - # environment parameters - self.discrete_action_space = True - # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector - self.discrete_action_input = False - # if true, even the action is continuous, action will be performed discretely - self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False - # if true, every agent has the same reward - self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False - self.time = 0 - - # configure spaces - self.action_space = [] - self.observation_space = [] - for agent in self.agents: - total_action_space = [] - # physical action space - if self.discrete_action_space: - u_action_space = spaces.Discrete(world.dim_p * 2 + 1) - else: - u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32) - if agent.movable: - total_action_space.append(u_action_space) - # communication action space - if self.discrete_action_space: - c_action_space = spaces.Discrete(world.dim_c) - else: - c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32) - if not agent.silent: - total_action_space.append(c_action_space) - # total action space - if len(total_action_space) > 1: - # all action spaces are discrete, so simplify to MultiDiscrete action space - if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]): - act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space]) - else: - act_space = spaces.Tuple(total_action_space) - self.action_space.append(act_space) - else: - self.action_space.append(total_action_space[0]) - # observation space - obs_dim = len(observation_callback(agent, self.world)) - self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32)) - agent.action.c = np.zeros(self.world.dim_c) - - # rendering - self.shared_viewer = shared_viewer - if self.shared_viewer: - self.viewers = [None] - else: - self.viewers = [None] * self.n - self._reset_render() - - def step(self, action_n): - obs_n = [] - reward_n = [] - done_n = [] - info_n = {'n': []} - self.agents = self.world.policy_agents - # set action for each agent - for i, agent in enumerate(self.agents): - self._set_action(action_n[i], agent, self.action_space[i]) - # advance world state - self.world.step() - # record observation for each agent - for agent in self.agents: - obs_n.append(self._get_obs(agent)) - reward_n.append(self._get_reward(agent)) - done_n.append(self._get_done(agent)) - - info_n['n'].append(self._get_info(agent)) - - # all agents get total reward in cooperative case - reward = np.sum(reward_n) - if self.shared_reward: - reward_n = [reward] * self.n - - return obs_n, reward_n, done_n, info_n - - def reset(self): - # reset world - self.reset_callback(self.world) - # reset renderer - self._reset_render() 
- # record observations for each agent - obs_n = [] - self.agents = self.world.policy_agents - for agent in self.agents: - obs_n.append(self._get_obs(agent)) - return obs_n - - # get info used for benchmarking - def _get_info(self, agent): - if self.info_callback is None: - return {} - return self.info_callback(agent, self.world) - - # get observation for a particular agent - def _get_obs(self, agent): - if self.observation_callback is None: - return np.zeros(0) - return self.observation_callback(agent, self.world) - - # get dones for a particular agent - # unused right now -- agents are allowed to go beyond the viewing screen - def _get_done(self, agent): - if self.done_callback is None: - return False - return self.done_callback(agent, self.world) - - # get reward for a particular agent - def _get_reward(self, agent): - if self.reward_callback is None: - return 0.0 - return self.reward_callback(agent, self.world) - - # set env action for a particular agent - def _set_action(self, action, agent, action_space, time=None): - agent.action.u = np.zeros(self.world.dim_p) - agent.action.c = np.zeros(self.world.dim_c) - # process action - if isinstance(action_space, MultiDiscrete): - act = [] - size = action_space.high - action_space.low + 1 - index = 0 - for s in size: - act.append(action[index:(index+s)]) - index += s - action = act - else: - action = [action] - - if agent.movable: - # physical action - if self.discrete_action_input: - agent.action.u = np.zeros(self.world.dim_p) - # process discrete action - if action[0] == 1: agent.action.u[0] = -1.0 - if action[0] == 2: agent.action.u[0] = +1.0 - if action[0] == 3: agent.action.u[1] = -1.0 - if action[0] == 4: agent.action.u[1] = +1.0 - else: - if self.force_discrete_action: - d = np.argmax(action[0]) - action[0][:] = 0.0 - action[0][d] = 1.0 - if self.discrete_action_space: - agent.action.u[0] += action[0][1] - action[0][2] - agent.action.u[1] += action[0][3] - action[0][4] - else: - agent.action.u = action[0] - sensitivity = 5.0 - if agent.accel is not None: - sensitivity = agent.accel - agent.action.u *= sensitivity - action = action[1:] - if not agent.silent: - # communication action - if self.discrete_action_input: - agent.action.c = np.zeros(self.world.dim_c) - agent.action.c[action[0]] = 1.0 - else: - agent.action.c = action[0] - action = action[1:] - # make sure we used all elements of action - assert len(action) == 0 - - # reset rendering assets - def _reset_render(self): - self.render_geoms = None - self.render_geoms_xform = None - - # render environment - def render(self, mode='human'): - if mode == 'human': - alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - message = '' - for agent in self.world.agents: - comm = [] - for other in self.world.agents: - if other is agent: continue - if np.all(other.state.c == 0): - word = '_' - else: - word = alphabet[np.argmax(other.state.c)] - message += (other.name + ' to ' + agent.name + ': ' + word + ' ') - print(message) - - for i in range(len(self.viewers)): - # create viewers (if necessary) - if self.viewers[i] is None: - # import rendering only if we need it (and don't import for headless machines) - #from gym.envs.classic_control import rendering - from multiagent import rendering - self.viewers[i] = rendering.Viewer(700,700) - - # create rendering geometry - if self.render_geoms is None: - # import rendering only if we need it (and don't import for headless machines) - #from gym.envs.classic_control import rendering - from multiagent import rendering - self.render_geoms = [] - 
self.render_geoms_xform = [] - for entity in self.world.entities: - geom = rendering.make_circle(entity.size) - xform = rendering.Transform() - if 'agent' in entity.name: - geom.set_color(*entity.color, alpha=0.5) - else: - geom.set_color(*entity.color) - geom.add_attr(xform) - self.render_geoms.append(geom) - self.render_geoms_xform.append(xform) - - # add geoms to viewer - for viewer in self.viewers: - viewer.geoms = [] - for geom in self.render_geoms: - viewer.add_geom(geom) - - results = [] - for i in range(len(self.viewers)): - from multiagent import rendering - # update bounds to center around agent - cam_range = 1 - if self.shared_viewer: - pos = np.zeros(self.world.dim_p) - else: - pos = self.agents[i].state.p_pos - self.viewers[i].set_bounds(pos[0]-cam_range,pos[0]+cam_range,pos[1]-cam_range,pos[1]+cam_range) - # update geometry positions - for e, entity in enumerate(self.world.entities): - self.render_geoms_xform[e].set_translation(*entity.state.p_pos) - # render to display or array - results.append(self.viewers[i].render(return_rgb_array = mode=='rgb_array')) - - return results - - # create receptor field locations in local coordinate frame - def _make_receptor_locations(self, agent): - receptor_type = 'polar' - range_min = 0.05 * 2.0 - range_max = 1.00 - dx = [] - # circular receptive field - if receptor_type == 'polar': - for angle in np.linspace(-np.pi, +np.pi, 8, endpoint=False): - for distance in np.linspace(range_min, range_max, 3): - dx.append(distance * np.array([np.cos(angle), np.sin(angle)])) - # add origin - dx.append(np.array([0.0, 0.0])) - # grid receptive field - if receptor_type == 'grid': - for x in np.linspace(-range_max, +range_max, 5): - for y in np.linspace(-range_max, +range_max, 5): - dx.append(np.array([x,y])) - return dx - - -# vectorized wrapper for a batch of multi-agent environments -# assumes all environments have the same observation and action space -class BatchMultiAgentEnv(gym.Env): - metadata = { - 'runtime.vectorized': True, - 'render.modes' : ['human', 'rgb_array'] - } - - def __init__(self, env_batch): - self.env_batch = env_batch - - @property - def n(self): - return np.sum([env.n for env in self.env_batch]) - - @property - def action_space(self): - return self.env_batch[0].action_space - - @property - def observation_space(self): - return self.env_batch[0].observation_space - - def step(self, action_n, time): - obs_n = [] - reward_n = [] - done_n = [] - info_n = {'n': []} - i = 0 - for env in self.env_batch: - obs, reward, done, _ = env.step(action_n[i:(i+env.n)], time) - i += env.n - obs_n += obs - # reward = [r / len(self.env_batch) for r in reward] - reward_n += reward - done_n += done - return obs_n, reward_n, done_n, info_n - - def reset(self): - obs_n = [] - for env in self.env_batch: - obs_n += env.reset() - return obs_n - - # render environment - def render(self, mode='human', close=True): - results_n = [] - for env in self.env_batch: - results_n += env.render(mode, close) - return results_n diff --git a/multiagent/envs/__init__.py b/multiagent/envs/__init__.py new file mode 100644 index 000000000..02882ab8f --- /dev/null +++ b/multiagent/envs/__init__.py @@ -0,0 +1 @@ +from multiagent.envs.multiagent_env import CustomMultiAgentEnv \ No newline at end of file diff --git a/multiagent/envs/agents/basic_agent.py b/multiagent/envs/agents/basic_agent.py new file mode 100644 index 000000000..c16df11c3 --- /dev/null +++ b/multiagent/envs/agents/basic_agent.py @@ -0,0 +1,63 @@ +import multiagent.envs.observations as observations +import 
multiagent.envs.rewards as rewards +import multiagent.envs.physics as physics +import numpy as np + +from gym.spaces import Box + +class BasicAgent(): + def __init__(self, id="agent", initial_conditions=None, physic="BasicPhysic", + observation="simple_goal", reward="linear_to_goal"): + + self.id = id + # Data used to reset agent + self._init_pos = initial_conditions['position']\ + if initial_conditions is not None and 'position' in initial_conditions\ + else np.zeros(2) # [x, y] + self._init_velocity = initial_conditions['velocity']\ + if initial_conditions is not None and 'velocity' in initial_conditions\ + else np.zeros(2) # [x, y] + self._init_color = initial_conditions['color']\ + if initial_conditions is not None and 'color' in initial_conditions\ + else np.zeros(4) # [r, g, b, alpha] + self._init_movable = initial_conditions['movable']\ + if initial_conditions is not None and 'movable' in initial_conditions\ + else True + self._init_max_speed = initial_conditions['max_speed']\ + if initial_conditions is not None and 'max_speed' in initial_conditions\ + else None + self._init_size = initial_conditions['size']\ + if initial_conditions is not None and 'size' in initial_conditions\ + else 0.02 + + # Agent functions and classes + self._observation = getattr(observations, observation) + self._reward = getattr(rewards, reward) + self._physic = getattr(physics, physic)() + + self.actions = np.zeros(2) # [x, y], each from -1 to 1 + + self.action_space = Box(low=-1, high=+1, shape=self.actions.shape, dtype=np.float32) + self.observation_space = Box(low=-np.inf, high=+np.inf, shape=(6,), dtype=np.float32) + + # Init values by resetting agent + self.reset() + + def reset(self): + self.position = self._init_pos + self.velocity = self._init_velocity + self.color = self._init_color + self.movable = self._init_movable + self.max_speed = self._init_max_speed + self.size = self._init_size + + def observation(self, env): + return self._observation(self.id, env) + + def reward(self, env): + return self._reward(self.id, env) + + def step(self, action, env): + self.actions = action + + self._physic.move(self.id, self.actions, env) diff --git a/multiagent/envs/landmarks/basic_landmark.py b/multiagent/envs/landmarks/basic_landmark.py new file mode 100644 index 000000000..74431d4b6 --- /dev/null +++ b/multiagent/envs/landmarks/basic_landmark.py @@ -0,0 +1,24 @@ +import numpy as np + + +class BasicLandmark(): + def __init__(self, id="agent", type="obstacle", initial_conditions=None): + + self.id = id + self.type = type + # Data used to reset landmark + self._init_pos = initial_conditions['position']\ + if initial_conditions is not None and 'position' in initial_conditions\ + else np.zeros(2) # [x, y] + self._init_color = initial_conditions['color']\ + if initial_conditions is not None and 'color' in initial_conditions\ + else np.zeros(4) # [r, g, b, alpha] + self._init_size = initial_conditions['size']\ + if initial_conditions is not None and 'size' in initial_conditions\ + else 0.05 + + + def reset(self): + self.position = self._init_pos + self.color = self._init_color + self.size = self._init_size diff --git a/multiagent/envs/multiagent_env.py b/multiagent/envs/multiagent_env.py new file mode 100644 index 000000000..a0b321397 --- /dev/null +++ b/multiagent/envs/multiagent_env.py @@ -0,0 +1,123 @@ +import gym + +import numpy as np + +from ray.rllib.env.multi_agent_env import MultiAgentEnv + + +class CustomMultiAgentEnv(MultiAgentEnv): + + def __init__(self, config): + #* Env + self.dones = set() + 
self.max_episode_step = config['max_episode_step'] + self.num_step = 0 + + #* Agents + self.agents = config['agents'] + # Set Obs and Act spaces for each agent. + self.action_spaces = [agent.action_space + for agent in self.agents] + self.observation_spaces = [agent.observation_space + for agent in self.agents] + + #* Landmarks + self.landmarks = config['landmarks'] + + #* Rendering + self.viewers = [None] + + def reset(self): + # Reset renderer + self._reset_render() + for agent in self.agents: + agent.reset() + for landmark in self.landmarks: + landmark.reset() + + self.num_step = 0 + + return { agent.id: agent.observation(self) for agent in self.agents } + + def step(self, actions): + obs, rew = {}, {} + + self.num_step += 1 + + # Apply every agent action + for agent in self.agents: + agent.step(actions[agent.id], self) + + # Get every agent actions, observations, infos and done state + for agent in self.agents: + obs[agent.id] = agent.observation(self) + rew[agent.id] = agent.reward(self) + + # Get agents information + info = self.get_agents_information() + # Check if some agents are done + # done = self.check_agent_done() + done = { + "__all__": len(self.dones) == len(self.agents) or\ + self.num_step == self.max_episode_step + } + + return obs, rew, done, info + + def get_agents_information(self): + return { agent.id: {} for agent in self.agents } + + def check_agent_done(self): + return { agent.id: False for agent in self.agents } + + # reset rendering assets + def _reset_render(self): + self.render_geoms = None + self.render_geoms_xform = None + + def render(self, mode="human"): + + for i in range(len(self.viewers)): + # create viewers (if necessary) + if self.viewers[i] is None: + # import rendering only if we need it (and don't import for headless machines) + #from gym.envs.classic_control import rendering + from multiagent import rendering + self.viewers[i] = rendering.Viewer(700,700) + + entities = self.agents + self.landmarks + # create rendering geometry + if self.render_geoms is None: + # import rendering only if we need it (and don't import for headless machines) + #from gym.envs.classic_control import rendering + from multiagent import rendering + self.render_geoms = [] + self.render_geoms_xform = [] + for entity in entities: + geom = rendering.make_circle(entity.size) + xform = rendering.Transform() + geom.set_color(*entity.color) + geom.add_attr(xform) + self.render_geoms.append(geom) + self.render_geoms_xform.append(xform) + + # add geoms to viewer + for viewer in self.viewers: + viewer.geoms = [] + for geom in self.render_geoms: + viewer.add_geom(geom) + + results = [] + for i in range(len(self.viewers)): + from multiagent import rendering + # update bounds to center around agent + cam_range = 1 + pos = np.zeros(2) + self.viewers[i].set_bounds(pos[0]-cam_range,pos[0]+cam_range,pos[1]-cam_range,pos[1]+cam_range) + # update geometry positions + for e, entity in enumerate(entities): + self.render_geoms_xform[e].set_translation(*entity.position) + # render to display or array + results.append(self.viewers[i].render(return_rgb_array = mode=='rgb_array')) + + return results \ No newline at end of file diff --git a/multiagent/envs/observations.py b/multiagent/envs/observations.py new file mode 100644 index 000000000..e709fe045 --- /dev/null +++ b/multiagent/envs/observations.py @@ -0,0 +1,17 @@ +import numpy as np + + +def self_observation(agent_id, env): + return np.concatenate((env.agents[agent_id].position, + env.agents[agent_id].velocity, + env.agents[agent_id].color), 
axis=0) + + +def simple_goal(agent_id, env): + agent = [agent for agent in env.agents if agent.id == agent_id][0] + goal = [ld for ld in env.landmarks if ld.type == 'goal'][0] + + return np.concatenate((agent.position, + agent.velocity, + goal.position), axis=0) + \ No newline at end of file diff --git a/multiagent/envs/physics.py b/multiagent/envs/physics.py new file mode 100644 index 000000000..f81f34f0a --- /dev/null +++ b/multiagent/envs/physics.py @@ -0,0 +1,27 @@ +import numpy as np + + +class BasicPhysic(): + + def __init__(self): + # physical damping + self.damping = 0.25 + # simulation timestep + self.dt = 0.1 + + def move(self, agent_id, action, env): + agent = [agent for agent in env.agents if agent.id == agent_id][0] + if agent.movable is False: + return + agent.velocity = agent.velocity * (1 - self.damping) + agent.velocity += action * self.dt + + if agent.max_speed is not None: + speed = np.sqrt(np.square(agent.velocity[0]) + np.square(agent.velocity[1])) + if speed > agent.max_speed: + agent.velocity = agent.velocity / speed * agent.max_speed + + # Avoid read-only numpy array + pos = np.copy(agent.position) + pos += agent.velocity * self.dt + agent.position = pos diff --git a/multiagent/envs/rewards.py b/multiagent/envs/rewards.py new file mode 100644 index 000000000..deab43428 --- /dev/null +++ b/multiagent/envs/rewards.py @@ -0,0 +1,9 @@ +import numpy as np + + +def linear_to_goal(agent_id, env): + agent_pos = [agent.position for agent in env.agents if agent.id == agent_id][0] + goal_pos = [ld.position for ld in env.landmarks if ld.type == 'goal'][0] + + dist2 = np.sum(np.square(agent_pos - goal_pos)) + return -dist2 \ No newline at end of file diff --git a/multiagent/multi_discrete.py b/multiagent/multi_discrete.py index d7108ad43..1f70d17d7 100644 --- a/multiagent/multi_discrete.py +++ b/multiagent/multi_discrete.py @@ -4,7 +4,7 @@ import numpy as np import gym -from gym.spaces import prng +# from gym.spaces import prng class MultiDiscrete(gym.Space): """ @@ -30,7 +30,8 @@ def __init__(self, array_of_param_array): def sample(self): """ Returns a array with one sample from each discrete action space """ # For each row: round(random .* (max - min) + min, 0) - random_array = prng.np_random.rand(self.num_discrete_space) + # random_array = prng.np_random.rand(self.num_discrete_space) #! Remove this so I don't have to downgrade gym + random_array = np.random.RandomState().rand(self.num_discrete_space) return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)] def contains(self, x): return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all() diff --git a/multiagent/rendering.py b/multiagent/rendering.py index cd00c7fb8..5a826bda6 100644 --- a/multiagent/rendering.py +++ b/multiagent/rendering.py @@ -11,18 +11,20 @@ os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib' # (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite -from gym.utils import reraise +# from gym.utils import reraise from gym import error try: import pyglet except ImportError as e: - reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.") + # reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. 
But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.") + raise ImportError try: from pyglet.gl import * except ImportError as e: - reraise(prefix="Error occured while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL install. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python '") + # reraise(prefix="Error occured while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL install. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python '") + raise ImportError import math import numpy as np diff --git a/multiagent/scenario.py b/multiagent/scenario.py deleted file mode 100644 index 02d86773e..000000000 --- a/multiagent/scenario.py +++ /dev/null @@ -1,10 +0,0 @@ -import numpy as np - -# defines scenario upon which the world is built -class BaseScenario(object): - # create elements of the world - def make_world(self): - raise NotImplementedError() - # create initial conditions of the world - def reset_world(self, world): - raise NotImplementedError() diff --git a/multiagent/scenarios/__init__.py b/multiagent/scenarios/__init__.py deleted file mode 100644 index 9a4b6f71a..000000000 --- a/multiagent/scenarios/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -import imp -import os.path as osp - - -def load(name): - pathname = osp.join(osp.dirname(__file__), name) - return imp.load_source('', pathname) diff --git a/multiagent/scenarios/simple.py b/multiagent/scenarios/simple.py deleted file mode 100755 index 3ff80bedd..000000000 --- a/multiagent/scenarios/simple.py +++ /dev/null @@ -1,50 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - -class Scenario(BaseScenario): - def make_world(self): - world = World() - # add agents - world.agents = [Agent() for i in range(1)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = False - agent.silent = True - # add landmarks - world.landmarks = [Landmark() for i in range(1)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = False - landmark.movable = False - # make initial conditions - self.reset_world(world) - return world - - def reset_world(self, world): - # random properties for agents - for i, agent in enumerate(world.agents): - agent.color = np.array([0.25,0.25,0.25]) - # random properties for landmarks - for i, landmark in enumerate(world.landmarks): - landmark.color = np.array([0.75,0.75,0.75]) - world.landmarks[0].color = np.array([0.75,0.25,0.25]) - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - - def reward(self, agent, world): - dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos)) - return -dist2 - - def observation(self, agent, world): - # get positions of all entities in this agent's reference frame - entity_pos 
= [] - for entity in world.landmarks: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - return np.concatenate([agent.state.p_vel] + entity_pos) diff --git a/multiagent/scenarios/simple_adversary.py b/multiagent/scenarios/simple_adversary.py deleted file mode 100644 index d897bb841..000000000 --- a/multiagent/scenarios/simple_adversary.py +++ /dev/null @@ -1,139 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - - -class Scenario(BaseScenario): - - def make_world(self): - world = World() - # set any world properties first - world.dim_c = 2 - num_agents = 3 - world.num_agents = num_agents - num_adversaries = 1 - num_landmarks = num_agents - 1 - # add agents - world.agents = [Agent() for i in range(num_agents)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = False - agent.silent = True - agent.adversary = True if i < num_adversaries else False - agent.size = 0.15 - # add landmarks - world.landmarks = [Landmark() for i in range(num_landmarks)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = False - landmark.movable = False - landmark.size = 0.08 - # make initial conditions - self.reset_world(world) - return world - - def reset_world(self, world): - # random properties for agents - world.agents[0].color = np.array([0.85, 0.35, 0.35]) - for i in range(1, world.num_agents): - world.agents[i].color = np.array([0.35, 0.35, 0.85]) - # random properties for landmarks - for i, landmark in enumerate(world.landmarks): - landmark.color = np.array([0.15, 0.15, 0.15]) - # set goal landmark - goal = np.random.choice(world.landmarks) - goal.color = np.array([0.15, 0.65, 0.15]) - for agent in world.agents: - agent.goal_a = goal - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - - def benchmark_data(self, agent, world): - # returns data for benchmarking purposes - if agent.adversary: - return np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)) - else: - dists = [] - for l in world.landmarks: - dists.append(np.sum(np.square(agent.state.p_pos - l.state.p_pos))) - dists.append(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) - return tuple(dists) - - # return all agents that are not adversaries - def good_agents(self, world): - return [agent for agent in world.agents if not agent.adversary] - - # return all adversarial agents - def adversaries(self, world): - return [agent for agent in world.agents if agent.adversary] - - def reward(self, agent, world): - # Agents are rewarded based on minimum agent distance to each landmark - return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) - - def agent_reward(self, agent, world): - # Rewarded based on how close any good agent is to the goal landmark, and how far the adversary is from it - shaped_reward = True - shaped_adv_reward = True - - # Calculate negative reward for adversary - adversary_agents = self.adversaries(world) - if shaped_adv_reward: # distance-based adversary reward - adv_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in adversary_agents]) - else: # 
proximity-based adversary reward (binary) - adv_rew = 0 - for a in adversary_agents: - if np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) < 2 * a.goal_a.size: - adv_rew -= 5 - - # Calculate positive reward for agents - good_agents = self.good_agents(world) - if shaped_reward: # distance-based agent reward - pos_rew = -min( - [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) - else: # proximity-based agent reward (binary) - pos_rew = 0 - if min([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) \ - < 2 * agent.goal_a.size: - pos_rew += 5 - pos_rew -= min( - [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) - return pos_rew + adv_rew - - def adversary_reward(self, agent, world): - # Rewarded based on proximity to the goal landmark - shaped_reward = True - if shaped_reward: # distance-based reward - return -np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)) - else: # proximity-based reward (binary) - adv_rew = 0 - if np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) < 2 * agent.goal_a.size: - adv_rew += 5 - return adv_rew - - - def observation(self, agent, world): - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - # entity colors - entity_color = [] - for entity in world.landmarks: - entity_color.append(entity.color) - # communication of all other agents - other_pos = [] - for other in world.agents: - if other is agent: continue - other_pos.append(other.state.p_pos - agent.state.p_pos) - - if not agent.adversary: - return np.concatenate([agent.goal_a.state.p_pos - agent.state.p_pos] + entity_pos + other_pos) - else: - return np.concatenate(entity_pos + other_pos) diff --git a/multiagent/scenarios/simple_crypto.py b/multiagent/scenarios/simple_crypto.py deleted file mode 100644 index e1fd59237..000000000 --- a/multiagent/scenarios/simple_crypto.py +++ /dev/null @@ -1,169 +0,0 @@ -""" -Scenario: -1 speaker, 2 listeners (one of which is an adversary). Good agents rewarded for proximity to goal, and distance from -adversary to goal. Adversary is rewarded for its distance to the goal. 
-""" - - -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario -import random - - -class CryptoAgent(Agent): - def __init__(self): - super(CryptoAgent, self).__init__() - self.key = None - -class Scenario(BaseScenario): - - def make_world(self): - world = World() - # set any world properties first - num_agents = 3 - num_adversaries = 1 - num_landmarks = 2 - world.dim_c = 4 - # add agents - world.agents = [CryptoAgent() for i in range(num_agents)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = False - agent.adversary = True if i < num_adversaries else False - agent.speaker = True if i == 2 else False - agent.movable = False - # add landmarks - world.landmarks = [Landmark() for i in range(num_landmarks)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = False - landmark.movable = False - # make initial conditions - self.reset_world(world) - return world - - - def reset_world(self, world): - # random properties for agents - for i, agent in enumerate(world.agents): - agent.color = np.array([0.25, 0.25, 0.25]) - if agent.adversary: - agent.color = np.array([0.75, 0.25, 0.25]) - agent.key = None - # random properties for landmarks - color_list = [np.zeros(world.dim_c) for i in world.landmarks] - for i, color in enumerate(color_list): - color[i] += 1 - for color, landmark in zip(color_list, world.landmarks): - landmark.color = color - # set goal landmark - goal = np.random.choice(world.landmarks) - world.agents[1].color = goal.color - world.agents[2].key = np.random.choice(world.landmarks).color - - for agent in world.agents: - agent.goal_a = goal - - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - - - def benchmark_data(self, agent, world): - # returns data for benchmarking purposes - return (agent.state.c, agent.goal_a.color) - - # return all agents that are not adversaries - def good_listeners(self, world): - return [agent for agent in world.agents if not agent.adversary and not agent.speaker] - - # return all agents that are not adversaries - def good_agents(self, world): - return [agent for agent in world.agents if not agent.adversary] - - # return all adversarial agents - def adversaries(self, world): - return [agent for agent in world.agents if agent.adversary] - - def reward(self, agent, world): - return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) - - def agent_reward(self, agent, world): - # Agents rewarded if Bob can reconstruct message, but adversary (Eve) cannot - good_listeners = self.good_listeners(world) - adversaries = self.adversaries(world) - good_rew = 0 - adv_rew = 0 - for a in good_listeners: - if (a.state.c == np.zeros(world.dim_c)).all(): - continue - else: - good_rew -= np.sum(np.square(a.state.c - agent.goal_a.color)) - for a in adversaries: - if (a.state.c == np.zeros(world.dim_c)).all(): - continue - else: - adv_l1 = np.sum(np.square(a.state.c - agent.goal_a.color)) - adv_rew += adv_l1 - return adv_rew + good_rew - - def adversary_reward(self, agent, world): - # Adversary (Eve) is rewarded if it can reconstruct original goal - rew = 0 - if not 
(agent.state.c == np.zeros(world.dim_c)).all(): - rew -= np.sum(np.square(agent.state.c - agent.goal_a.color)) - return rew - - - def observation(self, agent, world): - # goal color - goal_color = np.zeros(world.dim_color) - if agent.goal_a is not None: - goal_color = agent.goal_a.color - - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - # communication of all other agents - comm = [] - for other in world.agents: - if other is agent or (other.state.c is None) or not other.speaker: continue - comm.append(other.state.c) - - confer = np.array([0]) - - if world.agents[2].key is None: - confer = np.array([1]) - key = np.zeros(world.dim_c) - goal_color = np.zeros(world.dim_c) - else: - key = world.agents[2].key - - prnt = False - # speaker - if agent.speaker: - if prnt: - print('speaker') - print(agent.state.c) - print(np.concatenate([goal_color] + [key] + [confer] + [np.random.randn(1)])) - return np.concatenate([goal_color] + [key]) - # listener - if not agent.speaker and not agent.adversary: - if prnt: - print('listener') - print(agent.state.c) - print(np.concatenate([key] + comm + [confer])) - return np.concatenate([key] + comm) - if not agent.speaker and agent.adversary: - if prnt: - print('adversary') - print(agent.state.c) - print(np.concatenate(comm + [confer])) - return np.concatenate(comm) diff --git a/multiagent/scenarios/simple_push.py b/multiagent/scenarios/simple_push.py deleted file mode 100644 index 907fef5b5..000000000 --- a/multiagent/scenarios/simple_push.py +++ /dev/null @@ -1,96 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - -class Scenario(BaseScenario): - def make_world(self): - world = World() - # set any world properties first - world.dim_c = 2 - num_agents = 2 - num_adversaries = 1 - num_landmarks = 2 - # add agents - world.agents = [Agent() for i in range(num_agents)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = True - agent.silent = True - if i < num_adversaries: - agent.adversary = True - else: - agent.adversary = False - # add landmarks - world.landmarks = [Landmark() for i in range(num_landmarks)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = False - landmark.movable = False - # make initial conditions - self.reset_world(world) - return world - - def reset_world(self, world): - # random properties for landmarks - for i, landmark in enumerate(world.landmarks): - landmark.color = np.array([0.1, 0.1, 0.1]) - landmark.color[i + 1] += 0.8 - landmark.index = i - # set goal landmark - goal = np.random.choice(world.landmarks) - for i, agent in enumerate(world.agents): - agent.goal_a = goal - agent.color = np.array([0.25, 0.25, 0.25]) - if agent.adversary: - agent.color = np.array([0.75, 0.25, 0.25]) - else: - j = goal.index - agent.color[j + 1] += 0.5 - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - - def reward(self, agent, world): - # Agents are rewarded based on minimum agent distance to each landmark - return self.adversary_reward(agent, world) if 
agent.adversary else self.agent_reward(agent, world) - - def agent_reward(self, agent, world): - # the distance to the goal - return -np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) - - def adversary_reward(self, agent, world): - # keep the nearest good agents away from the goal - agent_dist = [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in world.agents if not a.adversary] - pos_rew = min(agent_dist) - #nearest_agent = world.good_agents[np.argmin(agent_dist)] - #neg_rew = np.sqrt(np.sum(np.square(nearest_agent.state.p_pos - agent.state.p_pos))) - neg_rew = np.sqrt(np.sum(np.square(agent.goal_a.state.p_pos - agent.state.p_pos))) - #neg_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in world.good_agents]) - return pos_rew - neg_rew - - def observation(self, agent, world): - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: # world.entities: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - # entity colors - entity_color = [] - for entity in world.landmarks: # world.entities: - entity_color.append(entity.color) - # communication of all other agents - comm = [] - other_pos = [] - for other in world.agents: - if other is agent: continue - comm.append(other.state.c) - other_pos.append(other.state.p_pos - agent.state.p_pos) - if not agent.adversary: - return np.concatenate([agent.state.p_vel] + [agent.goal_a.state.p_pos - agent.state.p_pos] + [agent.color] + entity_pos + entity_color + other_pos) - else: - #other_pos = list(reversed(other_pos)) if random.uniform(0,1) > 0.5 else other_pos # randomize position of other agents in adversary network - return np.concatenate([agent.state.p_vel] + entity_pos + other_pos) diff --git a/multiagent/scenarios/simple_reference.py b/multiagent/scenarios/simple_reference.py deleted file mode 100755 index 2ea84a645..000000000 --- a/multiagent/scenarios/simple_reference.py +++ /dev/null @@ -1,81 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - -class Scenario(BaseScenario): - def make_world(self): - world = World() - # set any world properties first - world.dim_c = 10 - world.collaborative = True # whether agents share rewards - # add agents - world.agents = [Agent() for i in range(2)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = False - # add landmarks - world.landmarks = [Landmark() for i in range(3)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = False - landmark.movable = False - # make initial conditions - self.reset_world(world) - return world - - def reset_world(self, world): - # assign goals to agents - for agent in world.agents: - agent.goal_a = None - agent.goal_b = None - # want other agent to go to the goal landmark - world.agents[0].goal_a = world.agents[1] - world.agents[0].goal_b = np.random.choice(world.landmarks) - world.agents[1].goal_a = world.agents[0] - world.agents[1].goal_b = np.random.choice(world.landmarks) - # random properties for agents - for i, agent in enumerate(world.agents): - agent.color = np.array([0.25,0.25,0.25]) - # random properties for landmarks - world.landmarks[0].color = np.array([0.75,0.25,0.25]) - world.landmarks[1].color = np.array([0.25,0.75,0.25]) - world.landmarks[2].color = np.array([0.25,0.25,0.75]) - # special colors for goals - world.agents[0].goal_a.color = 
world.agents[0].goal_b.color - world.agents[1].goal_a.color = world.agents[1].goal_b.color - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - - def reward(self, agent, world): - if agent.goal_a is None or agent.goal_b is None: - return 0.0 - dist2 = np.sum(np.square(agent.goal_a.state.p_pos - agent.goal_b.state.p_pos)) - return -dist2 - - def observation(self, agent, world): - # goal color - goal_color = [np.zeros(world.dim_color), np.zeros(world.dim_color)] - if agent.goal_b is not None: - goal_color[1] = agent.goal_b.color - - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - # entity colors - entity_color = [] - for entity in world.landmarks: - entity_color.append(entity.color) - # communication of all other agents - comm = [] - for other in world.agents: - if other is agent: continue - comm.append(other.state.c) - return np.concatenate([agent.state.p_vel] + entity_pos + [goal_color[1]] + comm) - \ No newline at end of file diff --git a/multiagent/scenarios/simple_speaker_listener.py b/multiagent/scenarios/simple_speaker_listener.py deleted file mode 100755 index 896e64e03..000000000 --- a/multiagent/scenarios/simple_speaker_listener.py +++ /dev/null @@ -1,92 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - -class Scenario(BaseScenario): - def make_world(self): - world = World() - # set any world properties first - world.dim_c = 3 - num_landmarks = 3 - world.collaborative = True - # add agents - world.agents = [Agent() for i in range(2)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = False - agent.size = 0.075 - # speaker - world.agents[0].movable = False - # listener - world.agents[1].silent = True - # add landmarks - world.landmarks = [Landmark() for i in range(num_landmarks)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = False - landmark.movable = False - landmark.size = 0.04 - # make initial conditions - self.reset_world(world) - return world - - def reset_world(self, world): - # assign goals to agents - for agent in world.agents: - agent.goal_a = None - agent.goal_b = None - # want listener to go to the goal landmark - world.agents[0].goal_a = world.agents[1] - world.agents[0].goal_b = np.random.choice(world.landmarks) - # random properties for agents - for i, agent in enumerate(world.agents): - agent.color = np.array([0.25,0.25,0.25]) - # random properties for landmarks - world.landmarks[0].color = np.array([0.65,0.15,0.15]) - world.landmarks[1].color = np.array([0.15,0.65,0.15]) - world.landmarks[2].color = np.array([0.15,0.15,0.65]) - # special colors for goals - world.agents[0].goal_a.color = world.agents[0].goal_b.color + np.array([0.45, 0.45, 0.45]) - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p) - 
landmark.state.p_vel = np.zeros(world.dim_p) - - def benchmark_data(self, agent, world): - # returns data for benchmarking purposes - return self.reward(agent, reward) - - def reward(self, agent, world): - # squared distance from listener to landmark - a = world.agents[0] - dist2 = np.sum(np.square(a.goal_a.state.p_pos - a.goal_b.state.p_pos)) - return -dist2 - - def observation(self, agent, world): - # goal color - goal_color = np.zeros(world.dim_color) - if agent.goal_b is not None: - goal_color = agent.goal_b.color - - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - - # communication of all other agents - comm = [] - for other in world.agents: - if other is agent or (other.state.c is None): continue - comm.append(other.state.c) - - # speaker - if not agent.movable: - return np.concatenate([goal_color]) - # listener - if agent.silent: - return np.concatenate([agent.state.p_vel] + entity_pos + comm) - diff --git a/multiagent/scenarios/simple_spread.py b/multiagent/scenarios/simple_spread.py deleted file mode 100644 index 263dfb4df..000000000 --- a/multiagent/scenarios/simple_spread.py +++ /dev/null @@ -1,100 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - - -class Scenario(BaseScenario): - def make_world(self): - world = World() - # set any world properties first - world.dim_c = 2 - num_agents = 3 - num_landmarks = 3 - world.collaborative = True - # add agents - world.agents = [Agent() for i in range(num_agents)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = True - agent.silent = True - agent.size = 0.15 - # add landmarks - world.landmarks = [Landmark() for i in range(num_landmarks)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = False - landmark.movable = False - # make initial conditions - self.reset_world(world) - return world - - def reset_world(self, world): - # random properties for agents - for i, agent in enumerate(world.agents): - agent.color = np.array([0.35, 0.35, 0.85]) - # random properties for landmarks - for i, landmark in enumerate(world.landmarks): - landmark.color = np.array([0.25, 0.25, 0.25]) - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - - def benchmark_data(self, agent, world): - rew = 0 - collisions = 0 - occupied_landmarks = 0 - min_dists = 0 - for l in world.landmarks: - dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents] - min_dists += min(dists) - rew -= min(dists) - if min(dists) < 0.1: - occupied_landmarks += 1 - if agent.collide: - for a in world.agents: - if self.is_collision(a, agent): - rew -= 1 - collisions += 1 - return (rew, collisions, min_dists, occupied_landmarks) - - - def is_collision(self, agent1, agent2): - delta_pos = agent1.state.p_pos - agent2.state.p_pos - dist = np.sqrt(np.sum(np.square(delta_pos))) - dist_min = agent1.size + agent2.size - return True if dist < dist_min else False - - def reward(self, agent, world): - # Agents are rewarded based on minimum agent distance to each 
landmark, penalized for collisions - rew = 0 - for l in world.landmarks: - dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents] - rew -= min(dists) - if agent.collide: - for a in world.agents: - if self.is_collision(a, agent): - rew -= 1 - return rew - - def observation(self, agent, world): - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: # world.entities: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - # entity colors - entity_color = [] - for entity in world.landmarks: # world.entities: - entity_color.append(entity.color) - # communication of all other agents - comm = [] - other_pos = [] - for other in world.agents: - if other is agent: continue - comm.append(other.state.c) - other_pos.append(other.state.p_pos - agent.state.p_pos) - return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + comm) diff --git a/multiagent/scenarios/simple_tag.py b/multiagent/scenarios/simple_tag.py deleted file mode 100644 index e4b65b646..000000000 --- a/multiagent/scenarios/simple_tag.py +++ /dev/null @@ -1,147 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - - -class Scenario(BaseScenario): - def make_world(self): - world = World() - # set any world properties first - world.dim_c = 2 - num_good_agents = 1 - num_adversaries = 3 - num_agents = num_adversaries + num_good_agents - num_landmarks = 2 - # add agents - world.agents = [Agent() for i in range(num_agents)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = True - agent.silent = True - agent.adversary = True if i < num_adversaries else False - agent.size = 0.075 if agent.adversary else 0.05 - agent.accel = 3.0 if agent.adversary else 4.0 - #agent.accel = 20.0 if agent.adversary else 25.0 - agent.max_speed = 1.0 if agent.adversary else 1.3 - # add landmarks - world.landmarks = [Landmark() for i in range(num_landmarks)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = True - landmark.movable = False - landmark.size = 0.2 - landmark.boundary = False - # make initial conditions - self.reset_world(world) - return world - - - def reset_world(self, world): - # random properties for agents - for i, agent in enumerate(world.agents): - agent.color = np.array([0.35, 0.85, 0.35]) if not agent.adversary else np.array([0.85, 0.35, 0.35]) - # random properties for landmarks - for i, landmark in enumerate(world.landmarks): - landmark.color = np.array([0.25, 0.25, 0.25]) - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - if not landmark.boundary: - landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - - - def benchmark_data(self, agent, world): - # returns data for benchmarking purposes - if agent.adversary: - collisions = 0 - for a in self.good_agents(world): - if self.is_collision(a, agent): - collisions += 1 - return collisions - else: - return 0 - - - def is_collision(self, agent1, agent2): - delta_pos = agent1.state.p_pos - agent2.state.p_pos - dist = np.sqrt(np.sum(np.square(delta_pos))) - dist_min = agent1.size + agent2.size - return True if dist < dist_min else False - - # 
return all agents that are not adversaries - def good_agents(self, world): - return [agent for agent in world.agents if not agent.adversary] - - # return all adversarial agents - def adversaries(self, world): - return [agent for agent in world.agents if agent.adversary] - - - def reward(self, agent, world): - # Agents are rewarded based on minimum agent distance to each landmark - main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) - return main_reward - - def agent_reward(self, agent, world): - # Agents are negatively rewarded if caught by adversaries - rew = 0 - shape = False - adversaries = self.adversaries(world) - if shape: # reward can optionally be shaped (increased reward for increased distance from adversary) - for adv in adversaries: - rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos))) - if agent.collide: - for a in adversaries: - if self.is_collision(a, agent): - rew -= 10 - - # agents are penalized for exiting the screen, so that they can be caught by the adversaries - def bound(x): - if x < 0.9: - return 0 - if x < 1.0: - return (x - 0.9) * 10 - return min(np.exp(2 * x - 2), 10) - for p in range(world.dim_p): - x = abs(agent.state.p_pos[p]) - rew -= bound(x) - - return rew - - def adversary_reward(self, agent, world): - # Adversaries are rewarded for collisions with agents - rew = 0 - shape = False - agents = self.good_agents(world) - adversaries = self.adversaries(world) - if shape: # reward can optionally be shaped (decreased reward for increased distance from agents) - for adv in adversaries: - rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - adv.state.p_pos))) for a in agents]) - if agent.collide: - for ag in agents: - for adv in adversaries: - if self.is_collision(ag, adv): - rew += 10 - return rew - - def observation(self, agent, world): - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: - if not entity.boundary: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - # communication of all other agents - comm = [] - other_pos = [] - other_vel = [] - for other in world.agents: - if other is agent: continue - comm.append(other.state.c) - other_pos.append(other.state.p_pos - agent.state.p_pos) - if not other.adversary: - other_vel.append(other.state.p_vel) - return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel) diff --git a/multiagent/scenarios/simple_world_comm.py b/multiagent/scenarios/simple_world_comm.py deleted file mode 100644 index 499456e40..000000000 --- a/multiagent/scenarios/simple_world_comm.py +++ /dev/null @@ -1,289 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - - -class Scenario(BaseScenario): - def make_world(self): - world = World() - # set any world properties first - world.dim_c = 4 - #world.damping = 1 - num_good_agents = 2 - num_adversaries = 4 - num_agents = num_adversaries + num_good_agents - num_landmarks = 1 - num_food = 2 - num_forests = 2 - # add agents - world.agents = [Agent() for i in range(num_agents)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = True - agent.leader = True if i == 0 else False - agent.silent = True if i > 0 else False - agent.adversary = True if i < num_adversaries else False - agent.size = 0.075 if agent.adversary else 0.045 - agent.accel = 3.0 if agent.adversary else 4.0 - #agent.accel = 20.0 
if agent.adversary else 25.0 - agent.max_speed = 1.0 if agent.adversary else 1.3 - # add landmarks - world.landmarks = [Landmark() for i in range(num_landmarks)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = True - landmark.movable = False - landmark.size = 0.2 - landmark.boundary = False - world.food = [Landmark() for i in range(num_food)] - for i, landmark in enumerate(world.food): - landmark.name = 'food %d' % i - landmark.collide = False - landmark.movable = False - landmark.size = 0.03 - landmark.boundary = False - world.forests = [Landmark() for i in range(num_forests)] - for i, landmark in enumerate(world.forests): - landmark.name = 'forest %d' % i - landmark.collide = False - landmark.movable = False - landmark.size = 0.3 - landmark.boundary = False - world.landmarks += world.food - world.landmarks += world.forests - #world.landmarks += self.set_boundaries(world) # world boundaries now penalized with negative reward - # make initial conditions - self.reset_world(world) - return world - - def set_boundaries(self, world): - boundary_list = [] - landmark_size = 1 - edge = 1 + landmark_size - num_landmarks = int(edge * 2 / landmark_size) - for x_pos in [-edge, edge]: - for i in range(num_landmarks): - l = Landmark() - l.state.p_pos = np.array([x_pos, -1 + i * landmark_size]) - boundary_list.append(l) - - for y_pos in [-edge, edge]: - for i in range(num_landmarks): - l = Landmark() - l.state.p_pos = np.array([-1 + i * landmark_size, y_pos]) - boundary_list.append(l) - - for i, l in enumerate(boundary_list): - l.name = 'boundary %d' % i - l.collide = True - l.movable = False - l.boundary = True - l.color = np.array([0.75, 0.75, 0.75]) - l.size = landmark_size - l.state.p_vel = np.zeros(world.dim_p) - - return boundary_list - - - def reset_world(self, world): - # random properties for agents - for i, agent in enumerate(world.agents): - agent.color = np.array([0.45, 0.95, 0.45]) if not agent.adversary else np.array([0.95, 0.45, 0.45]) - agent.color -= np.array([0.3, 0.3, 0.3]) if agent.leader else np.array([0, 0, 0]) - # random properties for landmarks - for i, landmark in enumerate(world.landmarks): - landmark.color = np.array([0.25, 0.25, 0.25]) - for i, landmark in enumerate(world.food): - landmark.color = np.array([0.15, 0.15, 0.65]) - for i, landmark in enumerate(world.forests): - landmark.color = np.array([0.6, 0.9, 0.6]) - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - for i, landmark in enumerate(world.food): - landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - for i, landmark in enumerate(world.forests): - landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - - def benchmark_data(self, agent, world): - if agent.adversary: - collisions = 0 - for a in self.good_agents(world): - if self.is_collision(a, agent): - collisions += 1 - return collisions - else: - return 0 - - - def is_collision(self, agent1, agent2): - delta_pos = agent1.state.p_pos - agent2.state.p_pos - dist = np.sqrt(np.sum(np.square(delta_pos))) - dist_min = agent1.size + agent2.size - return True if dist < dist_min else 
False - - - # return all agents that are not adversaries - def good_agents(self, world): - return [agent for agent in world.agents if not agent.adversary] - - # return all adversarial agents - def adversaries(self, world): - return [agent for agent in world.agents if agent.adversary] - - - def reward(self, agent, world): - # Agents are rewarded based on minimum agent distance to each landmark - #boundary_reward = -10 if self.outside_boundary(agent) else 0 - main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) - return main_reward - - def outside_boundary(self, agent): - if agent.state.p_pos[0] > 1 or agent.state.p_pos[0] < -1 or agent.state.p_pos[1] > 1 or agent.state.p_pos[1] < -1: - return True - else: - return False - - - def agent_reward(self, agent, world): - # Agents are rewarded based on minimum agent distance to each landmark - rew = 0 - shape = False - adversaries = self.adversaries(world) - if shape: - for adv in adversaries: - rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos))) - if agent.collide: - for a in adversaries: - if self.is_collision(a, agent): - rew -= 5 - def bound(x): - if x < 0.9: - return 0 - if x < 1.0: - return (x - 0.9) * 10 - return min(np.exp(2 * x - 2), 10) # 1 + (x - 1) * (x - 1) - - for p in range(world.dim_p): - x = abs(agent.state.p_pos[p]) - rew -= 2 * bound(x) - - for food in world.food: - if self.is_collision(agent, food): - rew += 2 - rew += 0.05 * min([np.sqrt(np.sum(np.square(food.state.p_pos - agent.state.p_pos))) for food in world.food]) - - return rew - - def adversary_reward(self, agent, world): - # Agents are rewarded based on minimum agent distance to each landmark - rew = 0 - shape = True - agents = self.good_agents(world) - adversaries = self.adversaries(world) - if shape: - rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in agents]) - if agent.collide: - for ag in agents: - for adv in adversaries: - if self.is_collision(ag, adv): - rew += 5 - return rew - - - def observation2(self, agent, world): - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: - if not entity.boundary: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - - food_pos = [] - for entity in world.food: - if not entity.boundary: - food_pos.append(entity.state.p_pos - agent.state.p_pos) - # communication of all other agents - comm = [] - other_pos = [] - other_vel = [] - for other in world.agents: - if other is agent: continue - comm.append(other.state.c) - other_pos.append(other.state.p_pos - agent.state.p_pos) - if not other.adversary: - other_vel.append(other.state.p_vel) - return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel) - - def observation(self, agent, world): - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: - if not entity.boundary: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - - in_forest = [np.array([-1]), np.array([-1])] - inf1 = False - inf2 = False - if self.is_collision(agent, world.forests[0]): - in_forest[0] = np.array([1]) - inf1= True - if self.is_collision(agent, world.forests[1]): - in_forest[1] = np.array([1]) - inf2 = True - - food_pos = [] - for entity in world.food: - if not entity.boundary: - food_pos.append(entity.state.p_pos - agent.state.p_pos) - # communication of all other agents - comm = [] - other_pos = [] - other_vel = [] - 
for other in world.agents: - if other is agent: continue - comm.append(other.state.c) - oth_f1 = self.is_collision(other, world.forests[0]) - oth_f2 = self.is_collision(other, world.forests[1]) - if (inf1 and oth_f1) or (inf2 and oth_f2) or (not inf1 and not oth_f1 and not inf2 and not oth_f2) or agent.leader: #without forest vis - other_pos.append(other.state.p_pos - agent.state.p_pos) - if not other.adversary: - other_vel.append(other.state.p_vel) - else: - other_pos.append([0, 0]) - if not other.adversary: - other_vel.append([0, 0]) - - # to tell the pred when the prey are in the forest - prey_forest = [] - ga = self.good_agents(world) - for a in ga: - if any([self.is_collision(a, f) for f in world.forests]): - prey_forest.append(np.array([1])) - else: - prey_forest.append(np.array([-1])) - # to tell leader when pred are in forest - prey_forest_lead = [] - for f in world.forests: - if any([self.is_collision(a, f) for a in ga]): - prey_forest_lead.append(np.array([1])) - else: - prey_forest_lead.append(np.array([-1])) - - comm = [world.agents[0].state.c] - - if agent.adversary and not agent.leader: - return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel + in_forest + comm) - if agent.leader: - return np.concatenate( - [agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel + in_forest + comm) - else: - return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + in_forest + other_vel) - - diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..39fca3b46 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,87 @@ +absl-py==0.11.0 +aiohttp==3.7.3 +aiohttp-cors==0.7.0 +aioredis==1.3.1 +astunparse==1.6.3 +async-timeout==3.0.1 +atari-py==0.2.6 +attrs==20.3.0 +blessings==1.7 +cachetools==4.2.0 +certifi==2020.12.5 +chardet==3.0.4 +click==7.1.2 +cloudpickle==1.6.0 +colorama==0.4.4 +colorful==0.5.4 +contextvars==2.4 +dataclasses==0.8 +dm-tree==0.1.5 +filelock==3.0.12 +flatbuffers==1.12 +future==0.18.2 +gast==0.3.3 +google-api-core==1.24.1 +google-auth==1.24.0 +google-auth-oauthlib==0.4.2 +google-pasta==0.2.0 +googleapis-common-protos==1.52.0 +gpustat==0.6.0 +grpcio==1.32.0 +gym==0.18.0 +h5py==2.10.0 +hiredis==1.1.0 +idna==2.10 +idna-ssl==1.1.0 +immutables==0.14 +importlib-metadata==3.4.0 +jsonschema==3.2.0 +Keras-Preprocessing==1.1.2 +lz4==3.1.1 +Markdown==3.3.3 +msgpack==1.0.2 +-e git+git@github.com:DrArtemi/multiagent-particle-envs.git@b2f65814be5242d406b45a1ef9fc989e042233f0#egg=multiagent +multidict==5.1.0 +numpy==1.19.5 +numpy-stl==2.13.1 +nvidia-ml-py3==7.352.0 +oauthlib==3.1.0 +opencensus==0.7.11 +opencensus-context==0.1.2 +opencv-python==4.5.1.48 +opencv-python-headless==4.3.0.36 +opt-einsum==3.3.0 +pandas==1.1.5 +Pillow==7.2.0 +prometheus-client==0.9.0 +protobuf==3.14.0 +psutil==5.8.0 +py-spy==0.3.3 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pyglet==1.5.0 +pyrsistent==0.17.3 +python-dateutil==2.8.1 +python-utils==2.4.0 +pytz==2020.5 +PyYAML==5.3.1 +ray==1.1.0 +redis==3.5.3 +requests==2.25.1 +requests-oauthlib==1.3.0 +rsa==4.7 +scipy==1.5.4 +six==1.15.0 +tabulate==0.8.7 +tensorboard==2.4.0 +tensorboard-plugin-wit==1.7.0 +tensorboardX==2.1 +tensorflow==2.4.0 +tensorflow-estimator==2.4.0 +termcolor==1.1.0 +typing-extensions==3.7.4.3 +urllib3==1.26.2 +Werkzeug==1.0.1 +wrapt==1.12.1 +yarl==1.6.3 +zipp==3.4.0