diff --git a/.gitignore b/.gitignore index 4681f8b57..42ce0e849 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ __pycache__/ *.egg-info/ -*.pyc \ No newline at end of file +*.pyc +.vscode/ \ No newline at end of file diff --git a/bin/interactive.py b/bin/interactive.py index d726fb8a0..7cc4fe0fb 100755 --- a/bin/interactive.py +++ b/bin/interactive.py @@ -3,7 +3,7 @@ sys.path.insert(1, os.path.join(sys.path[0], '..')) import argparse -from multiagent.environment import MultiAgentEnv +from multiagent.envs.environment import MultiAgentEnv from multiagent.policy import InteractivePolicy import multiagent.scenarios as scenarios diff --git a/make_env.py b/make_env.py deleted file mode 100644 index 12c1ae946..000000000 --- a/make_env.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -Code for creating a multiagent environment with one of the scenarios listed -in ./scenarios/. -Can be called by using, for example: - env = make_env('simple_speaker_listener') -After producing the env object, can be used similarly to an OpenAI gym -environment. - -A policy using this environment must output actions in the form of a list -for all agents. Each element of the list should be a numpy array, -of size (env.world.dim_p + env.world.dim_c, 1). Physical actions precede -communication actions in this array. See environment.py for more details. -""" - -def make_env(scenario_name, benchmark=False): - ''' - Creates a MultiAgentEnv object as env. This can be used similar to a gym - environment by calling env.reset() and env.step(). - Use env.render() to view the environment on the screen. - - Input: - scenario_name : name of the scenario from ./scenarios/ to be Returns - (without the .py extension) - benchmark : whether you want to produce benchmarking data - (usually only done during evaluation) - - Some useful env properties (see environment.py): - .observation_space : Returns the observation space for each agent - .action_space : Returns the action space for each agent - .n : Returns the number of Agents - ''' - from multiagent.environment import MultiAgentEnv - import multiagent.scenarios as scenarios - - # load scenario from script - scenario = scenarios.load(scenario_name + ".py").Scenario() - # create world - world = scenario.make_world() - # create multiagent environment - if benchmark: - env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, scenario.benchmark_data) - else: - env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation) - return env diff --git a/multiagent/__init__.py b/multiagent/__init__.py index 90b60076c..ce3592d9b 100644 --- a/multiagent/__init__.py +++ b/multiagent/__init__.py @@ -5,14 +5,5 @@ register( id='MultiagentSimple-v0', - entry_point='multiagent.envs:SimpleEnv', - # FIXME(cathywu) currently has to be exactly max_path_length parameters in - # rllab run script - max_episode_steps=100, -) - -register( - id='MultiagentSimpleSpeakerListener-v0', - entry_point='multiagent.envs:SimpleSpeakerListenerEnv', - max_episode_steps=100, + entry_point='multiagent.envs:CustomMultiAgentEnv', ) diff --git a/multiagent/core.py b/multiagent/core.py deleted file mode 100644 index c0662ca0c..000000000 --- a/multiagent/core.py +++ /dev/null @@ -1,196 +0,0 @@ -import numpy as np - -# physical/external base state of all entites -class EntityState(object): - def __init__(self): - # physical position - self.p_pos = None - # physical velocity - self.p_vel = None - -# state of agents (including communication and internal/mental state) -class 
AgentState(EntityState): - def __init__(self): - super(AgentState, self).__init__() - # communication utterance - self.c = None - -# action of the agent -class Action(object): - def __init__(self): - # physical action - self.u = None - # communication action - self.c = None - -# properties and state of physical world entity -class Entity(object): - def __init__(self): - # name - self.name = '' - # properties: - self.size = 0.050 - # entity can move / be pushed - self.movable = False - # entity collides with others - self.collide = True - # material density (affects mass) - self.density = 25.0 - # color - self.color = None - # max speed and accel - self.max_speed = None - self.accel = None - # state - self.state = EntityState() - # mass - self.initial_mass = 1.0 - - @property - def mass(self): - return self.initial_mass - -# properties of landmark entities -class Landmark(Entity): - def __init__(self): - super(Landmark, self).__init__() - -# properties of agent entities -class Agent(Entity): - def __init__(self): - super(Agent, self).__init__() - # agents are movable by default - self.movable = True - # cannot send communication signals - self.silent = False - # cannot observe the world - self.blind = False - # physical motor noise amount - self.u_noise = None - # communication noise amount - self.c_noise = None - # control range - self.u_range = 1.0 - # state - self.state = AgentState() - # action - self.action = Action() - # script behavior to execute - self.action_callback = None - -# multi-agent world -class World(object): - def __init__(self): - # list of agents and entities (can change at execution-time!) - self.agents = [] - self.landmarks = [] - # communication channel dimensionality - self.dim_c = 0 - # position dimensionality - self.dim_p = 2 - # color dimensionality - self.dim_color = 3 - # simulation timestep - self.dt = 0.1 - # physical damping - self.damping = 0.25 - # contact response parameters - self.contact_force = 1e+2 - self.contact_margin = 1e-3 - - # return all entities in the world - @property - def entities(self): - return self.agents + self.landmarks - - # return all agents controllable by external policies - @property - def policy_agents(self): - return [agent for agent in self.agents if agent.action_callback is None] - - # return all agents controlled by world scripts - @property - def scripted_agents(self): - return [agent for agent in self.agents if agent.action_callback is not None] - - # update state of the world - def step(self): - # set actions for scripted agents - for agent in self.scripted_agents: - agent.action = agent.action_callback(agent, self) - # gather forces applied to entities - p_force = [None] * len(self.entities) - # apply agent physical controls - p_force = self.apply_action_force(p_force) - # apply environment forces - p_force = self.apply_environment_force(p_force) - # integrate physical state - self.integrate_state(p_force) - # update agent state - for agent in self.agents: - self.update_agent_state(agent) - - # gather agent action forces - def apply_action_force(self, p_force): - # set applied forces - for i,agent in enumerate(self.agents): - if agent.movable: - noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0 - p_force[i] = agent.action.u + noise - return p_force - - # gather physical forces acting on entities - def apply_environment_force(self, p_force): - # simple (but inefficient) collision response - for a,entity_a in enumerate(self.entities): - for b,entity_b in enumerate(self.entities): - 
if(b <= a): continue - [f_a, f_b] = self.get_collision_force(entity_a, entity_b) - if(f_a is not None): - if(p_force[a] is None): p_force[a] = 0.0 - p_force[a] = f_a + p_force[a] - if(f_b is not None): - if(p_force[b] is None): p_force[b] = 0.0 - p_force[b] = f_b + p_force[b] - return p_force - - # integrate physical state - def integrate_state(self, p_force): - for i,entity in enumerate(self.entities): - if not entity.movable: continue - entity.state.p_vel = entity.state.p_vel * (1 - self.damping) - if (p_force[i] is not None): - entity.state.p_vel += (p_force[i] / entity.mass) * self.dt - if entity.max_speed is not None: - speed = np.sqrt(np.square(entity.state.p_vel[0]) + np.square(entity.state.p_vel[1])) - if speed > entity.max_speed: - entity.state.p_vel = entity.state.p_vel / np.sqrt(np.square(entity.state.p_vel[0]) + - np.square(entity.state.p_vel[1])) * entity.max_speed - entity.state.p_pos += entity.state.p_vel * self.dt - - def update_agent_state(self, agent): - # set communication state (directly for now) - if agent.silent: - agent.state.c = np.zeros(self.dim_c) - else: - noise = np.random.randn(*agent.action.c.shape) * agent.c_noise if agent.c_noise else 0.0 - agent.state.c = agent.action.c + noise - - # get collision forces for any contact between two entities - def get_collision_force(self, entity_a, entity_b): - if (not entity_a.collide) or (not entity_b.collide): - return [None, None] # not a collider - if (entity_a is entity_b): - return [None, None] # don't collide against itself - # compute actual distance between entities - delta_pos = entity_a.state.p_pos - entity_b.state.p_pos - dist = np.sqrt(np.sum(np.square(delta_pos))) - # minimum allowable distance - dist_min = entity_a.size + entity_b.size - # softmax penetration - k = self.contact_margin - penetration = np.logaddexp(0, -(dist - dist_min)/k)*k - force = self.contact_force * delta_pos / dist * penetration - force_a = +force if entity_a.movable else None - force_b = -force if entity_b.movable else None - return [force_a, force_b] \ No newline at end of file diff --git a/multiagent/environment.py b/multiagent/environment.py deleted file mode 100644 index d2e8d3278..000000000 --- a/multiagent/environment.py +++ /dev/null @@ -1,335 +0,0 @@ -import gym -from gym import spaces -from gym.envs.registration import EnvSpec -import numpy as np -from multiagent.multi_discrete import MultiDiscrete - -# environment for all agents in the multiagent world -# currently code assumes that no agents will be created/destroyed at runtime! 
-class MultiAgentEnv(gym.Env): - metadata = { - 'render.modes' : ['human', 'rgb_array'] - } - - def __init__(self, world, reset_callback=None, reward_callback=None, - observation_callback=None, info_callback=None, - done_callback=None, shared_viewer=True): - - self.world = world - self.agents = self.world.policy_agents - # set required vectorized gym env property - self.n = len(world.policy_agents) - # scenario callbacks - self.reset_callback = reset_callback - self.reward_callback = reward_callback - self.observation_callback = observation_callback - self.info_callback = info_callback - self.done_callback = done_callback - # environment parameters - self.discrete_action_space = True - # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector - self.discrete_action_input = False - # if true, even the action is continuous, action will be performed discretely - self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False - # if true, every agent has the same reward - self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False - self.time = 0 - - # configure spaces - self.action_space = [] - self.observation_space = [] - for agent in self.agents: - total_action_space = [] - # physical action space - if self.discrete_action_space: - u_action_space = spaces.Discrete(world.dim_p * 2 + 1) - else: - u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32) - if agent.movable: - total_action_space.append(u_action_space) - # communication action space - if self.discrete_action_space: - c_action_space = spaces.Discrete(world.dim_c) - else: - c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32) - if not agent.silent: - total_action_space.append(c_action_space) - # total action space - if len(total_action_space) > 1: - # all action spaces are discrete, so simplify to MultiDiscrete action space - if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]): - act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space]) - else: - act_space = spaces.Tuple(total_action_space) - self.action_space.append(act_space) - else: - self.action_space.append(total_action_space[0]) - # observation space - obs_dim = len(observation_callback(agent, self.world)) - self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32)) - agent.action.c = np.zeros(self.world.dim_c) - - # rendering - self.shared_viewer = shared_viewer - if self.shared_viewer: - self.viewers = [None] - else: - self.viewers = [None] * self.n - self._reset_render() - - def step(self, action_n): - obs_n = [] - reward_n = [] - done_n = [] - info_n = {'n': []} - self.agents = self.world.policy_agents - # set action for each agent - for i, agent in enumerate(self.agents): - self._set_action(action_n[i], agent, self.action_space[i]) - # advance world state - self.world.step() - # record observation for each agent - for agent in self.agents: - obs_n.append(self._get_obs(agent)) - reward_n.append(self._get_reward(agent)) - done_n.append(self._get_done(agent)) - - info_n['n'].append(self._get_info(agent)) - - # all agents get total reward in cooperative case - reward = np.sum(reward_n) - if self.shared_reward: - reward_n = [reward] * self.n - - return obs_n, reward_n, done_n, info_n - - def reset(self): - # reset world - self.reset_callback(self.world) - # reset renderer - self._reset_render() 
- # record observations for each agent - obs_n = [] - self.agents = self.world.policy_agents - for agent in self.agents: - obs_n.append(self._get_obs(agent)) - return obs_n - - # get info used for benchmarking - def _get_info(self, agent): - if self.info_callback is None: - return {} - return self.info_callback(agent, self.world) - - # get observation for a particular agent - def _get_obs(self, agent): - if self.observation_callback is None: - return np.zeros(0) - return self.observation_callback(agent, self.world) - - # get dones for a particular agent - # unused right now -- agents are allowed to go beyond the viewing screen - def _get_done(self, agent): - if self.done_callback is None: - return False - return self.done_callback(agent, self.world) - - # get reward for a particular agent - def _get_reward(self, agent): - if self.reward_callback is None: - return 0.0 - return self.reward_callback(agent, self.world) - - # set env action for a particular agent - def _set_action(self, action, agent, action_space, time=None): - agent.action.u = np.zeros(self.world.dim_p) - agent.action.c = np.zeros(self.world.dim_c) - # process action - if isinstance(action_space, MultiDiscrete): - act = [] - size = action_space.high - action_space.low + 1 - index = 0 - for s in size: - act.append(action[index:(index+s)]) - index += s - action = act - else: - action = [action] - - if agent.movable: - # physical action - if self.discrete_action_input: - agent.action.u = np.zeros(self.world.dim_p) - # process discrete action - if action[0] == 1: agent.action.u[0] = -1.0 - if action[0] == 2: agent.action.u[0] = +1.0 - if action[0] == 3: agent.action.u[1] = -1.0 - if action[0] == 4: agent.action.u[1] = +1.0 - else: - if self.force_discrete_action: - d = np.argmax(action[0]) - action[0][:] = 0.0 - action[0][d] = 1.0 - if self.discrete_action_space: - agent.action.u[0] += action[0][1] - action[0][2] - agent.action.u[1] += action[0][3] - action[0][4] - else: - agent.action.u = action[0] - sensitivity = 5.0 - if agent.accel is not None: - sensitivity = agent.accel - agent.action.u *= sensitivity - action = action[1:] - if not agent.silent: - # communication action - if self.discrete_action_input: - agent.action.c = np.zeros(self.world.dim_c) - agent.action.c[action[0]] = 1.0 - else: - agent.action.c = action[0] - action = action[1:] - # make sure we used all elements of action - assert len(action) == 0 - - # reset rendering assets - def _reset_render(self): - self.render_geoms = None - self.render_geoms_xform = None - - # render environment - def render(self, mode='human'): - if mode == 'human': - alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - message = '' - for agent in self.world.agents: - comm = [] - for other in self.world.agents: - if other is agent: continue - if np.all(other.state.c == 0): - word = '_' - else: - word = alphabet[np.argmax(other.state.c)] - message += (other.name + ' to ' + agent.name + ': ' + word + ' ') - print(message) - - for i in range(len(self.viewers)): - # create viewers (if necessary) - if self.viewers[i] is None: - # import rendering only if we need it (and don't import for headless machines) - #from gym.envs.classic_control import rendering - from multiagent import rendering - self.viewers[i] = rendering.Viewer(700,700) - - # create rendering geometry - if self.render_geoms is None: - # import rendering only if we need it (and don't import for headless machines) - #from gym.envs.classic_control import rendering - from multiagent import rendering - self.render_geoms = [] - 
self.render_geoms_xform = [] - for entity in self.world.entities: - geom = rendering.make_circle(entity.size) - xform = rendering.Transform() - if 'agent' in entity.name: - geom.set_color(*entity.color, alpha=0.5) - else: - geom.set_color(*entity.color) - geom.add_attr(xform) - self.render_geoms.append(geom) - self.render_geoms_xform.append(xform) - - # add geoms to viewer - for viewer in self.viewers: - viewer.geoms = [] - for geom in self.render_geoms: - viewer.add_geom(geom) - - results = [] - for i in range(len(self.viewers)): - from multiagent import rendering - # update bounds to center around agent - cam_range = 1 - if self.shared_viewer: - pos = np.zeros(self.world.dim_p) - else: - pos = self.agents[i].state.p_pos - self.viewers[i].set_bounds(pos[0]-cam_range,pos[0]+cam_range,pos[1]-cam_range,pos[1]+cam_range) - # update geometry positions - for e, entity in enumerate(self.world.entities): - self.render_geoms_xform[e].set_translation(*entity.state.p_pos) - # render to display or array - results.append(self.viewers[i].render(return_rgb_array = mode=='rgb_array')) - - return results - - # create receptor field locations in local coordinate frame - def _make_receptor_locations(self, agent): - receptor_type = 'polar' - range_min = 0.05 * 2.0 - range_max = 1.00 - dx = [] - # circular receptive field - if receptor_type == 'polar': - for angle in np.linspace(-np.pi, +np.pi, 8, endpoint=False): - for distance in np.linspace(range_min, range_max, 3): - dx.append(distance * np.array([np.cos(angle), np.sin(angle)])) - # add origin - dx.append(np.array([0.0, 0.0])) - # grid receptive field - if receptor_type == 'grid': - for x in np.linspace(-range_max, +range_max, 5): - for y in np.linspace(-range_max, +range_max, 5): - dx.append(np.array([x,y])) - return dx - - -# vectorized wrapper for a batch of multi-agent environments -# assumes all environments have the same observation and action space -class BatchMultiAgentEnv(gym.Env): - metadata = { - 'runtime.vectorized': True, - 'render.modes' : ['human', 'rgb_array'] - } - - def __init__(self, env_batch): - self.env_batch = env_batch - - @property - def n(self): - return np.sum([env.n for env in self.env_batch]) - - @property - def action_space(self): - return self.env_batch[0].action_space - - @property - def observation_space(self): - return self.env_batch[0].observation_space - - def step(self, action_n, time): - obs_n = [] - reward_n = [] - done_n = [] - info_n = {'n': []} - i = 0 - for env in self.env_batch: - obs, reward, done, _ = env.step(action_n[i:(i+env.n)], time) - i += env.n - obs_n += obs - # reward = [r / len(self.env_batch) for r in reward] - reward_n += reward - done_n += done - return obs_n, reward_n, done_n, info_n - - def reset(self): - obs_n = [] - for env in self.env_batch: - obs_n += env.reset() - return obs_n - - # render environment - def render(self, mode='human', close=True): - results_n = [] - for env in self.env_batch: - results_n += env.render(mode, close) - return results_n diff --git a/multiagent/envs/__init__.py b/multiagent/envs/__init__.py new file mode 100644 index 000000000..02882ab8f --- /dev/null +++ b/multiagent/envs/__init__.py @@ -0,0 +1 @@ +from multiagent.envs.multiagent_env import CustomMultiAgentEnv \ No newline at end of file diff --git a/multiagent/envs/agents/basic_agent.py b/multiagent/envs/agents/basic_agent.py new file mode 100644 index 000000000..c16df11c3 --- /dev/null +++ b/multiagent/envs/agents/basic_agent.py @@ -0,0 +1,63 @@ +import multiagent.envs.observations as observations +import 
multiagent.envs.rewards as rewards +import multiagent.envs.physics as physics +import numpy as np + +from gym.spaces import Box + +class BasicAgent(): + def __init__(self, id="agent", initial_conditions=None, physic="BasicPhysic", + observation="simple_goal", reward="linear_to_goal"): + + self.id = id + # Data used to reset agent + self._init_pos = initial_conditions['position']\ + if initial_conditions is not None and 'position' in initial_conditions\ + else np.zeros(2) # [x, y] + self._init_velocity = initial_conditions['velocity']\ + if initial_conditions is not None and 'velocity' in initial_conditions\ + else np.zeros(2) # [x, y] + self._init_color = initial_conditions['color']\ + if initial_conditions is not None and 'color' in initial_conditions\ + else np.zeros(4) # [r, g, b, alpha] + self._init_movable = initial_conditions['movable']\ + if initial_conditions is not None and 'movable' in initial_conditions\ + else True + self._init_max_speed = initial_conditions['max_speed']\ + if initial_conditions is not None and 'max_speed' in initial_conditions\ + else None + self._init_size = initial_conditions['size']\ + if initial_conditions is not None and 'size' in initial_conditions\ + else 0.02 + + # Agent functions and classes + self._observation = getattr(observations, observation) + self._reward = getattr(rewards, reward) + self._physic = getattr(physics, physic)() + + self.actions = np.zeros(2) # [x, y], each from -1 to 1 + + self.action_space = Box(low=-1, high=+1, shape=self.actions.shape, dtype=np.float32) + self.observation_space = Box(low=-np.inf, high=+np.inf, shape=(6,), dtype=np.float32) + + # Init values by resetting agent + self.reset() + + def reset(self): + self.position = self._init_pos + self.velocity = self._init_velocity + self.color = self._init_color + self.movable = self._init_movable + self.max_speed = self._init_max_speed + self.size = self._init_size + + def observation(self, env): + return self._observation(self.id, env) + + def reward(self, env): + return self._reward(self.id, env) + + def step(self, action, env): + self.actions = action + + self._physic.move(self.id, self.actions, env) diff --git a/multiagent/envs/landmarks/basic_landmark.py b/multiagent/envs/landmarks/basic_landmark.py new file mode 100644 index 000000000..74431d4b6 --- /dev/null +++ b/multiagent/envs/landmarks/basic_landmark.py @@ -0,0 +1,24 @@ +import numpy as np + + +class BasicLandmark(): + def __init__(self, id="agent", type="obstacle", initial_conditions=None): + + self.id = id + self.type = type + # Data used to reset landmark + self._init_pos = initial_conditions['position']\ + if initial_conditions is not None and 'position' in initial_conditions\ + else np.zeros(2) # [x, y] + self._init_color = initial_conditions['color']\ + if initial_conditions is not None and 'color' in initial_conditions\ + else np.zeros(4) # [r, g, b, alpha] + self._init_size = initial_conditions['size']\ + if initial_conditions is not None and 'size' in initial_conditions\ + else 0.05 + + + def reset(self): + self.position = self._init_pos + self.color = self._init_color + self.size = self._init_size diff --git a/multiagent/envs/multiagent_env.py b/multiagent/envs/multiagent_env.py new file mode 100644 index 000000000..a0b321397 --- /dev/null +++ b/multiagent/envs/multiagent_env.py @@ -0,0 +1,123 @@ +import gym + +import numpy as np + +from ray.rllib.env.multi_agent_env import MultiAgentEnv + + +class CustomMultiAgentEnv(MultiAgentEnv): + + def __init__(self, config): + #* Env + self.dones = set() + 
self.max_episode_step = config['max_episode_step'] + self.num_step = 0 + + #* Agents + self.agents = config['agents'] + # Set Obs and Act spaces for each agent. + self.action_spaces = [agent.action_space + for agent in self.agents] + self.observation_spaces = [agent.observation_space + for agent in self.agents] + + #* Landmarks + self.landmarks = config['landmarks'] + + #* Rendering + self.viewers = [None] + + def reset(self): + # Reset renderer + self._reset_render() + for agent in self.agents: + agent.reset() + for landmark in self.landmarks: + landmark.reset() + + self.num_step = 0 + + return { agent.id: agent.observation(self) for agent in self.agents } + + def step(self, actions): + obs, rew = {}, {} + + self.num_step += 1 + + # Apply every agent action + for agent in self.agents: + agent.step(actions[agent.id], self) + + # Get every agent actions, observations, infos and done state + for agent in self.agents: + obs[agent.id] = agent.observation(self) + rew[agent.id] = agent.reward(self) + + # Get agents information + info = self.get_agents_information() + # Check if some agents are done + # done = self.check_agent_done() + done = { + "__all__": len(self.dones) == len(self.agents) or\ + self.num_step == self.max_episode_step + } + + return obs, rew, done, info + + def get_agents_information(self): + return { agent.id: {} for agent in self.agents } + + def check_agent_done(self): + return { agent.id: False for agent in self.agents } + + # reset rendering assets + def _reset_render(self): + self.render_geoms = None + self.render_geoms_xform = None + + def render(self, mode="human"): + + for i in range(len(self.viewers)): + # create viewers (if necessary) + if self.viewers[i] is None: + # import rendering only if we need it (and don't import for headless machines) + #from gym.envs.classic_control import rendering + from multiagent import rendering + self.viewers[i] = rendering.Viewer(700,700) + + entities = self.agents + self.landmarks + # create rendering geometry + if self.render_geoms is None: + # import rendering only if we need it (and don't import for headless machines) + #from gym.envs.classic_control import rendering + from multiagent import rendering + self.render_geoms = [] + self.render_geoms_xform = [] + for entity in entities: + geom = rendering.make_circle(entity.size) + xform = rendering.Transform() + geom.set_color(*entity.color) + geom.add_attr(xform) + self.render_geoms.append(geom) + self.render_geoms_xform.append(xform) + + # add geoms to viewer + for viewer in self.viewers: + viewer.geoms = [] + for geom in self.render_geoms: + viewer.add_geom(geom) + + results = [] + for i in range(len(self.viewers)): + from multiagent import rendering + # update bounds to center around agent + cam_range = 1 + pos = np.zeros(2) + self.viewers[i].set_bounds(pos[0]-cam_range,pos[0]+cam_range,pos[1]-cam_range,pos[1]+cam_range) + # update geometry positions + for e, entity in enumerate(entities): + self.render_geoms_xform[e].set_translation(*entity.position) + # render to display or array + results.append(self.viewers[i].render(return_rgb_array = mode=='rgb_array')) + + return results \ No newline at end of file diff --git a/multiagent/envs/observations.py b/multiagent/envs/observations.py new file mode 100644 index 000000000..e709fe045 --- /dev/null +++ b/multiagent/envs/observations.py @@ -0,0 +1,17 @@ +import numpy as np + + +def self_observation(agent_id, env): + return np.concatenate((env.agents[agent_id].position, + env.agents[agent_id].velocity, + env.agents[agent_id].color), 
axis=0) + + +def simple_goal(agent_id, env): + agent = [agent for agent in env.agents if agent.id == agent_id][0] + goal = [ld for ld in env.landmarks if ld.type == 'goal'][0] + + return np.concatenate((agent.position, + agent.velocity, + goal.position), axis=0) + \ No newline at end of file diff --git a/multiagent/envs/physics.py b/multiagent/envs/physics.py new file mode 100644 index 000000000..f81f34f0a --- /dev/null +++ b/multiagent/envs/physics.py @@ -0,0 +1,27 @@ +import numpy as np + + +class BasicPhysic(): + + def __init__(self): + # physical damping + self.damping = 0.25 + # simulation timestep + self.dt = 0.1 + + def move(self, agent_id, action, env): + agent = [agent for agent in env.agents if agent.id == agent_id][0] + if agent.movable is False: + return + agent.velocity = agent.velocity * (1 - self.damping) + agent.velocity += action * self.dt + + if agent.max_speed is not None: + speed = np.sqrt(np.square(agent.velocity[0]) + np.square(agent.velocity[1])) + if speed > agent.max_speed: + agent.velocity = agent.velocity / speed * agent.max_speed + + # Avoid read-only numpy array + pos = np.copy(agent.position) + pos += agent.velocity * self.dt + agent.position = pos diff --git a/multiagent/envs/rewards.py b/multiagent/envs/rewards.py new file mode 100644 index 000000000..deab43428 --- /dev/null +++ b/multiagent/envs/rewards.py @@ -0,0 +1,9 @@ +import numpy as np + + +def linear_to_goal(agent_id, env): + agent_pos = [agent.position for agent in env.agents if agent.id == agent_id][0] + goal_pos = [ld.position for ld in env.landmarks if ld.type == 'goal'][0] + + dist2 = np.sum(np.square(agent_pos - goal_pos)) + return -dist2 \ No newline at end of file diff --git a/multiagent/multi_discrete.py b/multiagent/multi_discrete.py index d7108ad43..1f70d17d7 100644 --- a/multiagent/multi_discrete.py +++ b/multiagent/multi_discrete.py @@ -4,7 +4,7 @@ import numpy as np import gym -from gym.spaces import prng +# from gym.spaces import prng class MultiDiscrete(gym.Space): """ @@ -30,7 +30,8 @@ def __init__(self, array_of_param_array): def sample(self): """ Returns a array with one sample from each discrete action space """ # For each row: round(random .* (max - min) + min, 0) - random_array = prng.np_random.rand(self.num_discrete_space) + # random_array = prng.np_random.rand(self.num_discrete_space) #! Remove this so I don't have to downgrade gym + random_array = np.random.RandomState().rand(self.num_discrete_space) return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)] def contains(self, x): return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all() diff --git a/multiagent/rendering.py b/multiagent/rendering.py index cd00c7fb8..5a826bda6 100644 --- a/multiagent/rendering.py +++ b/multiagent/rendering.py @@ -11,18 +11,20 @@ os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib' # (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite -from gym.utils import reraise +# from gym.utils import reraise from gym import error try: import pyglet except ImportError as e: - reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.") + # reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. 
But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.") + raise ImportError try: from pyglet.gl import * except ImportError as e: - reraise(prefix="Error occured while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL install. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python '") + # reraise(prefix="Error occured while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL install. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python '") + raise ImportError import math import numpy as np diff --git a/multiagent/scenario.py b/multiagent/scenario.py deleted file mode 100644 index 02d86773e..000000000 --- a/multiagent/scenario.py +++ /dev/null @@ -1,10 +0,0 @@ -import numpy as np - -# defines scenario upon which the world is built -class BaseScenario(object): - # create elements of the world - def make_world(self): - raise NotImplementedError() - # create initial conditions of the world - def reset_world(self, world): - raise NotImplementedError() diff --git a/multiagent/scenarios/__init__.py b/multiagent/scenarios/__init__.py deleted file mode 100644 index 9a4b6f71a..000000000 --- a/multiagent/scenarios/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -import imp -import os.path as osp - - -def load(name): - pathname = osp.join(osp.dirname(__file__), name) - return imp.load_source('', pathname) diff --git a/multiagent/scenarios/simple.py b/multiagent/scenarios/simple.py deleted file mode 100755 index 3ff80bedd..000000000 --- a/multiagent/scenarios/simple.py +++ /dev/null @@ -1,50 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - -class Scenario(BaseScenario): - def make_world(self): - world = World() - # add agents - world.agents = [Agent() for i in range(1)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = False - agent.silent = True - # add landmarks - world.landmarks = [Landmark() for i in range(1)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = False - landmark.movable = False - # make initial conditions - self.reset_world(world) - return world - - def reset_world(self, world): - # random properties for agents - for i, agent in enumerate(world.agents): - agent.color = np.array([0.25,0.25,0.25]) - # random properties for landmarks - for i, landmark in enumerate(world.landmarks): - landmark.color = np.array([0.75,0.75,0.75]) - world.landmarks[0].color = np.array([0.75,0.25,0.25]) - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - - def reward(self, agent, world): - dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos)) - return -dist2 - - def observation(self, agent, world): - # get positions of all entities in this agent's reference frame - entity_pos 
= [] - for entity in world.landmarks: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - return np.concatenate([agent.state.p_vel] + entity_pos) diff --git a/multiagent/scenarios/simple_adversary.py b/multiagent/scenarios/simple_adversary.py deleted file mode 100644 index d897bb841..000000000 --- a/multiagent/scenarios/simple_adversary.py +++ /dev/null @@ -1,139 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - - -class Scenario(BaseScenario): - - def make_world(self): - world = World() - # set any world properties first - world.dim_c = 2 - num_agents = 3 - world.num_agents = num_agents - num_adversaries = 1 - num_landmarks = num_agents - 1 - # add agents - world.agents = [Agent() for i in range(num_agents)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = False - agent.silent = True - agent.adversary = True if i < num_adversaries else False - agent.size = 0.15 - # add landmarks - world.landmarks = [Landmark() for i in range(num_landmarks)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = False - landmark.movable = False - landmark.size = 0.08 - # make initial conditions - self.reset_world(world) - return world - - def reset_world(self, world): - # random properties for agents - world.agents[0].color = np.array([0.85, 0.35, 0.35]) - for i in range(1, world.num_agents): - world.agents[i].color = np.array([0.35, 0.35, 0.85]) - # random properties for landmarks - for i, landmark in enumerate(world.landmarks): - landmark.color = np.array([0.15, 0.15, 0.15]) - # set goal landmark - goal = np.random.choice(world.landmarks) - goal.color = np.array([0.15, 0.65, 0.15]) - for agent in world.agents: - agent.goal_a = goal - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - - def benchmark_data(self, agent, world): - # returns data for benchmarking purposes - if agent.adversary: - return np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)) - else: - dists = [] - for l in world.landmarks: - dists.append(np.sum(np.square(agent.state.p_pos - l.state.p_pos))) - dists.append(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) - return tuple(dists) - - # return all agents that are not adversaries - def good_agents(self, world): - return [agent for agent in world.agents if not agent.adversary] - - # return all adversarial agents - def adversaries(self, world): - return [agent for agent in world.agents if agent.adversary] - - def reward(self, agent, world): - # Agents are rewarded based on minimum agent distance to each landmark - return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) - - def agent_reward(self, agent, world): - # Rewarded based on how close any good agent is to the goal landmark, and how far the adversary is from it - shaped_reward = True - shaped_adv_reward = True - - # Calculate negative reward for adversary - adversary_agents = self.adversaries(world) - if shaped_adv_reward: # distance-based adversary reward - adv_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in adversary_agents]) - else: # 
proximity-based adversary reward (binary) - adv_rew = 0 - for a in adversary_agents: - if np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) < 2 * a.goal_a.size: - adv_rew -= 5 - - # Calculate positive reward for agents - good_agents = self.good_agents(world) - if shaped_reward: # distance-based agent reward - pos_rew = -min( - [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) - else: # proximity-based agent reward (binary) - pos_rew = 0 - if min([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) \ - < 2 * agent.goal_a.size: - pos_rew += 5 - pos_rew -= min( - [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) - return pos_rew + adv_rew - - def adversary_reward(self, agent, world): - # Rewarded based on proximity to the goal landmark - shaped_reward = True - if shaped_reward: # distance-based reward - return -np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)) - else: # proximity-based reward (binary) - adv_rew = 0 - if np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) < 2 * agent.goal_a.size: - adv_rew += 5 - return adv_rew - - - def observation(self, agent, world): - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - # entity colors - entity_color = [] - for entity in world.landmarks: - entity_color.append(entity.color) - # communication of all other agents - other_pos = [] - for other in world.agents: - if other is agent: continue - other_pos.append(other.state.p_pos - agent.state.p_pos) - - if not agent.adversary: - return np.concatenate([agent.goal_a.state.p_pos - agent.state.p_pos] + entity_pos + other_pos) - else: - return np.concatenate(entity_pos + other_pos) diff --git a/multiagent/scenarios/simple_crypto.py b/multiagent/scenarios/simple_crypto.py deleted file mode 100644 index e1fd59237..000000000 --- a/multiagent/scenarios/simple_crypto.py +++ /dev/null @@ -1,169 +0,0 @@ -""" -Scenario: -1 speaker, 2 listeners (one of which is an adversary). Good agents rewarded for proximity to goal, and distance from -adversary to goal. Adversary is rewarded for its distance to the goal. 
-""" - - -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario -import random - - -class CryptoAgent(Agent): - def __init__(self): - super(CryptoAgent, self).__init__() - self.key = None - -class Scenario(BaseScenario): - - def make_world(self): - world = World() - # set any world properties first - num_agents = 3 - num_adversaries = 1 - num_landmarks = 2 - world.dim_c = 4 - # add agents - world.agents = [CryptoAgent() for i in range(num_agents)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = False - agent.adversary = True if i < num_adversaries else False - agent.speaker = True if i == 2 else False - agent.movable = False - # add landmarks - world.landmarks = [Landmark() for i in range(num_landmarks)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = False - landmark.movable = False - # make initial conditions - self.reset_world(world) - return world - - - def reset_world(self, world): - # random properties for agents - for i, agent in enumerate(world.agents): - agent.color = np.array([0.25, 0.25, 0.25]) - if agent.adversary: - agent.color = np.array([0.75, 0.25, 0.25]) - agent.key = None - # random properties for landmarks - color_list = [np.zeros(world.dim_c) for i in world.landmarks] - for i, color in enumerate(color_list): - color[i] += 1 - for color, landmark in zip(color_list, world.landmarks): - landmark.color = color - # set goal landmark - goal = np.random.choice(world.landmarks) - world.agents[1].color = goal.color - world.agents[2].key = np.random.choice(world.landmarks).color - - for agent in world.agents: - agent.goal_a = goal - - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - - - def benchmark_data(self, agent, world): - # returns data for benchmarking purposes - return (agent.state.c, agent.goal_a.color) - - # return all agents that are not adversaries - def good_listeners(self, world): - return [agent for agent in world.agents if not agent.adversary and not agent.speaker] - - # return all agents that are not adversaries - def good_agents(self, world): - return [agent for agent in world.agents if not agent.adversary] - - # return all adversarial agents - def adversaries(self, world): - return [agent for agent in world.agents if agent.adversary] - - def reward(self, agent, world): - return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) - - def agent_reward(self, agent, world): - # Agents rewarded if Bob can reconstruct message, but adversary (Eve) cannot - good_listeners = self.good_listeners(world) - adversaries = self.adversaries(world) - good_rew = 0 - adv_rew = 0 - for a in good_listeners: - if (a.state.c == np.zeros(world.dim_c)).all(): - continue - else: - good_rew -= np.sum(np.square(a.state.c - agent.goal_a.color)) - for a in adversaries: - if (a.state.c == np.zeros(world.dim_c)).all(): - continue - else: - adv_l1 = np.sum(np.square(a.state.c - agent.goal_a.color)) - adv_rew += adv_l1 - return adv_rew + good_rew - - def adversary_reward(self, agent, world): - # Adversary (Eve) is rewarded if it can reconstruct original goal - rew = 0 - if not 
(agent.state.c == np.zeros(world.dim_c)).all(): - rew -= np.sum(np.square(agent.state.c - agent.goal_a.color)) - return rew - - - def observation(self, agent, world): - # goal color - goal_color = np.zeros(world.dim_color) - if agent.goal_a is not None: - goal_color = agent.goal_a.color - - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - # communication of all other agents - comm = [] - for other in world.agents: - if other is agent or (other.state.c is None) or not other.speaker: continue - comm.append(other.state.c) - - confer = np.array([0]) - - if world.agents[2].key is None: - confer = np.array([1]) - key = np.zeros(world.dim_c) - goal_color = np.zeros(world.dim_c) - else: - key = world.agents[2].key - - prnt = False - # speaker - if agent.speaker: - if prnt: - print('speaker') - print(agent.state.c) - print(np.concatenate([goal_color] + [key] + [confer] + [np.random.randn(1)])) - return np.concatenate([goal_color] + [key]) - # listener - if not agent.speaker and not agent.adversary: - if prnt: - print('listener') - print(agent.state.c) - print(np.concatenate([key] + comm + [confer])) - return np.concatenate([key] + comm) - if not agent.speaker and agent.adversary: - if prnt: - print('adversary') - print(agent.state.c) - print(np.concatenate(comm + [confer])) - return np.concatenate(comm) diff --git a/multiagent/scenarios/simple_push.py b/multiagent/scenarios/simple_push.py deleted file mode 100644 index 907fef5b5..000000000 --- a/multiagent/scenarios/simple_push.py +++ /dev/null @@ -1,96 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - -class Scenario(BaseScenario): - def make_world(self): - world = World() - # set any world properties first - world.dim_c = 2 - num_agents = 2 - num_adversaries = 1 - num_landmarks = 2 - # add agents - world.agents = [Agent() for i in range(num_agents)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = True - agent.silent = True - if i < num_adversaries: - agent.adversary = True - else: - agent.adversary = False - # add landmarks - world.landmarks = [Landmark() for i in range(num_landmarks)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = False - landmark.movable = False - # make initial conditions - self.reset_world(world) - return world - - def reset_world(self, world): - # random properties for landmarks - for i, landmark in enumerate(world.landmarks): - landmark.color = np.array([0.1, 0.1, 0.1]) - landmark.color[i + 1] += 0.8 - landmark.index = i - # set goal landmark - goal = np.random.choice(world.landmarks) - for i, agent in enumerate(world.agents): - agent.goal_a = goal - agent.color = np.array([0.25, 0.25, 0.25]) - if agent.adversary: - agent.color = np.array([0.75, 0.25, 0.25]) - else: - j = goal.index - agent.color[j + 1] += 0.5 - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - - def reward(self, agent, world): - # Agents are rewarded based on minimum agent distance to each landmark - return self.adversary_reward(agent, world) if 
agent.adversary else self.agent_reward(agent, world) - - def agent_reward(self, agent, world): - # the distance to the goal - return -np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) - - def adversary_reward(self, agent, world): - # keep the nearest good agents away from the goal - agent_dist = [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in world.agents if not a.adversary] - pos_rew = min(agent_dist) - #nearest_agent = world.good_agents[np.argmin(agent_dist)] - #neg_rew = np.sqrt(np.sum(np.square(nearest_agent.state.p_pos - agent.state.p_pos))) - neg_rew = np.sqrt(np.sum(np.square(agent.goal_a.state.p_pos - agent.state.p_pos))) - #neg_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in world.good_agents]) - return pos_rew - neg_rew - - def observation(self, agent, world): - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: # world.entities: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - # entity colors - entity_color = [] - for entity in world.landmarks: # world.entities: - entity_color.append(entity.color) - # communication of all other agents - comm = [] - other_pos = [] - for other in world.agents: - if other is agent: continue - comm.append(other.state.c) - other_pos.append(other.state.p_pos - agent.state.p_pos) - if not agent.adversary: - return np.concatenate([agent.state.p_vel] + [agent.goal_a.state.p_pos - agent.state.p_pos] + [agent.color] + entity_pos + entity_color + other_pos) - else: - #other_pos = list(reversed(other_pos)) if random.uniform(0,1) > 0.5 else other_pos # randomize position of other agents in adversary network - return np.concatenate([agent.state.p_vel] + entity_pos + other_pos) diff --git a/multiagent/scenarios/simple_reference.py b/multiagent/scenarios/simple_reference.py deleted file mode 100755 index 2ea84a645..000000000 --- a/multiagent/scenarios/simple_reference.py +++ /dev/null @@ -1,81 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - -class Scenario(BaseScenario): - def make_world(self): - world = World() - # set any world properties first - world.dim_c = 10 - world.collaborative = True # whether agents share rewards - # add agents - world.agents = [Agent() for i in range(2)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = False - # add landmarks - world.landmarks = [Landmark() for i in range(3)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = False - landmark.movable = False - # make initial conditions - self.reset_world(world) - return world - - def reset_world(self, world): - # assign goals to agents - for agent in world.agents: - agent.goal_a = None - agent.goal_b = None - # want other agent to go to the goal landmark - world.agents[0].goal_a = world.agents[1] - world.agents[0].goal_b = np.random.choice(world.landmarks) - world.agents[1].goal_a = world.agents[0] - world.agents[1].goal_b = np.random.choice(world.landmarks) - # random properties for agents - for i, agent in enumerate(world.agents): - agent.color = np.array([0.25,0.25,0.25]) - # random properties for landmarks - world.landmarks[0].color = np.array([0.75,0.25,0.25]) - world.landmarks[1].color = np.array([0.25,0.75,0.25]) - world.landmarks[2].color = np.array([0.25,0.25,0.75]) - # special colors for goals - world.agents[0].goal_a.color = 
world.agents[0].goal_b.color - world.agents[1].goal_a.color = world.agents[1].goal_b.color - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - - def reward(self, agent, world): - if agent.goal_a is None or agent.goal_b is None: - return 0.0 - dist2 = np.sum(np.square(agent.goal_a.state.p_pos - agent.goal_b.state.p_pos)) - return -dist2 - - def observation(self, agent, world): - # goal color - goal_color = [np.zeros(world.dim_color), np.zeros(world.dim_color)] - if agent.goal_b is not None: - goal_color[1] = agent.goal_b.color - - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - # entity colors - entity_color = [] - for entity in world.landmarks: - entity_color.append(entity.color) - # communication of all other agents - comm = [] - for other in world.agents: - if other is agent: continue - comm.append(other.state.c) - return np.concatenate([agent.state.p_vel] + entity_pos + [goal_color[1]] + comm) - \ No newline at end of file diff --git a/multiagent/scenarios/simple_speaker_listener.py b/multiagent/scenarios/simple_speaker_listener.py deleted file mode 100755 index 896e64e03..000000000 --- a/multiagent/scenarios/simple_speaker_listener.py +++ /dev/null @@ -1,92 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - -class Scenario(BaseScenario): - def make_world(self): - world = World() - # set any world properties first - world.dim_c = 3 - num_landmarks = 3 - world.collaborative = True - # add agents - world.agents = [Agent() for i in range(2)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = False - agent.size = 0.075 - # speaker - world.agents[0].movable = False - # listener - world.agents[1].silent = True - # add landmarks - world.landmarks = [Landmark() for i in range(num_landmarks)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = False - landmark.movable = False - landmark.size = 0.04 - # make initial conditions - self.reset_world(world) - return world - - def reset_world(self, world): - # assign goals to agents - for agent in world.agents: - agent.goal_a = None - agent.goal_b = None - # want listener to go to the goal landmark - world.agents[0].goal_a = world.agents[1] - world.agents[0].goal_b = np.random.choice(world.landmarks) - # random properties for agents - for i, agent in enumerate(world.agents): - agent.color = np.array([0.25,0.25,0.25]) - # random properties for landmarks - world.landmarks[0].color = np.array([0.65,0.15,0.15]) - world.landmarks[1].color = np.array([0.15,0.65,0.15]) - world.landmarks[2].color = np.array([0.15,0.15,0.65]) - # special colors for goals - world.agents[0].goal_a.color = world.agents[0].goal_b.color + np.array([0.45, 0.45, 0.45]) - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p) - 
landmark.state.p_vel = np.zeros(world.dim_p) - - def benchmark_data(self, agent, world): - # returns data for benchmarking purposes - return self.reward(agent, reward) - - def reward(self, agent, world): - # squared distance from listener to landmark - a = world.agents[0] - dist2 = np.sum(np.square(a.goal_a.state.p_pos - a.goal_b.state.p_pos)) - return -dist2 - - def observation(self, agent, world): - # goal color - goal_color = np.zeros(world.dim_color) - if agent.goal_b is not None: - goal_color = agent.goal_b.color - - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - - # communication of all other agents - comm = [] - for other in world.agents: - if other is agent or (other.state.c is None): continue - comm.append(other.state.c) - - # speaker - if not agent.movable: - return np.concatenate([goal_color]) - # listener - if agent.silent: - return np.concatenate([agent.state.p_vel] + entity_pos + comm) - diff --git a/multiagent/scenarios/simple_spread.py b/multiagent/scenarios/simple_spread.py deleted file mode 100644 index 263dfb4df..000000000 --- a/multiagent/scenarios/simple_spread.py +++ /dev/null @@ -1,100 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - - -class Scenario(BaseScenario): - def make_world(self): - world = World() - # set any world properties first - world.dim_c = 2 - num_agents = 3 - num_landmarks = 3 - world.collaborative = True - # add agents - world.agents = [Agent() for i in range(num_agents)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = True - agent.silent = True - agent.size = 0.15 - # add landmarks - world.landmarks = [Landmark() for i in range(num_landmarks)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = False - landmark.movable = False - # make initial conditions - self.reset_world(world) - return world - - def reset_world(self, world): - # random properties for agents - for i, agent in enumerate(world.agents): - agent.color = np.array([0.35, 0.35, 0.85]) - # random properties for landmarks - for i, landmark in enumerate(world.landmarks): - landmark.color = np.array([0.25, 0.25, 0.25]) - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - - def benchmark_data(self, agent, world): - rew = 0 - collisions = 0 - occupied_landmarks = 0 - min_dists = 0 - for l in world.landmarks: - dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents] - min_dists += min(dists) - rew -= min(dists) - if min(dists) < 0.1: - occupied_landmarks += 1 - if agent.collide: - for a in world.agents: - if self.is_collision(a, agent): - rew -= 1 - collisions += 1 - return (rew, collisions, min_dists, occupied_landmarks) - - - def is_collision(self, agent1, agent2): - delta_pos = agent1.state.p_pos - agent2.state.p_pos - dist = np.sqrt(np.sum(np.square(delta_pos))) - dist_min = agent1.size + agent2.size - return True if dist < dist_min else False - - def reward(self, agent, world): - # Agents are rewarded based on minimum agent distance to each 
landmark, penalized for collisions - rew = 0 - for l in world.landmarks: - dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents] - rew -= min(dists) - if agent.collide: - for a in world.agents: - if self.is_collision(a, agent): - rew -= 1 - return rew - - def observation(self, agent, world): - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: # world.entities: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - # entity colors - entity_color = [] - for entity in world.landmarks: # world.entities: - entity_color.append(entity.color) - # communication of all other agents - comm = [] - other_pos = [] - for other in world.agents: - if other is agent: continue - comm.append(other.state.c) - other_pos.append(other.state.p_pos - agent.state.p_pos) - return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + comm) diff --git a/multiagent/scenarios/simple_tag.py b/multiagent/scenarios/simple_tag.py deleted file mode 100644 index e4b65b646..000000000 --- a/multiagent/scenarios/simple_tag.py +++ /dev/null @@ -1,147 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - - -class Scenario(BaseScenario): - def make_world(self): - world = World() - # set any world properties first - world.dim_c = 2 - num_good_agents = 1 - num_adversaries = 3 - num_agents = num_adversaries + num_good_agents - num_landmarks = 2 - # add agents - world.agents = [Agent() for i in range(num_agents)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = True - agent.silent = True - agent.adversary = True if i < num_adversaries else False - agent.size = 0.075 if agent.adversary else 0.05 - agent.accel = 3.0 if agent.adversary else 4.0 - #agent.accel = 20.0 if agent.adversary else 25.0 - agent.max_speed = 1.0 if agent.adversary else 1.3 - # add landmarks - world.landmarks = [Landmark() for i in range(num_landmarks)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = True - landmark.movable = False - landmark.size = 0.2 - landmark.boundary = False - # make initial conditions - self.reset_world(world) - return world - - - def reset_world(self, world): - # random properties for agents - for i, agent in enumerate(world.agents): - agent.color = np.array([0.35, 0.85, 0.35]) if not agent.adversary else np.array([0.85, 0.35, 0.35]) - # random properties for landmarks - for i, landmark in enumerate(world.landmarks): - landmark.color = np.array([0.25, 0.25, 0.25]) - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - if not landmark.boundary: - landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - - - def benchmark_data(self, agent, world): - # returns data for benchmarking purposes - if agent.adversary: - collisions = 0 - for a in self.good_agents(world): - if self.is_collision(a, agent): - collisions += 1 - return collisions - else: - return 0 - - - def is_collision(self, agent1, agent2): - delta_pos = agent1.state.p_pos - agent2.state.p_pos - dist = np.sqrt(np.sum(np.square(delta_pos))) - dist_min = agent1.size + agent2.size - return True if dist < dist_min else False - - # 
return all agents that are not adversaries - def good_agents(self, world): - return [agent for agent in world.agents if not agent.adversary] - - # return all adversarial agents - def adversaries(self, world): - return [agent for agent in world.agents if agent.adversary] - - - def reward(self, agent, world): - # Agents are rewarded based on minimum agent distance to each landmark - main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) - return main_reward - - def agent_reward(self, agent, world): - # Agents are negatively rewarded if caught by adversaries - rew = 0 - shape = False - adversaries = self.adversaries(world) - if shape: # reward can optionally be shaped (increased reward for increased distance from adversary) - for adv in adversaries: - rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos))) - if agent.collide: - for a in adversaries: - if self.is_collision(a, agent): - rew -= 10 - - # agents are penalized for exiting the screen, so that they can be caught by the adversaries - def bound(x): - if x < 0.9: - return 0 - if x < 1.0: - return (x - 0.9) * 10 - return min(np.exp(2 * x - 2), 10) - for p in range(world.dim_p): - x = abs(agent.state.p_pos[p]) - rew -= bound(x) - - return rew - - def adversary_reward(self, agent, world): - # Adversaries are rewarded for collisions with agents - rew = 0 - shape = False - agents = self.good_agents(world) - adversaries = self.adversaries(world) - if shape: # reward can optionally be shaped (decreased reward for increased distance from agents) - for adv in adversaries: - rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - adv.state.p_pos))) for a in agents]) - if agent.collide: - for ag in agents: - for adv in adversaries: - if self.is_collision(ag, adv): - rew += 10 - return rew - - def observation(self, agent, world): - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: - if not entity.boundary: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - # communication of all other agents - comm = [] - other_pos = [] - other_vel = [] - for other in world.agents: - if other is agent: continue - comm.append(other.state.c) - other_pos.append(other.state.p_pos - agent.state.p_pos) - if not other.adversary: - other_vel.append(other.state.p_vel) - return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel) diff --git a/multiagent/scenarios/simple_world_comm.py b/multiagent/scenarios/simple_world_comm.py deleted file mode 100644 index 499456e40..000000000 --- a/multiagent/scenarios/simple_world_comm.py +++ /dev/null @@ -1,289 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - - -class Scenario(BaseScenario): - def make_world(self): - world = World() - # set any world properties first - world.dim_c = 4 - #world.damping = 1 - num_good_agents = 2 - num_adversaries = 4 - num_agents = num_adversaries + num_good_agents - num_landmarks = 1 - num_food = 2 - num_forests = 2 - # add agents - world.agents = [Agent() for i in range(num_agents)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = True - agent.leader = True if i == 0 else False - agent.silent = True if i > 0 else False - agent.adversary = True if i < num_adversaries else False - agent.size = 0.075 if agent.adversary else 0.045 - agent.accel = 3.0 if agent.adversary else 4.0 - #agent.accel = 20.0 
if agent.adversary else 25.0 - agent.max_speed = 1.0 if agent.adversary else 1.3 - # add landmarks - world.landmarks = [Landmark() for i in range(num_landmarks)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = True - landmark.movable = False - landmark.size = 0.2 - landmark.boundary = False - world.food = [Landmark() for i in range(num_food)] - for i, landmark in enumerate(world.food): - landmark.name = 'food %d' % i - landmark.collide = False - landmark.movable = False - landmark.size = 0.03 - landmark.boundary = False - world.forests = [Landmark() for i in range(num_forests)] - for i, landmark in enumerate(world.forests): - landmark.name = 'forest %d' % i - landmark.collide = False - landmark.movable = False - landmark.size = 0.3 - landmark.boundary = False - world.landmarks += world.food - world.landmarks += world.forests - #world.landmarks += self.set_boundaries(world) # world boundaries now penalized with negative reward - # make initial conditions - self.reset_world(world) - return world - - def set_boundaries(self, world): - boundary_list = [] - landmark_size = 1 - edge = 1 + landmark_size - num_landmarks = int(edge * 2 / landmark_size) - for x_pos in [-edge, edge]: - for i in range(num_landmarks): - l = Landmark() - l.state.p_pos = np.array([x_pos, -1 + i * landmark_size]) - boundary_list.append(l) - - for y_pos in [-edge, edge]: - for i in range(num_landmarks): - l = Landmark() - l.state.p_pos = np.array([-1 + i * landmark_size, y_pos]) - boundary_list.append(l) - - for i, l in enumerate(boundary_list): - l.name = 'boundary %d' % i - l.collide = True - l.movable = False - l.boundary = True - l.color = np.array([0.75, 0.75, 0.75]) - l.size = landmark_size - l.state.p_vel = np.zeros(world.dim_p) - - return boundary_list - - - def reset_world(self, world): - # random properties for agents - for i, agent in enumerate(world.agents): - agent.color = np.array([0.45, 0.95, 0.45]) if not agent.adversary else np.array([0.95, 0.45, 0.45]) - agent.color -= np.array([0.3, 0.3, 0.3]) if agent.leader else np.array([0, 0, 0]) - # random properties for landmarks - for i, landmark in enumerate(world.landmarks): - landmark.color = np.array([0.25, 0.25, 0.25]) - for i, landmark in enumerate(world.food): - landmark.color = np.array([0.15, 0.15, 0.65]) - for i, landmark in enumerate(world.forests): - landmark.color = np.array([0.6, 0.9, 0.6]) - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - for i, landmark in enumerate(world.food): - landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - for i, landmark in enumerate(world.forests): - landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) - - def benchmark_data(self, agent, world): - if agent.adversary: - collisions = 0 - for a in self.good_agents(world): - if self.is_collision(a, agent): - collisions += 1 - return collisions - else: - return 0 - - - def is_collision(self, agent1, agent2): - delta_pos = agent1.state.p_pos - agent2.state.p_pos - dist = np.sqrt(np.sum(np.square(delta_pos))) - dist_min = agent1.size + agent2.size - return True if dist < dist_min else 
False - - - # return all agents that are not adversaries - def good_agents(self, world): - return [agent for agent in world.agents if not agent.adversary] - - # return all adversarial agents - def adversaries(self, world): - return [agent for agent in world.agents if agent.adversary] - - - def reward(self, agent, world): - # Agents are rewarded based on minimum agent distance to each landmark - #boundary_reward = -10 if self.outside_boundary(agent) else 0 - main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) - return main_reward - - def outside_boundary(self, agent): - if agent.state.p_pos[0] > 1 or agent.state.p_pos[0] < -1 or agent.state.p_pos[1] > 1 or agent.state.p_pos[1] < -1: - return True - else: - return False - - - def agent_reward(self, agent, world): - # Agents are rewarded based on minimum agent distance to each landmark - rew = 0 - shape = False - adversaries = self.adversaries(world) - if shape: - for adv in adversaries: - rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos))) - if agent.collide: - for a in adversaries: - if self.is_collision(a, agent): - rew -= 5 - def bound(x): - if x < 0.9: - return 0 - if x < 1.0: - return (x - 0.9) * 10 - return min(np.exp(2 * x - 2), 10) # 1 + (x - 1) * (x - 1) - - for p in range(world.dim_p): - x = abs(agent.state.p_pos[p]) - rew -= 2 * bound(x) - - for food in world.food: - if self.is_collision(agent, food): - rew += 2 - rew += 0.05 * min([np.sqrt(np.sum(np.square(food.state.p_pos - agent.state.p_pos))) for food in world.food]) - - return rew - - def adversary_reward(self, agent, world): - # Agents are rewarded based on minimum agent distance to each landmark - rew = 0 - shape = True - agents = self.good_agents(world) - adversaries = self.adversaries(world) - if shape: - rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in agents]) - if agent.collide: - for ag in agents: - for adv in adversaries: - if self.is_collision(ag, adv): - rew += 5 - return rew - - - def observation2(self, agent, world): - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: - if not entity.boundary: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - - food_pos = [] - for entity in world.food: - if not entity.boundary: - food_pos.append(entity.state.p_pos - agent.state.p_pos) - # communication of all other agents - comm = [] - other_pos = [] - other_vel = [] - for other in world.agents: - if other is agent: continue - comm.append(other.state.c) - other_pos.append(other.state.p_pos - agent.state.p_pos) - if not other.adversary: - other_vel.append(other.state.p_vel) - return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel) - - def observation(self, agent, world): - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: - if not entity.boundary: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - - in_forest = [np.array([-1]), np.array([-1])] - inf1 = False - inf2 = False - if self.is_collision(agent, world.forests[0]): - in_forest[0] = np.array([1]) - inf1= True - if self.is_collision(agent, world.forests[1]): - in_forest[1] = np.array([1]) - inf2 = True - - food_pos = [] - for entity in world.food: - if not entity.boundary: - food_pos.append(entity.state.p_pos - agent.state.p_pos) - # communication of all other agents - comm = [] - other_pos = [] - other_vel = [] - 
for other in world.agents: - if other is agent: continue - comm.append(other.state.c) - oth_f1 = self.is_collision(other, world.forests[0]) - oth_f2 = self.is_collision(other, world.forests[1]) - if (inf1 and oth_f1) or (inf2 and oth_f2) or (not inf1 and not oth_f1 and not inf2 and not oth_f2) or agent.leader: #without forest vis - other_pos.append(other.state.p_pos - agent.state.p_pos) - if not other.adversary: - other_vel.append(other.state.p_vel) - else: - other_pos.append([0, 0]) - if not other.adversary: - other_vel.append([0, 0]) - - # to tell the pred when the prey are in the forest - prey_forest = [] - ga = self.good_agents(world) - for a in ga: - if any([self.is_collision(a, f) for f in world.forests]): - prey_forest.append(np.array([1])) - else: - prey_forest.append(np.array([-1])) - # to tell leader when pred are in forest - prey_forest_lead = [] - for f in world.forests: - if any([self.is_collision(a, f) for a in ga]): - prey_forest_lead.append(np.array([1])) - else: - prey_forest_lead.append(np.array([-1])) - - comm = [world.agents[0].state.c] - - if agent.adversary and not agent.leader: - return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel + in_forest + comm) - if agent.leader: - return np.concatenate( - [agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel + in_forest + comm) - else: - return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + in_forest + other_vel) - - diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..39fca3b46 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,87 @@ +absl-py==0.11.0 +aiohttp==3.7.3 +aiohttp-cors==0.7.0 +aioredis==1.3.1 +astunparse==1.6.3 +async-timeout==3.0.1 +atari-py==0.2.6 +attrs==20.3.0 +blessings==1.7 +cachetools==4.2.0 +certifi==2020.12.5 +chardet==3.0.4 +click==7.1.2 +cloudpickle==1.6.0 +colorama==0.4.4 +colorful==0.5.4 +contextvars==2.4 +dataclasses==0.8 +dm-tree==0.1.5 +filelock==3.0.12 +flatbuffers==1.12 +future==0.18.2 +gast==0.3.3 +google-api-core==1.24.1 +google-auth==1.24.0 +google-auth-oauthlib==0.4.2 +google-pasta==0.2.0 +googleapis-common-protos==1.52.0 +gpustat==0.6.0 +grpcio==1.32.0 +gym==0.18.0 +h5py==2.10.0 +hiredis==1.1.0 +idna==2.10 +idna-ssl==1.1.0 +immutables==0.14 +importlib-metadata==3.4.0 +jsonschema==3.2.0 +Keras-Preprocessing==1.1.2 +lz4==3.1.1 +Markdown==3.3.3 +msgpack==1.0.2 +-e git+git@github.com:DrArtemi/multiagent-particle-envs.git@b2f65814be5242d406b45a1ef9fc989e042233f0#egg=multiagent +multidict==5.1.0 +numpy==1.19.5 +numpy-stl==2.13.1 +nvidia-ml-py3==7.352.0 +oauthlib==3.1.0 +opencensus==0.7.11 +opencensus-context==0.1.2 +opencv-python==4.5.1.48 +opencv-python-headless==4.3.0.36 +opt-einsum==3.3.0 +pandas==1.1.5 +Pillow==7.2.0 +prometheus-client==0.9.0 +protobuf==3.14.0 +psutil==5.8.0 +py-spy==0.3.3 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pyglet==1.5.0 +pyrsistent==0.17.3 +python-dateutil==2.8.1 +python-utils==2.4.0 +pytz==2020.5 +PyYAML==5.3.1 +ray==1.1.0 +redis==3.5.3 +requests==2.25.1 +requests-oauthlib==1.3.0 +rsa==4.7 +scipy==1.5.4 +six==1.15.0 +tabulate==0.8.7 +tensorboard==2.4.0 +tensorboard-plugin-wit==1.7.0 +tensorboardX==2.1 +tensorflow==2.4.0 +tensorflow-estimator==2.4.0 +termcolor==1.1.0 +typing-extensions==3.7.4.3 +urllib3==1.26.2 +Werkzeug==1.0.1 +wrapt==1.12.1 +yarl==1.6.3 +zipp==3.4.0