From 6d30c9295d4cf797cb637d784e34d52d2ba20a74 Mon Sep 17 00:00:00 2001
From: Glenn Lewis <6598971+gmlewis@users.noreply.github.com>
Date: Fri, 31 Aug 2018 18:30:48 -0400
Subject: [PATCH] Account for number of constants in Gene

---
 .../openai-gym/algorithms/Copy-v0/main.go     | 75 +++++++++++++++++++
 gene/gene.go                                  |  2 +-
 2 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 experiments/openai-gym/algorithms/Copy-v0/main.go

diff --git a/experiments/openai-gym/algorithms/Copy-v0/main.go b/experiments/openai-gym/algorithms/Copy-v0/main.go
new file mode 100644
index 0000000..9bcd660
--- /dev/null
+++ b/experiments/openai-gym/algorithms/Copy-v0/main.go
@@ -0,0 +1,75 @@
+// -*- compile-command: "go run main.go"; -*-
+
+// Copy-v0 runs a GEP algorithm on the OpenAI Gym "Copy-v0" Algorithm problem.
+// https://gym.openai.com/envs/Copy-v0/
+package main
+
+import (
+	"log"
+
+	gym "github.com/openai/gym-http-api/binding-go"
+)
+
+const (
+	baseURL = "http://localhost:5000"
+	env     = "Copy-v0"
+)
+
+// main creates the env environment via the gym client at baseURL, logs its
+// action and observation spaces, and, with the monitor started, steps it
+// with sampled actions until Step reports done.
+func main() {
+	c, err := gym.NewClient(baseURL)
+	if err != nil {
+		log.Fatalf("gym.NewClient(%q): %v", baseURL, err)
+	}
+
+	id, err := c.Create(env)
+	if err != nil {
+		log.Fatalf("Unable to create environment %q: %v", env, err)
+	}
+	// log.Fatalf exits the process without running deferred calls, so this
+	// Close only happens when main returns normally.
+	defer c.Close(id)
+	log.Printf("id=%q", id)
+
+	actionSpace, err := c.ActionSpace(id)
+	if err != nil {
+		log.Fatalf("ActionSpace(%q): %v", id, err)
+	}
+	log.Printf("Action space: %+v", actionSpace)
+	observationSpace, err := c.ObservationSpace(id)
+	if err != nil {
+		log.Fatalf("ObservationSpace(%q): %v", id, err)
+	}
+	log.Printf("Observation space: %+v", observationSpace)
+
+	if err := c.StartMonitor(id, "/tmp/"+env, true, false, false); err != nil {
+		log.Fatalf(`StartMonitor(%q, "/tmp/%v"): %v`, id, env, err)
+	}
+
+	obs, err := c.Reset(id)
+	if err != nil {
+		log.Fatalf("Reset(%q): %v", id, err)
+	}
+	log.Printf("1st observation: %v", obs)
+	var done bool
+	for !done {
+		// TODO: Replace SampleAction with GEP algorithm.
+		act, err := c.SampleAction(id)
+		if err != nil {
+			log.Fatalf("SampleAction(%q): %v", id, err)
+		}
+
+		var reward float64
+		obs, reward, done, _, err = c.Step(id, act, false)
+		if err != nil {
+			log.Fatalf("Step(%q, %v, false): %v", id, act, err)
+		}
+		log.Printf("reward=%v, observation=%v", reward, obs)
+	}
+
+	if err := c.CloseMonitor(id); err != nil {
+		log.Fatalf("CloseMonitor(%q): %v", id, err)
+	}
+}
diff --git a/gene/gene.go b/gene/gene.go
index 5a4ea19..e358876 100644
--- a/gene/gene.go
+++ b/gene/gene.go
@@ -82,7 +82,7 @@ func New(x string) *Gene {
 // properties of the gene, and functions provide the available functions and
 // their respective weights to be used in the creation of the gene.
 func RandomNew(headSize, tailSize, numTerminals, numConstants int, functions []FuncWeight) *Gene {
-	totalWeight := numTerminals
+	totalWeight := numTerminals + numConstants
 	for _, f := range functions {
 		totalWeight += f.Weight
 	}