From fdd6b87eab2e1bad853cecef31aedc6076420231 Mon Sep 17 00:00:00 2001 From: zilvinas Date: Wed, 16 Oct 2024 00:44:58 +0300 Subject: [PATCH] .. --- dist/assets/index-BV_o50Ui.js | 1 - dist/assets/index-Cp6D6t7L.js | 13 ++ dist/index.html | 10 +- index.html | 10 +- index.html.o | 40 ---- src/game-15-reconstruction/Action.ts | 6 + src/game-15-reconstruction/ConsoleUtils.ts | 20 ++ src/game-15-reconstruction/Environment.ts | 145 ++++++++++++ .../EnvironmentActionResult.ts | 21 ++ .../EnvironmentState.ts | 90 ++++++++ src/game-15-reconstruction/EpisodeRunner.ts | 115 ++++++++++ .../ExperienceRecord.ts | 47 ++++ src/game-15-reconstruction/GameUtils.ts | 103 +++++++++ src/game-15-reconstruction/Main.ts | 12 + src/game-15-reconstruction/QTableGenerator.ts | 144 ++++++++++++ src/game-15-reconstruction/QTableRow.ts | 55 +++++ src/game-15-reconstruction/QTableUpdater.ts | 68 ++++++ src/game-15-reconstruction/StateProducer.ts | 207 ++++++++++++++++++ src/game-15-reconstruction/StateShuffle.ts | 40 ++++ src/game-15-reconstruction/Trainer.ts | 30 +++ src/game-15-reconstruction/utils/Pair.ts | 16 ++ .../utils/SerializedObjectLoader.ts | 32 +++ .../utils/SerializedObjectSaver.ts | 42 ++++ src/game-15-reconstruction/utils/Utils.ts | 78 +++++++ src/main.ts | 3 + 25 files changed, 1297 insertions(+), 51 deletions(-) delete mode 100644 dist/assets/index-BV_o50Ui.js create mode 100644 dist/assets/index-Cp6D6t7L.js delete mode 100644 index.html.o create mode 100644 src/game-15-reconstruction/Action.ts create mode 100644 src/game-15-reconstruction/ConsoleUtils.ts create mode 100644 src/game-15-reconstruction/Environment.ts create mode 100644 src/game-15-reconstruction/EnvironmentActionResult.ts create mode 100644 src/game-15-reconstruction/EnvironmentState.ts create mode 100644 src/game-15-reconstruction/EpisodeRunner.ts create mode 100644 src/game-15-reconstruction/ExperienceRecord.ts create mode 100644 src/game-15-reconstruction/GameUtils.ts create mode 100644 src/game-15-reconstruction/Main.ts create mode 100644 src/game-15-reconstruction/QTableGenerator.ts create mode 100644 src/game-15-reconstruction/QTableRow.ts create mode 100644 src/game-15-reconstruction/QTableUpdater.ts create mode 100644 src/game-15-reconstruction/StateProducer.ts create mode 100644 src/game-15-reconstruction/StateShuffle.ts create mode 100644 src/game-15-reconstruction/Trainer.ts create mode 100644 src/game-15-reconstruction/utils/Pair.ts create mode 100644 src/game-15-reconstruction/utils/SerializedObjectLoader.ts create mode 100644 src/game-15-reconstruction/utils/SerializedObjectSaver.ts create mode 100644 src/game-15-reconstruction/utils/Utils.ts diff --git a/dist/assets/index-BV_o50Ui.js b/dist/assets/index-BV_o50Ui.js deleted file mode 100644 index 5c37f48..0000000 --- a/dist/assets/index-BV_o50Ui.js +++ /dev/null @@ -1 +0,0 @@ -(function(){const r=document.createElement("link").relList;if(r&&r.supports&&r.supports("modulepreload"))return;for(const e of document.querySelectorAll('link[rel="modulepreload"]'))n(e);new MutationObserver(e=>{for(const t of e)if(t.type==="childList")for(const c of t.addedNodes)c.tagName==="LINK"&&c.rel==="modulepreload"&&n(c)}).observe(document,{childList:!0,subtree:!0});function i(e){const t={};return e.integrity&&(t.integrity=e.integrity),e.referrerPolicy&&(t.referrerPolicy=e.referrerPolicy),e.crossOrigin==="use-credentials"?t.credentials="include":e.crossOrigin==="anonymous"?t.credentials="omit":t.credentials="same-origin",t}function n(e){if(e.ep)return;e.ep=!0;const t=i(e);fetch(e.href,t)}})();function u(s){var e,t,c;let r=0;const i=o=>o>=100?o-100:o<=-100?o+100:o,n=o=>{r=i(o);const d=`${r}`;s.innerHTML=d};(e=document.getElementById("increaseByOne"))==null||e.addEventListener("click",()=>n(r+1)),(t=document.getElementById("decreaseByOne"))==null||t.addEventListener("click",()=>n(r-1)),(c=document.getElementById("increaseByTwo"))==null||c.addEventListener("click",()=>n(r+2)),document.getElementById("decreaseByTwo"),n(0)}u(document.getElementById("counter-value")); diff --git a/dist/assets/index-Cp6D6t7L.js b/dist/assets/index-Cp6D6t7L.js new file mode 100644 index 0000000..bf8a885 --- /dev/null +++ b/dist/assets/index-Cp6D6t7L.js @@ -0,0 +1,13 @@ +var P=Object.defineProperty;var X=(d,t,e)=>t in d?P(d,t,{enumerable:!0,configurable:!0,writable:!0,value:e}):d[t]=e;var c=(d,t,e)=>X(d,typeof t!="symbol"?t+"":t,e);(function(){const t=document.createElement("link").relList;if(t&&t.supports&&t.supports("modulepreload"))return;for(const n of document.querySelectorAll('link[rel="modulepreload"]'))s(n);new MutationObserver(n=>{for(const i of n)if(i.type==="childList")for(const l of i.addedNodes)l.tagName==="LINK"&&l.rel==="modulepreload"&&s(l)}).observe(document,{childList:!0,subtree:!0});function e(n){const i={};return n.integrity&&(i.integrity=n.integrity),n.referrerPolicy&&(i.referrerPolicy=n.referrerPolicy),n.crossOrigin==="use-credentials"?i.credentials="include":n.crossOrigin==="anonymous"?i.credentials="omit":i.credentials="same-origin",i}function s(n){if(n.ep)return;n.ep=!0;const i=e(n);fetch(n.href,i)}})();const H=class H{static prnt(t){console.log(t)}static str(t,e){const s=t.toString();if(s.length>=e)return s;const n=e-s.length;return s+H.emptyString.substring(0,n)}static sum(t){return!t||(t=t.filter(e=>e!==0),t.length===0)?0:t.reduce((e,s)=>e+s,0)}static toString(t){return`[${Array.from(t).map(String).join(", ")}]`}static px(){return(t,e)=>t}static equalArrays(t,e){if(t.length!==e.length)return!1;for(let s=0;sMath.random()-.5)}};c(H,"emptyString"," ");let u=H;class G{static save(t,e){u.prnt(t),u.prnt(e)}}var r=(d=>(d.L="L",d.R="R",d.U="U",d.D="D",d))(r||{});class k{constructor(t,e){this.key=t,this.value=e}getKey(){return this.key}getValue(){return this.value}static P(t,e){return new k(t,e)}}class b{static blue(t){return`\x1B[34m${t}\x1B[0m`}static green(t){return`\x1B[32m${t}\x1B[0m`}static color(t,e){return`\x1B[${e}m${t}\x1B[0m`}static clearScreen(){console.log("clearScrn ... ")}}class p{static makeMove(t,e){const n=t.indexOf(-1),i=this.getXY(n);let l=i.getKey(),o=i.getValue();e===r.L&&(l-=1),e===r.R&&(l+=1),e===r.U&&(o-=1),e===r.D&&(o+=1);const h=this.getIndex(l,o),g=[...t],f=g[h];return g[n]=f,g[h]=-1,g}static getXY(t){const e=t%4,s=Math.floor(t/4);return k.P(e,s)}static getIndex(t,e){return e*4+t}static _getValidMoves(t){const e=this.getXY(t),s=Object.values(r).slice();return e.getKey()===0&&s.splice(s.indexOf(r.L),1),e.getKey()===3&&s.splice(s.indexOf(r.R),1),e.getValue()===0&&s.splice(s.indexOf(r.U),1),e.getValue()===3&&s.splice(s.indexOf(r.D),1),s}static getValidMoves(t,e){const s=this._getValidMoves(t),n=this.getXY(t);return this.contains(e,n.getKey()-1,n.getValue())&&s.splice(s.indexOf(r.L),1),this.contains(e,n.getKey()+1,n.getValue())&&s.splice(s.indexOf(r.R),1),this.contains(e,n.getKey(),n.getValue()-1)&&s.splice(s.indexOf(r.U),1),this.contains(e,n.getKey(),n.getValue()+1)&&s.splice(s.indexOf(r.D),1),s}static contains(t,e,s){return t.includes(this.getIndex(e,s))}static stateAsString(t,e){return Array.from({length:16},(s,n)=>{let i;const l=t[n];return l===-1?i=b.blue("*"):e.includes(l)?i=b.color(l.toString(),100):e.includes(n+1)?i=b.green(l.toString()):i=l.toString(),i+=" ",n!==0&&(n+1)%4===0&&(i+=` +`),i}).join("")}static getReverseAction(t){return t===r.D?r.U:t===r.U?r.D:t===r.L?r.R:t===r.R?r.L:null}}class x{static main(){const t=E.generateLessons()[0];let e=[...t.getState()];const s=[...t.getGoals()];x.prntState(e,s),e=x.shuffle(e,t.getLockedStateElements(),1e3),x.prntState(e,s)}static shuffle(t,e,s){const n=e.map(l=>l-1);let i=0;for(;ii+1),s.state=[...a.stateDone],s.episodesToTrain=100,a.shuffle(s,s.lockedStateElements),s}static moveHole(t,e){const s=u.shuffleArray(e)[0],n=t.state.indexOf(-1),i=t.state[s];return t.state[s]=-1,t.state[n]=i,t}static state7_8(t){const e=new a(t);return e.goals=[7,8],e.lockedStateElements=[1,2,3,4,5,6],e.state=[...a.stateDone],e.episodesToTrain=100,a.shuffle(e,[1,2,3,4,5,6,7]),e}static state9_13(t){const e=new a(t);return e.goals=[9,13],e.lockedStateElements=[1,2,3,4,5,6,7,8],e.state=[...a.stateDone],e.episodesToTrain=100,a.shuffle(e,e.lockedStateElements),e}static state10_15(t){const e=new a(t);return e.goals=[10,11,14,15],e.lockedStateElements=[1,2,3,4,5,6,7,8,9,13],e.state=x.shuffle([...a.stateDone],e.lockedStateElements,500),e.episodesToTrain=100,e}static state12(t){const e=new a(t);return e.goals=[12],e.lockedStateElements=[1,2,3,4,5,6,7,8,9,10,11,13,14,15],e.state=x.shuffle([...a.stateDone],e.lockedStateElements,500),e.episodesToTrain=10,e}isLockedIndex(t){return this.lockedStateElements.includes(t+1)}shuffleState(){this.state=x.shuffle(this.state,this.lockedStateElements,500)}static shuffle(t,e){let s=t.state.filter(n=>!e.includes(n));s=u.shuffleArray(s),t.state=[...e,...s]}resetState(){const t=a.generateLessons()[this.lessonNb];this.goals=[...t.goals],this.lockedStateElements=[...t.lockedStateElements],this.state=[...t.state]}};c(a,"stateDone",[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-1]);let E=a;class v{constructor(t,e){c(this,"state");c(this,"goals");c(this,"fixedElements");t instanceof v?(this.state=[...t.getState()],this.goals=[...t.getGoals()],this.fixedElements=[...t.getFixedElements()]):(this.state=[...t],this.goals=[...(e==null?void 0:e.getGoals())??[]],this.fixedElements=[...(e==null?void 0:e.getLockedStateElements())??[]])}getState(){return this.state}setState(t){this.state=t}getGoals(){return this.goals}setGoals(t){this.goals=t}getFixedElements(){return this.fixedElements}setFixedElements(t){this.fixedElements=t}getHashCodeV2(){const t=this.getHashCodeV3__();return this.hashString(t)}getHashCodeV3__(){return Array.from({length:16},(t,e)=>{let s;const n=this.state[e];return n===-1?s="*":this.goals.includes(n)?s=String(n):this.goals.includes(e+1)?s="o":s=" ",s=s+" ",e!==0&&(e+1)%4===0&&(s=s+` +`),s}).join("")}getHashCode(){const t=this.state.map(String).join(",");return this.hashString(t)}hashString(t){let e=0;for(let s=0;sn-1);return p.getValidMoves(e,s)}executeAction(t,e){const s=p.makeMove(t.getState(),e),n=new v(s,m.stateProducer);let i=m._isTerminalSuccess(s,this.goals);this.state=new v(s,m.stateProducer);let l=NaN;this.reverseAction=p.getReverseAction(e),this.circleAction.push(e),this.circleAction.length>8&&this.circleAction.shift(),(u.equalArrays(this.circleAction,this.bigCircleAction1)||u.equalArrays(this.circleAction,this.bigCircleAction2)||u.equalArrays(this.circleAction,this.smallCircleAction1)||u.equalArrays(this.circleAction,this.smallCircleAction2))&&(i=!0);const o=this.state.getState().indexOf(-1);return m.stateProducer.isLockedIndex(o)&&(i=!0,l=-1),isNaN(l)&&(l=this.getReward(s,this.goals)),new $(n,e,l,i)}static _isTerminalSuccess(t,e){if(t.length!==16)throw new Error("newState.size() != 16");return e.filter(s=>t[s-1]===s).length===e.length}prntInfo(){u.prnt(` + +================================================ +`);const t=this.state.getState(),e=t.indexOf(-1),s=p.getXY(e);u.prnt(s);const n=p.getIndex(s.getKey(),s.getValue());u.prnt(`${e} - ${n}`);const i=p._getValidMoves(e);u.prnt(i);const l=this.getReward(t,this.goals);u.prnt(l);const o=p.stateAsString(t,this.goals);u.prnt(o)}getReward(t,e){const s=t.indexOf(-1),n=p.getXY(s),l=e.map(h=>this.getDistance(p.getXY(t.indexOf(h)),p.getXY(h-1))).reduce((h,g)=>h+g,0);if(l===0)return 100.5;const o=e.reduce((h,g)=>h+this.getDistance(p.getXY(t.indexOf(g)),n),0);return u.prnt(`d0Sum: ${l}`),u.prnt(`d1Sum: ${o}`),1/(l+o)}getDistance(t,e){const s=Math.pow(e.getKey()-t.getKey(),2),n=Math.pow(e.getValue()-t.getValue(),2);return Math.sqrt(s+n)}};c(m,"stateProducer");let w=m;class O{constructor(t,e,s,n,i){c(this,"state");c(this,"action");c(this,"reward");c(this,"done");c(this,"newState");this.state=new v(t),this.action=e,this.reward=s,this.done=n,this.newState=new v(i)}getState(){return this.state}getAction(){return this.action}getReward(){return this.reward}isDone(){return this.done}getNewState(){return this.newState}equals(t){return t instanceof O?this.hashCode()===t.hashCode():!1}hashCode(){return this.state.getHashCodeV2()^this.newState.getHashCodeV2()}}class B{constructor(t){c(this,"state");c(this,"qValues");this.state=t,this.qValues=new Map}setValue(t,e){const s=w.getPossibleActions(this.state);if(this.qValues.size===0&&s.forEach(n=>this.qValues.set(n,0)),!s.includes(t)){console.warn("WARNING: !moves.includes(action)");return}this.qValues.set(t,e)}getValue(t){return this.qValues.get(t)||0}getActionWithMaxValue(t){const e=w.getPossibleActions(this.state).filter(n=>n!==t),s=t===null?null:this.getAction(t);return s??(console.warn("WARNING: no action found"),e.length>0?e[0]:r.D)}getAction(t){const s=Array.from(this.qValues.entries()).filter(([n])=>n!==t).reduce((n,i)=>i[1]>n[1]?i:n,[t,-1/0]);return s[1]>-1/0?s[0]:void 0}getMaxValue(){return this.qValues.size===0?0:Math.max(...Array.from(this.qValues.values()))}}class Q{static updateQTable(t,e,s,n,i,l,o,h){let g;if(l)g=i;else{const f=this.getQValue(t,e,s),T=this.getMaxQValue(t,n);g=this.calcQValue(i,f,T,h,o)}this.addStateWithZeroValuesToQTableIfStateNotExist(t,e),this.updateQTableEntry(t,e.getHashCodeV2(),s,g)}static updateQTableEntry(t,e,s,n){var i;(i=t.get(e))==null||i.setValue(s,n)}static getMaxQValue(t,e){var n;const s=e.getHashCodeV2();return this.addStateWithZeroValuesToQTableIfStateNotExist(t,e),((n=t.get(s))==null?void 0:n.getMaxValue())??0}static getQValue(t,e,s){var i;const n=e.getHashCodeV2();return this.addStateWithZeroValuesToQTableIfStateNotExist(t,e),((i=t.get(n))==null?void 0:i.getValue(s))??0}static addStateWithZeroValuesToQTableIfStateNotExist(t,e){t.has(e.getHashCodeV2())||this.addStateWithZeroValuesToQTable(t,e)}static addStateWithZeroValuesToQTable(t,e){const s=e.getHashCodeV2(),n=new B(e);t.set(s,n)}static calcQValue(t,e,s,n,i){return e+i*(t+n*s-e)}}const y=class y{static runEpisode(t,e,s,n,i){const l=new U,o=new w(t);o.reset();let h=o.getInitState();o.prntInfo();const g=.5;let f=!1,T=0;for(;!f&&T<50;){T++;let S;const A=w.getPossibleActions(h);if(o.reverseAction!==null){const q=A.indexOf(o.reverseAction);A.splice(q,1)}l.nextDouble()S+A.qValues.size,0),L=`Episode ${i} done, states count: ${D}, experience size: ${y.experience.size}`;console.log(L),console.log("")}static replayExperience(t,e,s,n,i){Array.from(t).sort(()=>Math.random()-.5).slice(0,i).forEach(o=>{Q.updateQTable(e,o.getState(),o.getAction(),o.getNewState(),o.getReward(),o.isDone(),s,n)})}};c(y,"experience",new Set);let R=y;class U{nextDouble(){return Math.random()}}class K{static train(t,e,s){const l=E.generateLessons(),o=(g,f)=>{R.runEpisode(g,t,.9,.1,f)},h=g=>{for(let f=0;fArray.from(o.qValues.values())),s=e.reduce((o,h)=>o+h,0),n=e.length,i=n?s/n:0,l=new W;return l.count=n,l.sum=s,l.average=i,l}static testQTable(t){u.prnt("********************* test q table **********************"),u.prnt("********************* test q table **********************"),u.prnt("********************* test q table **********************");let e=0;const s=E.generateLessons(),n=s.length;let i=s[e],l=x.shuffle(E.stateDone,[],1e3),o=new v(l,i),h=i.getGoals();this.prntState(o);let g=!1,f=0,T=null;for(;!g&&f<200;){this.sleep(1e3/2).then(),b.clearScreen(),f++;const L=o.getHashCodeV2();Q.addStateWithZeroValuesToQTableIfStateNotExist(t,o);const S=t.get(L),A=S?S.getActionWithMaxValue(T):r.D;T=p.getReverseAction(A);const V=p.makeMove(o.getState(),A),M=w._isTerminalSuccess(V,h);o=new v(V,i),g=u.equalArrays(o.getState(),E.stateDone),u.prnt(`${f} +---- +`),this.prntState(o),M&&!g&&eo!==s),t.has(n)?((l=t.get(n))==null?void 0:l.getActionWithMaxValue(s))||r.D:this.getRandomAction(i)}static getRandomAction(t){return t.length>0?t[Math.floor(Math.random()*t.length)]:r.D}static sleep(t){return new Promise(e=>setTimeout(e,t))}}class W{constructor(){c(this,"count",0);c(this,"sum",0);c(this,"average",0)}}class Y{static main(){u.prnt("kuku"),C.train(),C.test()}}function j(d){var n,i,l;let t=0;const e=o=>o>=100?o-100:o<=-100?o+100:o,s=o=>{t=e(o);const h=`${t}`;d.innerHTML=h};(n=document.getElementById("increaseByOne"))==null||n.addEventListener("click",()=>s(t+1)),(i=document.getElementById("decreaseByOne"))==null||i.addEventListener("click",()=>s(t-1)),(l=document.getElementById("increaseByTwo"))==null||l.addEventListener("click",()=>s(t+2)),document.getElementById("decreaseByTwo"),s(0)}j(document.getElementById("counter-value"));Y.main(); diff --git a/dist/index.html b/dist/index.html index 3567aec..db78f51 100644 --- a/dist/index.html +++ b/dist/index.html @@ -2,16 +2,16 @@ - + WS playground - - + + @@ -34,7 +34,7 @@

- List of supported technologies + List of supported technologies
diff --git a/index.html b/index.html index 0a273e7..f982cbe 100644 --- a/index.html +++ b/index.html @@ -2,16 +2,15 @@ - + WS playground - - + @@ -34,7 +33,8 @@

- List of supported technologies + List of supported technologies
+ diff --git a/index.html.o b/index.html.o deleted file mode 100644 index f982cbe..0000000 --- a/index.html.o +++ /dev/null @@ -1,40 +0,0 @@ - - - - - - - WS playground - - - - -
-

- Thank you for trying it out.
Your first project is up and running - now. -

-
-
-

Counter is

-

-
-
- - - - -
-
-
-
- List of supported technologies -
- - - diff --git a/src/game-15-reconstruction/Action.ts b/src/game-15-reconstruction/Action.ts new file mode 100644 index 0000000..e3f6bfb --- /dev/null +++ b/src/game-15-reconstruction/Action.ts @@ -0,0 +1,6 @@ +export enum Action { + L = "L", + R = "R", + U = "U", + D = "D", +} \ No newline at end of file diff --git a/src/game-15-reconstruction/ConsoleUtils.ts b/src/game-15-reconstruction/ConsoleUtils.ts new file mode 100644 index 0000000..0b03f54 --- /dev/null +++ b/src/game-15-reconstruction/ConsoleUtils.ts @@ -0,0 +1,20 @@ +// utils/ConsoleUtils.ts +export class ConsoleUtils { + public static blue(text: string): string { + return `\x1b[34m${text}\x1b[0m`; // Blue text + } + + public static green(text: string): string { + return `\x1b[32m${text}\x1b[0m`; // Green text + } + + public static color(text: string, colorCode: number): string { + return `\x1b[${colorCode}m${text}\x1b[0m`; // Color by code + } + + public static clearScreen() { + console.log("clearScrn ... "); + } + + +} \ No newline at end of file diff --git a/src/game-15-reconstruction/Environment.ts b/src/game-15-reconstruction/Environment.ts new file mode 100644 index 0000000..181c7ba --- /dev/null +++ b/src/game-15-reconstruction/Environment.ts @@ -0,0 +1,145 @@ +import { Pair } from './utils/Pair'; +import { Action } from './Action'; // Assuming Action is defined elsewhere +import { EnvironmentState } from './EnvironmentState'; // Assuming EnvironmentState is defined elsewhere +import { StateProducer } from './StateProducer'; // Assuming StateProducer is defined elsewhere +import { EnvironmentActionResult } from './EnvironmentActionResult'; // Assuming EnvironmentActionResult is defined elsewhere +import { GameUtils } from './GameUtils'; // Assuming GameUtils is defined elsewhere +import { Utils } from './utils/Utils'; // Assuming Utils is defined elsewhere + +export class Environment { + private state: EnvironmentState; + private static stateProducer: StateProducer; + private goals: number[]; + + public reverseAction: Action | null = null; + private bigCircleAction1: Action[] = []; + private bigCircleAction2: Action[] = []; + private smallCircleAction1: Action[] = []; + private smallCircleAction2: Action[] = []; + private circleAction: Action[] = []; + + constructor(stateProducer: StateProducer) { + Environment.stateProducer = stateProducer; + + Environment.stateProducer.resetState(); + const state = Environment.stateProducer.getState(); + this.goals = Environment.stateProducer.getGoals(); + this.state = new EnvironmentState(state, Environment.stateProducer); + } + + public static isTerminalSuccess(state: EnvironmentState): boolean { + return Environment._isTerminalSuccess(state.getState(), state.getGoals()); + } + + public reset(): void { + this.reverseAction = null; + + this.circleAction = []; + this.bigCircleAction1 = []; + this.bigCircleAction2 = []; + this.smallCircleAction1 = []; + this.smallCircleAction2 = []; + + this.bigCircleAction1.push(Action.L, Action.L, Action.D, Action.D, Action.R, Action.R, Action.U, Action.U); + this.bigCircleAction2.push(Action.R, Action.R, Action.D, Action.D, Action.L, Action.L, Action.U, Action.U); + this.smallCircleAction1.push(Action.L, Action.D, Action.R, Action.U, Action.L); + this.smallCircleAction2.push(Action.R, Action.D, Action.L, Action.U, Action.R); + } + + public getInitState(): EnvironmentState { + Environment.stateProducer.resetState(); + const state = Environment.stateProducer.getState(); + this.goals = Environment.stateProducer.getGoals(); + this.state = new EnvironmentState(state, Environment.stateProducer); + return this.state; + } + + public static getPossibleActions(state: EnvironmentState): Action[] { + const io = state.getState().indexOf(-1); + const fixedStateIndexes = state.getFixedElements().map(e => e - 1); + return GameUtils.getValidMoves(io, fixedStateIndexes); + } + + public executeAction(state0: EnvironmentState, action: Action): EnvironmentActionResult { + const newState = GameUtils.makeMove(state0.getState(), action); + const environmentState = new EnvironmentState(newState, Environment.stateProducer); + + let isTerminal = Environment._isTerminalSuccess(newState, this.goals); + this.state = new EnvironmentState(newState, Environment.stateProducer); + + let r: number = NaN; + + this.reverseAction = GameUtils.getReverseAction(action); + this.circleAction.push(action); + if (this.circleAction.length > 8) this.circleAction.shift(); + + if (Utils.equalArrays(this.circleAction, this.bigCircleAction1) || + Utils.equalArrays(this.circleAction, this.bigCircleAction2) || + Utils.equalArrays(this.circleAction, this.smallCircleAction1) || + Utils.equalArrays(this.circleAction, this.smallCircleAction2)) { + isTerminal = true; + } + + const io = this.state.getState().indexOf(-1); + if (Environment.stateProducer.isLockedIndex(io)) { + isTerminal = true; + r = -1; + } + + if (isNaN(r)) { + r = this.getReward(newState, this.goals); + } + + return new EnvironmentActionResult(environmentState, action, r, isTerminal); + } + + public static _isTerminalSuccess(newState: number[], goals: number[]): boolean { + if (newState.length !== 16) throw new Error("newState.size() != 16"); + + return goals.filter(e => newState[e - 1] === e).length === goals.length; + } + + public prntInfo(): void { + Utils.prnt("\n\n================================================\n"); + + const state = this.state.getState(); + const io = state.indexOf(-1); + const xy = GameUtils.getXY(io); + Utils.prnt(xy); + const indx = GameUtils.getIndex(xy.getKey(), xy.getValue()); // Assuming getXY returns a tuple + Utils.prnt(`${io} - ${indx}`); + const moves = GameUtils._getValidMoves(io); + Utils.prnt(moves); + const r = this.getReward(state, this.goals); + Utils.prnt(r); + // prnt("\n"); + + const stateAsString = GameUtils.stateAsString(state, this.goals); + Utils.prnt(stateAsString); + } + + private getReward(state: number[], goals: number[]): number { + const ih = state.indexOf(-1); + const xyh = GameUtils.getXY(ih); + + const floatStream = goals.map(e => this.getDistance(GameUtils.getXY(state.indexOf(e)), GameUtils.getXY(e - 1))); + const d0Sum = floatStream.reduce((acc, val) => acc + val, 0); + + if (d0Sum === 0) { + return 100.5; + } + + const d1Sum = goals.reduce((acc, e) => acc + this.getDistance(GameUtils.getXY(state.indexOf(e)), xyh), 0); + + Utils.prnt(`d0Sum: ${d0Sum}`); + Utils.prnt(`d1Sum: ${d1Sum}`); + + return 1 / (d0Sum + d1Sum); + } + + private getDistance(v1: Pair, v2: Pair): number { + const pow1 = Math.pow(v2.getKey() - v1.getKey(), 2); + const pow2 = Math.pow(v2.getValue() - v1.getValue(), 2); + return Math.sqrt(pow1 + pow2); + } +} \ No newline at end of file diff --git a/src/game-15-reconstruction/EnvironmentActionResult.ts b/src/game-15-reconstruction/EnvironmentActionResult.ts new file mode 100644 index 0000000..b7c65bd --- /dev/null +++ b/src/game-15-reconstruction/EnvironmentActionResult.ts @@ -0,0 +1,21 @@ +import { Action } from './Action'; +import { EnvironmentState } from './EnvironmentState'; + +export class EnvironmentActionResult { + state: EnvironmentState; + action: Action; + reward: number; + isTerminal: boolean; + + constructor( + state: EnvironmentState, + action: Action, + reward: number = 0, + isTerminal: boolean = false + ) { + this.state = state; // Initialize with a default if not provided + this.action = action; // Assuming Action has a default like NONE + this.reward = reward; + this.isTerminal = isTerminal; + } +} diff --git a/src/game-15-reconstruction/EnvironmentState.ts b/src/game-15-reconstruction/EnvironmentState.ts new file mode 100644 index 0000000..8067784 --- /dev/null +++ b/src/game-15-reconstruction/EnvironmentState.ts @@ -0,0 +1,90 @@ +import { StateProducer } from './StateProducer'; + +export class EnvironmentState { + private state: number[]; + private goals: number[]; + public fixedElements: number[]; + + constructor(state: number[], stateProducer: StateProducer); + constructor(state: EnvironmentState); + constructor(state: number[] | EnvironmentState, stateProducer?: StateProducer) { + if (state instanceof EnvironmentState) { + // Copy constructor logic + this.state = [...state.getState()]; + this.goals = [...state.getGoals()]; + this.fixedElements = [...state.getFixedElements()]; + } else { + // Regular constructor logic + this.state = [...state]; + this.goals = [...(stateProducer?.getGoals() ?? [])]; + this.fixedElements = [...(stateProducer?.getLockedStateElements() ?? [])]; + } + } + + public getState(): number[] { + return this.state; + } + + public setState(state: number[]): void { + this.state = state; + } + + public getGoals(): number[] { + return this.goals; + } + + public setGoals(goals: number[]): void { + this.goals = goals; + } + + public getFixedElements(): number[] { + return this.fixedElements; + } + + public setFixedElements(fixedElements: number[]): void { + this.fixedElements = fixedElements; + } + + public getHashCodeV2(): number { + const hashCode = this.getHashCodeV3__(); + return this.hashString(hashCode); + } + + public getHashCodeV3__(): string { + return Array.from({ length: 16 }, (_, e) => { + let v: string; + const o = this.state[e]; + if (o === -1) v = "*"; + else if (this.goals.includes(o)) v = String(o); + else if (this.goals.includes(e + 1)) v = "o"; + else v = " "; + v = v + "\t"; + if (e !== 0 && (e + 1) % 4 === 0) v = v + "\n"; + return v; + }).join(""); + } + + public getHashCode(): number { + const a = this.state.map(String).join(","); + return this.hashString(a); + } + + private hashString(str: string): number { + let hash = 0; + for (let i = 0; i < str.length; i++) { + const char = str.charCodeAt(i); + hash = (hash << 5) - hash + char; + hash |= 0; // Convert to 32bit integer + } + return hash; + } + + public equals(other: EnvironmentState): boolean { + return this.getHashCode() === other.getHashCode(); + } + + public hashCode(): number { + return this.getHashCode(); + } + } + \ No newline at end of file diff --git a/src/game-15-reconstruction/EpisodeRunner.ts b/src/game-15-reconstruction/EpisodeRunner.ts new file mode 100644 index 0000000..c4f7e3a --- /dev/null +++ b/src/game-15-reconstruction/EpisodeRunner.ts @@ -0,0 +1,115 @@ +import { QTableRow } from './QTableRow'; +import { Environment } from './Environment'; +import { StateProducer } from './StateProducer'; +import { ExperienceRecord } from './ExperienceRecord'; +import { QTableGenerator } from './QTableGenerator'; +import { QTableUpdater } from './QTableUpdater'; +import { Action } from './Action'; +import { EnvironmentActionResult } from './EnvironmentActionResult'; + +export class EpisodeRunner { + private static experience: Set = new Set(); + + public static runEpisode( + stateProducer: StateProducer, + qTable: Map, + // random: Random, + discount: number, + learningRate: number, + episode: number + ): void { + const random = new Random(); + const environment = new Environment(stateProducer); + environment.reset(); + let state0 = environment.getInitState(); + environment.prntInfo(); + + const epsilon = 0.5; + let isTerminal = false; + let step = 0; + + while (!isTerminal && step < 50) { + step++; + let action: Action; + + const possibleActions = Environment.getPossibleActions(state0); + if (environment.reverseAction !== null) { + const index = possibleActions.indexOf(environment.reverseAction); + possibleActions.splice(index, 1); // Remove reverse action + } + + if (random.nextDouble() < epsilon) { // Explore + console.log("\nrndm move"); + action = QTableGenerator.getRandomAction(possibleActions); + } else { // Exploit + console.log("\nqTable move"); + action = QTableGenerator.getAction(qTable, state0, environment.reverseAction); + + if (!possibleActions.includes(action)) { + action = QTableGenerator.getAction(qTable, state0, environment.reverseAction); + } + } + + console.log("\n--------------------------------------------------------"); + console.log("\naction: " + action); + + if (!possibleActions.includes(action)) { + // throw new Error("!possibleActions.contains(action)"); + } + + const result: EnvironmentActionResult = environment.executeAction(state0, action); + const state1 = result.state; + environment.prntInfo(); + + const reward = result.reward; + isTerminal = result.isTerminal; + + if (isTerminal) { + isTerminal = true; + } + + const record = new ExperienceRecord(state0, action, reward, isTerminal, state1); + EpisodeRunner.experience.add(record); + state0 = result.state; + } + + EpisodeRunner.replayExperience(EpisodeRunner.experience, qTable, learningRate, discount, 1000); + + const count = Array.from(qTable.values()).reduce((acc, e) => acc + e.qValues.size, 0); + const message = `Episode ${episode} done, states count: ${count}, experience size: ${EpisodeRunner.experience.size}`; + console.log(message); + console.log(""); + } + + public static replayExperience( + experience: Set, + qTable: Map, + learningRate: number, + discount: number, + sampleSize: number + ): void { + const sampledExperience = Array.from(experience) + .sort(() => Math.random() - 0.5) // Shuffle + .slice(0, sampleSize); // Limit to sampleSize + + sampledExperience.forEach(e => { + QTableUpdater.updateQTable( + qTable, + e.getState(), + e.getAction(), + e.getNewState(), + e.getReward(), + e.isDone(), + learningRate, + discount + ); + }); + } +} + +// Random class to handle random number generation +class Random { + public nextDouble(): number { + return Math.random(); + } +} diff --git a/src/game-15-reconstruction/ExperienceRecord.ts b/src/game-15-reconstruction/ExperienceRecord.ts new file mode 100644 index 0000000..bacd6df --- /dev/null +++ b/src/game-15-reconstruction/ExperienceRecord.ts @@ -0,0 +1,47 @@ +import { EnvironmentState } from './EnvironmentState'; // Adjust the import according to your project structure +import { Action } from './Action'; // Adjust the import according to your project structure + +export class ExperienceRecord { + private readonly state: EnvironmentState; + private readonly action: Action; + private readonly reward: number; + private readonly done: boolean; + private readonly newState: EnvironmentState; + + constructor(state: EnvironmentState, action: Action, reward: number, done: boolean, newState: EnvironmentState) { + this.state = new EnvironmentState(state); // Assuming a copy constructor exists in TypeScript + this.action = action; + this.reward = reward; + this.done = done; + this.newState = new EnvironmentState(newState); // Assuming a copy constructor exists in TypeScript + } + + public getState(): EnvironmentState { + return this.state; + } + + public getAction(): Action { + return this.action; + } + + public getReward(): number { + return this.reward; + } + + public isDone(): boolean { + return this.done; + } + + public getNewState(): EnvironmentState { + return this.newState; + } + + public equals(other: ExperienceRecord): boolean { + if (!(other instanceof ExperienceRecord)) return false; + return this.hashCode() === other.hashCode(); + } + + public hashCode(): number { + return this.state.getHashCodeV2() ^ this.newState.getHashCodeV2(); // Use bitwise XOR for combining hashes + } +} diff --git a/src/game-15-reconstruction/GameUtils.ts b/src/game-15-reconstruction/GameUtils.ts new file mode 100644 index 0000000..82a897b --- /dev/null +++ b/src/game-15-reconstruction/GameUtils.ts @@ -0,0 +1,103 @@ + + +// utils/GameUtils.ts +import { Action } from './Action'; +import { Pair } from './utils/Pair'; +import { ConsoleUtils } from './ConsoleUtils'; + +export class GameUtils { + + public static makeMove(state: number[], action: Action): number[] { + const hole = -1; + const i0 = state.indexOf(hole); + const xy = this.getXY(i0); + let x = xy.getKey(); + let y = xy.getValue(); + + if (action === Action.L) x -= 1; + if (action === Action.R) x += 1; + if (action === Action.U) y -= 1; + if (action === Action.D) y += 1; + + const i1 = this.getIndex(x, y); + + // Clone state array and perform swap + const newState = [...state]; + const v = newState[i1]; + newState[i0] = v; + newState[i1] = hole; + return newState; + } + + public static getXY(index: number): Pair { + const x = index % 4; + const y = Math.floor(index / 4); + return Pair.P(x, y); + } + + public static getIndex(x: number, y: number): number { + return y * 4 + x; + } + + public static _getValidMoves(index: number): Action[] { + const xy = this.getXY(index); + const moves = Object.values(Action).slice(); // Clone Action enum values + + if (xy.getKey() === 0) moves.splice(moves.indexOf(Action.L), 1); + if (xy.getKey() === 3) moves.splice(moves.indexOf(Action.R), 1); + if (xy.getValue() === 0) moves.splice(moves.indexOf(Action.U), 1); + if (xy.getValue() === 3) moves.splice(moves.indexOf(Action.D), 1); + + return moves; + } + + public static getValidMoves(i: number, fixedStateIndexes: number[]): Action[] { + const moves = this._getValidMoves(i); + const xy = this.getXY(i); + + if (this.contains(fixedStateIndexes, xy.getKey() - 1, xy.getValue())) { + moves.splice(moves.indexOf(Action.L), 1); + } + if (this.contains(fixedStateIndexes, xy.getKey() + 1, xy.getValue())) { + moves.splice(moves.indexOf(Action.R), 1); + } + if (this.contains(fixedStateIndexes, xy.getKey(), xy.getValue() - 1)) { + moves.splice(moves.indexOf(Action.U), 1); + } + if (this.contains(fixedStateIndexes, xy.getKey(), xy.getValue() + 1)) { + moves.splice(moves.indexOf(Action.D), 1); + } + + return moves; + } + + private static contains(fixedStateIndexes: number[], x: number, y: number): boolean { + return fixedStateIndexes.includes(this.getIndex(x, y)); + } + + public static stateAsString(state: number[], goals: number[]): string { + return Array.from({ length: 16 }, (_, e) => { + let v: string; + const o = state[e]; + + if (o === -1) v = ConsoleUtils.blue("*"); + else if (goals.includes(o)) v = ConsoleUtils.color(o.toString(), 100); + else if (goals.includes(e + 1)) v = ConsoleUtils.green(o.toString()); + else v = o.toString(); + + v += "\t"; + if (e !== 0 && (e + 1) % 4 === 0) { + v += "\n"; + } + return v; + }).join(''); + } + + public static getReverseAction(action: Action): Action | null { + return action === Action.D ? Action.U + : action === Action.U ? Action.D + : action === Action.L ? Action.R + : action === Action.R ? Action.L + : null; + } +} diff --git a/src/game-15-reconstruction/Main.ts b/src/game-15-reconstruction/Main.ts new file mode 100644 index 0000000..e9edf11 --- /dev/null +++ b/src/game-15-reconstruction/Main.ts @@ -0,0 +1,12 @@ +import { QTableGenerator } from './QTableGenerator'; +import { Utils } from './utils/Utils'; + +export class Main { + + public static main() { + + Utils.prnt("kuku"); + QTableGenerator.train(); + QTableGenerator.test(); + } +} diff --git a/src/game-15-reconstruction/QTableGenerator.ts b/src/game-15-reconstruction/QTableGenerator.ts new file mode 100644 index 0000000..9584a73 --- /dev/null +++ b/src/game-15-reconstruction/QTableGenerator.ts @@ -0,0 +1,144 @@ +// import { SerializedObjectLoader } from './utils/SerializedObjectLoader'; +import { Trainer } from './Trainer'; +import { QTableRow } from './QTableRow'; +import { StateProducer } from './StateProducer'; +import { StateShuffle } from './StateShuffle'; +import { Environment } from './Environment'; +import { EnvironmentState } from './EnvironmentState'; +import { Action } from './Action'; +import { ConsoleUtils } from './ConsoleUtils'; +import { GameUtils } from './GameUtils'; +import { QTableUpdater } from './QTableUpdater'; +import { Utils } from './utils/Utils'; + +export class QTableGenerator { + + public static train(): void { + const filePath = "qTable.ser"; + const qTable = this.loadQTable(filePath); + Trainer.train(qTable, filePath, 10); + } + + public static test(): void { + const filePath = "qTable.ser"; + const qTable = this.loadQTable(filePath); + while (true) { + this.testQTable(qTable); + } + } + + public static loadQTable(filePath: string): Map { + let qTable = new Map(); + try { + // qTable = SerializedObjectLoader.load(filePath).then(); + Utils.prnt(filePath); + Utils.prnt('qTable not loaded .. newwd to implement .. use some local storage '); + } catch (error) { + Utils.prnt(error); + } + + const stats = this.getStatistics(qTable); + Utils.prnt(stats); + + return qTable; + } + + private static getStatistics(qTable: Map): Stats { + const values = Array.from(qTable.values()).flatMap(row => Array.from(row.qValues.values())); + const sum = values.reduce((a, b) => a + b, 0); + const count = values.length; + const average = count ? sum / count : 0; + const stats = new Stats(); + stats.count = count; + stats.sum = sum; + stats.average = average; + return stats; + } + + public static testQTable(qTable: Map): void { + Utils.prnt("********************* test q table **********************"); + Utils.prnt("********************* test q table **********************"); + Utils.prnt("********************* test q table **********************"); + + let lessonNo = 0; + const lessons = StateProducer.generateLessons(); + const lessonCount = lessons.length; + let stateProducer = lessons[lessonNo]; + let v = StateShuffle.shuffle(StateProducer.stateDone, [], 1000); + let state = new EnvironmentState(v, stateProducer); + let goals = stateProducer.getGoals(); + + this.prntState(state); + + let gameOver = false; + let step = 0; + let reverseAction: Action | null = null; + + while (!gameOver && step < 200) { + this.sleep(1000 / 2).then(); + ConsoleUtils.clearScreen(); + + step++; + const state0Hash = state.getHashCodeV2(); + + QTableUpdater.addStateWithZeroValuesToQTableIfStateNotExist(qTable, state); + + const qTableRow = qTable.get(state0Hash); + const action = qTableRow ? qTableRow.getActionWithMaxValue(reverseAction) : Action.D; + + reverseAction = GameUtils.getReverseAction(action); + + const newState = GameUtils.makeMove(state.getState(), action); + const isTerminal = Environment._isTerminalSuccess(newState, goals); + + state = new EnvironmentState(newState, stateProducer); + gameOver = Utils.equalArrays(state.getState(), StateProducer.stateDone); + + Utils.prnt(`${step}\n----\n`); + this.prntState(state); + + if (isTerminal && !gameOver && lessonNo < lessonCount - 1) { + lessonNo++; + stateProducer = lessons[lessonNo]; + goals = stateProducer.getGoals(); + Utils.prnt(`lesson change: ${lessonNo}`); + Utils.prnt(goals); + state = new EnvironmentState(state.getState(), stateProducer); + } + } + + const isTerminalSuccess = Environment.isTerminalSuccess(state); + Utils.prnt(`success: ${isTerminalSuccess}`); + this.sleep(3000).then(); + } + + private static prntState(state: EnvironmentState): void { + const s = GameUtils.stateAsString(state.getState(), state.getGoals()); + Utils.prnt(s); + } + + public static getAction(qTable: Map, currentState: EnvironmentState, lastAction: Action | null): Action { + const hash = currentState.getHashCodeV2(); + let possibleActions = Environment.getPossibleActions(currentState); + possibleActions = possibleActions.filter(action => action !== lastAction); + return qTable.has(hash) + ? qTable.get(hash)?.getActionWithMaxValue(lastAction) || Action.D + : this.getRandomAction(possibleActions); + } + + public static getRandomAction(possibleActions: Action[]): Action { + return possibleActions.length > 0 + ? possibleActions[Math.floor(Math.random() * possibleActions.length)] + : Action.D; + } + + private static sleep(ms: number): Promise { + return new Promise(resolve => setTimeout(resolve, ms)); + } +} + +class Stats { + public count: number = 0; + public sum: number = 0; + public average: number = 0; +} diff --git a/src/game-15-reconstruction/QTableRow.ts b/src/game-15-reconstruction/QTableRow.ts new file mode 100644 index 0000000..9e0f66a --- /dev/null +++ b/src/game-15-reconstruction/QTableRow.ts @@ -0,0 +1,55 @@ +import { Action } from './Action'; +import { Environment } from './Environment'; +import { EnvironmentState } from './EnvironmentState'; + +export class QTableRow { + private state: EnvironmentState; + public qValues: Map; + + constructor(state: EnvironmentState) { + this.state = state; + this.qValues = new Map(); + } + + public setValue(action: Action, qValue: number): void { + const moves = Environment.getPossibleActions(this.state); + if (this.qValues.size === 0) { + moves.forEach(e => this.qValues.set(e, 0)); + } + if (!moves.includes(action)) { + console.warn("WARNING: !moves.includes(action)"); + return; + } + this.qValues.set(action, qValue); + } + + public getValue(action: Action): number { + return this.qValues.get(action) || 0; + } + + public getActionWithMaxValue(lastAction: Action | null): Action { + const possibleActions = Environment.getPossibleActions(this.state).filter(action => action !== lastAction); + const actionOption = lastAction === null ? null : this.getAction(lastAction); + + if (actionOption === null || actionOption === undefined) { + console.warn("WARNING: no action found"); + return possibleActions.length > 0 ? possibleActions[0] : Action.D; // Default action + } else { + return actionOption; + } + } + + private getAction(lastAction: Action): Action | undefined { + const filteredEntries = Array.from(this.qValues.entries()).filter(([key]) => key !== lastAction); + const maxEntry = filteredEntries.reduce((max, entry) => (entry[1] > max[1] ? entry : max), [lastAction, -Infinity]); + return maxEntry[1] > -Infinity ? maxEntry[0] : undefined; + } + + public getMaxValue(): number { + // Check if qValues is empty + if (this.qValues.size === 0) return 0; + + // Convert the values of the map to an array and get the maximum value + return Math.max(...Array.from(this.qValues.values())); + } +} diff --git a/src/game-15-reconstruction/QTableUpdater.ts b/src/game-15-reconstruction/QTableUpdater.ts new file mode 100644 index 0000000..1d610a9 --- /dev/null +++ b/src/game-15-reconstruction/QTableUpdater.ts @@ -0,0 +1,68 @@ +import { Action } from './Action'; // Adjust the import according to your project structure +import { EnvironmentState } from './EnvironmentState'; // Adjust the import according to your project structure +import { QTableRow } from './QTableRow'; // Adjust the import according to your project structure + +export class QTableUpdater { + + public static updateQTable( + qTable: Map, + state0: EnvironmentState, + action: Action, + state1: EnvironmentState, + reward: number, + isTerminal: boolean, + learningRate: number, + discount: number + ): void { + let _qValue: number; + + if (isTerminal) { + _qValue = reward; + } else { + const qValue = this.getQValue(qTable, state0, action); + const nextQValue = this.getMaxQValue(qTable, state1); + _qValue = this.calcQValue(reward, qValue, nextQValue, discount, learningRate); + } + + this.addStateWithZeroValuesToQTableIfStateNotExist(qTable, state0); + this.updateQTableEntry(qTable, state0.getHashCodeV2(), action, _qValue); + } + + private static updateQTableEntry(qTable: Map, hash: number, action: Action, qValue: number): void { + qTable.get(hash)?.setValue(action, qValue); + } + + private static getMaxQValue(qTable: Map, state: EnvironmentState): number { + const hashCode = state.getHashCodeV2(); + this.addStateWithZeroValuesToQTableIfStateNotExist(qTable, state); + return qTable.get(hashCode)?.getMaxValue() ?? 0; // Default to 0 if not found + } + + private static getQValue(qTable: Map, state: EnvironmentState, action: Action): number { + const hashCode = state.getHashCodeV2(); + this.addStateWithZeroValuesToQTableIfStateNotExist(qTable, state); + return qTable.get(hashCode)?.getValue(action) ?? 0; // Default to 0 if not found + } + + public static addStateWithZeroValuesToQTableIfStateNotExist(qTable: Map, state: EnvironmentState): void { + if (qTable.has(state.getHashCodeV2())) return; + this.addStateWithZeroValuesToQTable(qTable, state); + } + + private static addStateWithZeroValuesToQTable(qTable: Map, state: EnvironmentState): void { + const hashCode = state.getHashCodeV2(); + const row = new QTableRow(state); + qTable.set(hashCode, row); // Assuming of() method creates a new instance + } + + private static calcQValue( + reward: number, + qValue: number, + nextQValue: number, + discount: number, + learningRate: number + ): number { + const v = qValue + learningRate * (reward + discount * nextQValue - qValue); + return v; + } +} diff --git a/src/game-15-reconstruction/StateProducer.ts b/src/game-15-reconstruction/StateProducer.ts new file mode 100644 index 0000000..dea4478 --- /dev/null +++ b/src/game-15-reconstruction/StateProducer.ts @@ -0,0 +1,207 @@ +import { Utils } from './utils/Utils'; +import { StateShuffle } from './StateShuffle'; + +export class StateProducer { + private goals: number[]; + private lockedStateElements: number[]; + private state: number[]; + private episodesToTrain: number; + private readonly lessonNb: number; + + public static readonly stateDone: number[] = [ + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, -1 + ]; + + private constructor(lessonNb: number) { + this.lessonNb = lessonNb; + this.goals = []; + this.lockedStateElements = []; + this.state = []; + this.episodesToTrain = 0; + } + + public getState() { + return this.state; + } + + public getGoals() { + return this.goals; + } + + public getLockedStateElements() { + return this.lockedStateElements; + } + + public getEpisodesToTrain() { + return this.episodesToTrain; + } + + public static generateLessons0(): StateProducer[] { + return [ + StateProducer.state1(0), + StateProducer.moveHole(StateProducer.stateX(2, 1), [1, 4]), + StateProducer.moveHole(StateProducer.stateX(3, 2), [2, 5]), + StateProducer.moveHole(StateProducer.state3_4(3), [3, 6]), + StateProducer.moveHole(StateProducer.stateX(5, 4), [6, 7]), + StateProducer.moveHole(StateProducer.stateX(6, 5), [5, 8]), + StateProducer.moveHole(StateProducer.stateX(7, 6), [6, 9]), + StateProducer.moveHole(StateProducer.state7_8(7), [8, 11]), + StateProducer.moveHole(StateProducer.state9_13(8), [10, 11]), + StateProducer.moveHole(StateProducer.state10_15(9), [9, 13]), + StateProducer.state12(10) + ]; + } + + public static generateLessonsV1(): StateProducer[] { + return [ + StateProducer.state1_2(0), + StateProducer.moveHole(StateProducer.state3_4(1), [2, 3, 4]), + StateProducer.moveHole(StateProducer.stateX(5, 2), [6, 7]), + StateProducer.moveHole(StateProducer.stateX(6, 3), [5, 8]), + StateProducer.moveHole(StateProducer.stateX(7, 4), [6, 9]), + StateProducer.moveHole(StateProducer.state7_8(5), [8, 11]), + StateProducer.moveHole(StateProducer.state9_13(6), [10, 11]), + StateProducer.moveHole(StateProducer.state10_15(7), [9, 13]), + StateProducer.state12(8) + ]; + } + + public static generateLessons(): StateProducer[] { + return [ + StateProducer.state1_2(0), + StateProducer.moveHole(StateProducer.state3_4(1), [2, 3, 4]), + StateProducer.moveHole(StateProducer.state5_6(2), [6, 7]), + StateProducer.moveHole(StateProducer.state7_8(3), [8, 9, 6]), + StateProducer.moveHole(StateProducer.state9_13(4), [10, 11]), + StateProducer.moveHole(StateProducer.state10_15(5), [9, 13]), + StateProducer.state12(6) + ]; + } + + private static state1_2(lessonNb: number): StateProducer { + const o = new StateProducer(lessonNb); + o.goals = [1, 2]; + o.lockedStateElements = []; + o.state = [...StateProducer.stateDone]; + o.episodesToTrain = 100; + StateProducer.shuffle(o, o.lockedStateElements); + return o; + } + + private static state1(lessonNb: number): StateProducer { + return StateProducer.stateX(1, lessonNb); + } + + private static state3_4(lessonNb: number): StateProducer { + const o = new StateProducer(lessonNb); + o.goals = [3, 4]; + o.lockedStateElements = [1, 2]; + o.state = [...StateProducer.stateDone]; + o.episodesToTrain = 100; + StateProducer.shuffle(o, [1, 2, 3]); + return o; + } + + private static state5_6(lessonNb: number): StateProducer { + const o = new StateProducer(lessonNb); + o.goals = [5, 6]; + o.lockedStateElements = [1, 2, 3, 4]; + o.state = [...StateProducer.stateDone]; + o.episodesToTrain = 100; + StateProducer.shuffle(o, o.lockedStateElements); + return o; + } + + private static stateX(goal: number, lessonNb: number): StateProducer { + const o = new StateProducer(lessonNb); + o.goals = [goal]; + o.lockedStateElements = Array.from({ length: goal - 1 }, (_, i) => i + 1); + o.state = [...StateProducer.stateDone]; + o.episodesToTrain = 100; + StateProducer.shuffle(o, o.lockedStateElements); + return o; + } + + private static moveHole(o: StateProducer, holeIndexes: number[]): StateProducer { + const ih = Utils.shuffleArray(holeIndexes)[0] + const i = o.state.indexOf(-1); + const v = o.state[ih]; + o.state[ih] = -1; + o.state[i] = v; + return o; + } + + private static state7_8(lessonNb: number): StateProducer { + const o = new StateProducer(lessonNb); + o.goals = [7, 8]; + o.lockedStateElements = [1, 2, 3, 4, 5, 6]; + o.state = [...StateProducer.stateDone]; + o.episodesToTrain = 100; + StateProducer.shuffle(o, [1, 2, 3, 4, 5, 6, 7]); + return o; + } + + private static state9_13(lessonNb: number): StateProducer { + const o = new StateProducer(lessonNb); + o.goals = [9, 13]; + o.lockedStateElements = [1, 2, 3, 4, 5, 6, 7, 8]; + o.state = [...StateProducer.stateDone]; + o.episodesToTrain = 100; + StateProducer.shuffle(o, o.lockedStateElements); + return o; + } + + private static state10_15(lessonNb: number): StateProducer { + const o = new StateProducer(lessonNb); + o.goals = [10, 11, 14, 15]; + o.lockedStateElements = [ + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, + 13 + ]; + o.state = StateShuffle.shuffle([...StateProducer.stateDone], o.lockedStateElements, 500); + o.episodesToTrain = 100; + return o; + } + + private static state12(lessonNb: number): StateProducer { + const o = new StateProducer(lessonNb); + o.goals = [12]; + o.lockedStateElements = [ + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, + 13, 14, 15 + ]; + o.state = StateShuffle.shuffle([...StateProducer.stateDone], o.lockedStateElements, 500); + o.episodesToTrain = 10; + return o; + } + + public isLockedIndex(index: number): boolean { + return this.lockedStateElements.includes(index + 1); + } + + public shuffleState(): void { + this.state = StateShuffle.shuffle(this.state, this.lockedStateElements, 500); + } + + private static shuffle(o: StateProducer, lockedStateElements: number[]): void { + let v0 = o.state.filter(e => !lockedStateElements.includes(e)); + v0 = Utils.shuffleArray(v0); + o.state = [...lockedStateElements, ...v0]; + } + + public resetState(): void { + const o = StateProducer.generateLessons()[this.lessonNb]; + this.goals = [...o.goals]; + this.lockedStateElements = [...o.lockedStateElements]; + this.state = [...o.state]; + } + + // Additional methods and classes like StateShuffle are assumed to be defined elsewhere. +} diff --git a/src/game-15-reconstruction/StateShuffle.ts b/src/game-15-reconstruction/StateShuffle.ts new file mode 100644 index 0000000..2c18969 --- /dev/null +++ b/src/game-15-reconstruction/StateShuffle.ts @@ -0,0 +1,40 @@ +import { StateProducer } from './StateProducer'; +import { GameUtils } from './GameUtils'; +import { Utils } from './utils/Utils'; +// import { Action } from './Action'; + +export class StateShuffle { + public static main(): void { + const stateProducer = StateProducer.generateLessons()[0]; + let state = [...stateProducer.getState()]; + const goals = [...stateProducer.getGoals()]; + + StateShuffle.prntState(state, goals); + state = StateShuffle.shuffle(state, stateProducer.getLockedStateElements(), 1000); + StateShuffle.prntState(state, goals); + } + + public static shuffle(state: number[], lockedStateElements: number[], steps: number): number[] { + const fixedStateIndexes = lockedStateElements.map(e => e - 1); + + let i = 0; + while (i < steps) { + state = StateShuffle.makeRandomMove(state, fixedStateIndexes); + i++; + } + + return state; + } + + private static prntState(state: number[], goals: number[]): void { + Utils.prnt(GameUtils.stateAsString(state, goals)); + } + + static makeRandomMove(state: number[], fixedStateIndexes: number[]): number[] { + const i = state.indexOf(-1); + const moves = GameUtils.getValidMoves(i, fixedStateIndexes); + const action = Utils.shuffleArray(moves)[0]; + state = GameUtils.makeMove(state, action); + return state; + } +} diff --git a/src/game-15-reconstruction/Trainer.ts b/src/game-15-reconstruction/Trainer.ts new file mode 100644 index 0000000..a1ce7f8 --- /dev/null +++ b/src/game-15-reconstruction/Trainer.ts @@ -0,0 +1,30 @@ +import { QTableRow } from './QTableRow'; // Adjust the import according to your project structure +import { SerializedObjectSaver } from './utils/SerializedObjectSaver'; // Adjust the import according to your project structure +import { StateProducer } from './StateProducer'; // Adjust the import according to your project structure +import { EpisodeRunner } from './EpisodeRunner'; // Adjust the import according to your project structure + +export class Trainer { + public static train(qTable: Map, filePath: string, n: number): void { + const discount = 0.9; + const learningRate = 0.1; + + const lessons = StateProducer.generateLessons(); + + const episodeRunner = (stateProducer: StateProducer, episode: number): void => { + EpisodeRunner.runEpisode(stateProducer, qTable, discount, learningRate, episode); + }; + + const stateProducerConsumer = (stateProducer: StateProducer): void => { + for (let episode = 0; episode < stateProducer.getEpisodesToTrain(); episode++) { + episodeRunner(stateProducer, episode); + } + }; + + for (let i = 0; i < n; i++) { + lessons.forEach(stateProducerConsumer); + } + + console.log("\ntraining done"); + SerializedObjectSaver.save(filePath, qTable); + } +} \ No newline at end of file diff --git a/src/game-15-reconstruction/utils/Pair.ts b/src/game-15-reconstruction/utils/Pair.ts new file mode 100644 index 0000000..b301baa --- /dev/null +++ b/src/game-15-reconstruction/utils/Pair.ts @@ -0,0 +1,16 @@ +// utils/Pair.ts +export class Pair { + constructor(private key: K, private value: V) {} + + public getKey(): K { + return this.key; + } + + public getValue(): V { + return this.value; + } + + public static P(key: K, value: V): Pair { + return new Pair(key, value); + } +} \ No newline at end of file diff --git a/src/game-15-reconstruction/utils/SerializedObjectLoader.ts b/src/game-15-reconstruction/utils/SerializedObjectLoader.ts new file mode 100644 index 0000000..c555ff4 --- /dev/null +++ b/src/game-15-reconstruction/utils/SerializedObjectLoader.ts @@ -0,0 +1,32 @@ +// utils/SerializedObjectLoader.ts +// import * as fs from 'fs/promises'; // Import fs from fs/promises for promise-based I/O +// import * as path from 'path'; +import { Utils } from './Utils'; // Assuming Utils has a similar prnt method + +export class SerializedObjectLoader { + + public static async load(filePath: string) { + // const absolutePath = path.resolve(filePath); // Get the absolute path + Utils.prnt(filePath); + } + + // private static async _load(filePath: string): Promise { + // try { + // const data = await fs.readFile(filePath, 'utf-8'); // Read the file asynchronously + // const obj = JSON.parse(data); // Parse the JSON string into an object + // return obj; // Return the loaded object + // } catch (error) { + // Utils.prnt((error as Error).message); // Print the error message + // throw error; // Rethrow the error + // } + // } + + // public static async loadFrom(filePath: string): Promise { + // try { + // return await this.load(filePath); // Try loading the object + // } catch (error) { + // Utils.prnt((error as Error).message); // Print the error message + // return null; // Return null on error + // } + // } +} diff --git a/src/game-15-reconstruction/utils/SerializedObjectSaver.ts b/src/game-15-reconstruction/utils/SerializedObjectSaver.ts new file mode 100644 index 0000000..fef3c06 --- /dev/null +++ b/src/game-15-reconstruction/utils/SerializedObjectSaver.ts @@ -0,0 +1,42 @@ +// utils/SerializedObjectSaver.ts +// import * as fs from 'fs'; +// import * as path from 'path'; +import { Utils } from './Utils'; // Assuming Utils has a similar prnt method + +export class SerializedObjectSaver { + + // private static _save(filePath: string, obj: T): void { + // Utils.prnt(filePath); + // Utils.prnt(obj); + // const fileName = path.resolve(filePath); // Get the absolute path + + // try { + // const jsonData = JSON.stringify(obj); // Convert object to JSON string + // fs.writeFileSync(fileName, jsonData); // Write JSON to file + // } catch (error) { + // Utils.prnt(error.message); // Handle error, print the message + // throw error; // Rethrow the error for further handling + // } + // } + + public static save(filePath: string, object: T): void { + Utils.prnt(filePath); + Utils.prnt(object); + // const dirPath = path.dirname(filePath); // Get the directory of the file path + // try { + // // Create directories if they do not exist + // if (!fs.existsSync(dirPath)) { + // fs.mkdirSync(dirPath, { recursive: true }); // Create directory recursively + // } + // this._save(filePath, object); // Call the save function + // } catch (error) { + // Utils.prnt((error as Error).message); // Print the error message + // throw error; // Rethrow the error + // } + } + + // public static getFilePath(relativePath: string): string { + + // // return path.resolve('', relativePath); // Resolve to absolute path + // } +} diff --git a/src/game-15-reconstruction/utils/Utils.ts b/src/game-15-reconstruction/utils/Utils.ts new file mode 100644 index 0000000..0feab83 --- /dev/null +++ b/src/game-15-reconstruction/utils/Utils.ts @@ -0,0 +1,78 @@ +// import * as fs from 'fs'; + +export class Utils { + private static emptyString = ' '; + + public static prnt(o: any): void { + console.log(o); + } + + public static str(o: any, len: number): string { + const v = o.toString(); + if (v.length >= len) return v; + const i = len - v.length; + return v + Utils.emptyString.substring(0, i); + } + + public static sum(l: number[]): number { + if (!l) { + return 0; + } + + // Filter out zeros + l = l.filter(e => e !== 0); + + if (l.length === 0) { + return 0; + } + + // Use reduce to sum the list elements + return l.reduce((acc, curr) => acc + curr, 0); + } + + public static toString(list: Collection): string { + const collect = Array.from(list).map(String).join(', '); + return `[${collect}]`; + } + + public static px(): (t: T, u: T) => T { + return (t: T, _: T) => t; + } + + // public static writeTextToFile(fileName: string, content: string): void { + // try { + // fs.writeFileSync(fileName, content, 'utf8'); + // } catch (error) { + // console.error('Error writing to file:', error); + // } + // } + + // public static _prnt(o: any): void { + // process.stdout.write(String(o)); + // } + + + + public static equalArrays(array: T[], array2: T[]): boolean { + // First, check if the lengths of both arrays are equal + if (array.length !== array2.length) { + return false; // Not equal if lengths differ + } + + // Then, check each corresponding element for equality + for (let i = 0; i < array.length; i++) { + if (array[i] !== array2[i]) { + return false; // Return false if any action is different + } + } + + return true; // Arrays are equal if all checks passed + } + + public static shuffleArray(array: T[]): T[] { + return array.sort(() => Math.random() - 0.5); + } +} + +// Helper type for collections (similar to Java's Collection interface) +type Collection = Iterable | ArrayLike; diff --git a/src/main.ts b/src/main.ts index 0482444..be0e329 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,3 +1,5 @@ +import { Main } from './game-15-reconstruction/Main'; + //TIP With Search Everywhere, you can find any action, file, or symbol in your project. Press , type in terminal, and press . Then run in the terminal and click the link in its output to open the app in the browser. export function setupCounter(element: HTMLElement) { //TIP Try on to see its usages. You can also use this shortcut to jump to a declaration – try it on on line 13. @@ -29,5 +31,6 @@ export function setupCounter(element: HTMLElement) { //TIP To find text strings in your project, you can use the shortcut. Press it and type in counter – you’ll get all matches in one place. setupCounter(document.getElementById('counter-value') as HTMLElement); +Main.main(); //TIP There's much more in WebStorm to help you be more productive. Press and search for Learn WebStorm to open our learning hub with more things for you to try.