From 8e24bdea91f6702af5aed69a42abfe8638119347 Mon Sep 17 00:00:00 2001
From: inter
Date: Thu, 4 Sep 2025 14:08:42 +0800
Subject: [PATCH] Add File

---
 .../easyai/gameRobot/DynamicProgramming.java  | 298 ++++++++++++++++++
 1 file changed, 298 insertions(+)
 create mode 100644 src/main/java/org/dromara/easyai/gameRobot/DynamicProgramming.java

diff --git a/src/main/java/org/dromara/easyai/gameRobot/DynamicProgramming.java b/src/main/java/org/dromara/easyai/gameRobot/DynamicProgramming.java
new file mode 100644
index 0000000..448ee1b
--- /dev/null
+++ b/src/main/java/org/dromara/easyai/gameRobot/DynamicProgramming.java
@@ -0,0 +1,298 @@
+package org.dromara.easyai.gameRobot;
+
+import org.dromara.easyai.matrixTools.Matrix;
+
+import java.util.*;
+
+/**
+ * @author lidapeng
+ * @description Dynamic programming
+ * @date 10:25 AM 2022/9/12
+ */
+public class DynamicProgramming {
+    private final List<DynamicState> dynamicStateList = new ArrayList<>();//list of states
+    private final Map<Integer, Action> actionMap = new HashMap<>();//map of actions, keyed by action id
+    private final List<Integer> bestStrategy = new ArrayList<>();//best strategy (one best action id per state)
+    private float gaMa = 0.5F; //discount factor
+    private float valueTh = 0.0001f;//value threshold (convergence tolerance)
+    private int maxTimes = 500;//maximum number of policy-improvement iterations
+
+    public void setMaxTimes(int maxTimes) {
+        this.maxTimes = maxTimes;
+    }
+
+    public void setValueTh(float valueTh) {
+        this.valueTh = valueTh;
+    }
+
+    public void setGaMa(float gaMa) {
+        this.gaMa = gaMa;
+    }
+
+    public List<DynamicState> getDynamicStateList() {
+        return dynamicStateList;
+    }
+
+    public Map<Integer, Action> getActionMap() {
+        return actionMap;
+    }
+
+    public void gameStart() {//traverse all states
+        for (DynamicState dynamicState : dynamicStateList) {
+            if (!dynamicState.isFinish()) {
+                dynamicState.add();//increment this state's visit count
+                Map<Integer, List<DynamicState>> sonStatesMap = dynamicState.getSonStatesMap();//action -> child-state map, updated while running
+                int[] stateId = dynamicState.getStateId();
+                for (Map.Entry<Integer, Action> actionEntry : actionMap.entrySet()) {
+                    Action action = actionEntry.getValue();
+                    int actionId = action.getActionId();//action id
+                    List<int[]> stateList = action.action(stateId);
+                    for (int[] myStateId : stateList) {
+                        DynamicState state = getStateByStateId(myStateId);//new state produced by the action
+                        //add a new entry to the action -> child-state map
+                        if (sonStatesMap.containsKey(actionId)) {//the action is already in the map
+                            List<DynamicState> dynamicStates = sonStatesMap.get(actionId);
+                            if (!isHere(dynamicStates, state.getStateId())) {
+                                dynamicStates.add(state);
+                            }
+                        } else {//create the action -> child-state list
+                            List<DynamicState> dynamicStateList = new ArrayList<>();
+                            dynamicStateList.add(state);
+                            sonStatesMap.put(actionId, dynamicStateList);
+                        }
+                        Map<Integer, Integer> profitMap = state.getProfitMap();//profit map of that state: key is the profit, value is its count; updated while running
+                        state.add();
+                        //record a new profit
+                        int profit = action.getProfit(stateId);
+                        if (profitMap.containsKey(profit)) {
+                            profitMap.put(profit, profitMap.get(profit) + 1);
+                        } else {
+                            profitMap.put(profit, 1);
+                        }
+                    }
+
+                }
+            }
+        }
+    }
+
+    public Matrix getValueMatrix() throws Exception {//build the value matrix
+        int size = dynamicStateList.size();
+        int maxX = 0, maxY = 0;
+        for (int i = 0; i < size; i++) {
+            DynamicState dynamicState = dynamicStateList.get(i);
+            int[] stateId = dynamicState.getStateId();
+            int x = stateId[0];
+            int y = stateId[1];
+            if (x > maxX) {
+                maxX = x;
+            }
+            if (y > maxY) {
+                maxY = y;
+            }
+        }
+        Matrix matrix = new Matrix(maxY + 1, maxX + 1);
+        for (int i = 0; i < size; i++) {
+            DynamicState dynamicState = dynamicStateList.get(i);
+            int[] stateId = dynamicState.getStateId();
+            float value = dynamicState.getValue();
+            matrix.setNub(stateId[1], stateId[0], value);
+        }
+        return matrix;
+    }
+
+    public List<ValueFunction> getValueFunction() {//get the value function
+        List<ValueFunction> valueFunctions = new ArrayList<>();
+        int size = dynamicStateList.size();
+        for (int i = 0; i < size; i++) {
+            DynamicState dynamicState = dynamicStateList.get(i);
+            ValueFunction valueFunction = new ValueFunction();
+            valueFunction.setStateId(dynamicState.getStateId());
+            valueFunction.setValue(dynamicState.getValue());
+            valueFunctions.add(valueFunction);
+        }
+        return valueFunctions;
+    }
+
+    public List<Integer> getBestAction(int[] stateId) {//get the strategy for the current environment
+        List<Integer> actions = new ArrayList<>();
+        DynamicState state = getStateByStateId(stateId);//the current environment
+        if (state != null) {
+            Map<Integer, List<DynamicState>> sonStatesMap = state.getSonStatesMap();
+            float maxValue = 0;//maximum value
+            boolean isFirstOne = true;
+            for (Map.Entry<Integer, List<DynamicState>> entry : sonStatesMap.entrySet()) {
+                List<DynamicState> sonStates = entry.getValue();//child states
+                float maxValue2 = 0;//maximum value for this actionId
+                boolean isFirstTwo = true;
+                for (DynamicState dynamicState : sonStates) {
+                    float myValue = dynamicState.getValue();
+                    if (myValue > maxValue2 || isFirstTwo) {
+                        isFirstTwo = false;
+                        maxValue2 = myValue;
+                    }
+                }
+                if (maxValue2 > maxValue || isFirstOne) {
+                    isFirstOne = false;
+                    maxValue = maxValue2;
+                }
+            }
+            //collect all strategies of equal (maximum) value
+            for (Map.Entry<Integer, List<DynamicState>> entry : sonStatesMap.entrySet()) {
+                int actionId = entry.getKey();//action id
+                List<DynamicState> sonStates = entry.getValue();//child states
+                for (DynamicState dynamicState : sonStates) {
+                    if (dynamicState.getValue() == maxValue) {
+                        actions.add(actionId);
+                        break;
+                    }
+                }
+            }
+        }
+        return actions;
+    }
+
+    public void strategyStudy() throws Exception {//strategy (policy) learning
+        //record the current best strategy
+        int times = 0;
+        boolean isDifferent = true;//whether the strategy has changed
+        do {
+            int size = dynamicStateList.size();
+            for (int i = 0; i < size; i++) {
+                DynamicState dynamicState = dynamicStateList.get(i);
+                if (!dynamicState.isFinish()) {
+                    int actionId = getBestStrategyByPro(dynamicState);
+                    dynamicState.setBestActionId(actionId);//best strategy for this state, obtained via probabilities
+                }
+            }
+            if (times > 0) {
+                isDifferent = compareStrategy();
+            }
+            if (isDifferent) {//old and new strategies differ, re-evaluate the strategy
+                updateBestStrategy();//store the new strategy
+                strategyEvaluation();
+            }
+            times++;
+        } while (isDifferent && times < maxTimes);
+    }
+
+    private boolean isHere(List<DynamicState> dynamicStates, int[] stateId) {
+        boolean isHere = false;
+        for (DynamicState dynamicState : dynamicStates) {
+            if (Arrays.equals(dynamicState.getStateId(), stateId)) {
+                isHere = true;
+                break;
+            }
+        }
+        return isHere;
+    }
+
+    private DynamicState getStateByStateId(int[] stateId) {
+        DynamicState state = null;
+        for (DynamicState dynamicState : dynamicStateList) {
+            if (Arrays.equals(dynamicState.getStateId(), stateId)) {
+                state = dynamicState;
+                break;
+            }
+        }
+        return state;
+    }
+
+    private void updateBestStrategy() {//update the best strategy
+        bestStrategy.clear();//reset the record so compareStrategy() always compares against the latest strategy
+        int size = dynamicStateList.size();
+        for (int i = 0; i < size; i++) {
+            DynamicState dynamicState = dynamicStateList.get(i);
+            if (!dynamicState.isFinish()) {
+                bestStrategy.add(dynamicState.getBestActionId());
+            } else {
+                bestStrategy.add(0);
+            }
+        }
+    }
+
+    private boolean compareStrategy() {//compare the old and new strategies
+        int size = dynamicStateList.size();
+        boolean isDifferent = false;
+        for (int i = 0; i < size; i++) {
+            DynamicState dynamicState = dynamicStateList.get(i);
+            int actionId = bestStrategy.get(i);
+            if (dynamicState.getBestActionId() != actionId) {
+                isDifferent = true;
+                break;
+            }
+        }
+        return isDifferent;
+    }
+
+    private int getBestStrategyByPro(DynamicState dynamicState) throws Exception {//get the best strategy for the current state via probabilities
+        Map<Integer, List<DynamicState>> sonStatesMap = dynamicState.getSonStatesMap();//action -> child-state map
+        float maxValue = 0;//maximum value
+        boolean isFirst = true;
+        int bestActionId = 0;//best action
+        for (Map.Entry<Integer, List<DynamicState>> entry : sonStatesMap.entrySet()) {
+            int actionId = entry.getKey();//action id
+            float value = getValueByAction(dynamicState, actionId);//value of this action
+            if (value > maxValue || isFirst) {
+                isFirst = false;
+                maxValue = value;
+                bestActionId = actionId;
+            }
+        }
+        //return the best action
+        return bestActionId;
+    }
+
+    private void strategyEvaluation() throws Exception {//strategy (policy) evaluation
+        float maxSub;//maximum difference
+        do {
+            maxSub = 0;
+            for (DynamicState dynamicState : dynamicStateList) {//current state
+                if (!dynamicState.isFinish()) {//non-terminal state
+                    float sub = valueEvaluation(dynamicState);//returns the value difference
+                    if (sub > maxSub) {
+                        maxSub = sub;
+                    }
+                }
+            }
+        } while (maxSub >= valueTh);
+    }
+
+    private float getValueByAction(DynamicState dynamicState, int actionId) throws Exception {//get the value of taking an action
+        Map<Integer, List<DynamicState>> sonStatesMap = dynamicState.getSonStatesMap();//action -> child-state map
+        List<DynamicState> sonStateListByAction = sonStatesMap.get(actionId);//child states reachable from the current state under this action
+        if (sonStateListByAction == null) {
+            throw new Exception("This state has no next action! It may be a terminal state that was not marked as terminal!");
+        }
+        float number = dynamicState.getNumber();//number of times the current state was visited, i.e. the total count over all its child states
+        float updateValue = 0;//updated value
+        for (DynamicState sonState : sonStateListByAction) {
+            Map<Integer, Integer> profitMap = sonState.getProfitMap();//profit map of the child state
+            float sonNumber = sonState.getNumber();//number of times the child state was visited, i.e. the total count over all its profits
+            float sonPro = sonNumber / number;//probability of reaching this child state under the current best action
+            float value = sonState.getValue() * gaMa;//discounted value of the child state
+            //sum over the rewards r first
+            float sigmaR = 0;
+            for (Map.Entry<Integer, Integer> entryProfit : profitMap.entrySet()) {
+                float profit = entryProfit.getKey();//the profit value
+                float profitNumber = entryProfit.getValue();//the number of times this profit occurred
+                float profitPro = (profitNumber / sonNumber) * sonPro;//probability of obtaining this profit under the current action
+                float v = (value + profit) * profitPro;//contribution to the value
+                sigmaR = sigmaR + v;
+            }
+            updateValue = updateValue + sigmaR;
+        }
+        return updateValue;
+    }
+
+    private float valueEvaluation(DynamicState dynamicState) throws Exception {//value evaluation
+        float myValue = dynamicState.getValue();//current value
+        int bestActionId = dynamicState.getBestActionId();//action chosen by the current best strategy
+        float updateValue = getValueByAction(dynamicState, bestActionId);
+        dynamicState.setValue(updateValue);//update the value
+        return Math.abs(myValue - updateValue);
+    }
+    //Policy iteration
+    //A state is the vector key of the current environment together with its value function
+    //An action (including its rules) returns a new state after it has been executed
+    //The strategy decides which action to execute and what the profit is once the action finishes
+}
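
Not part of the patch: a minimal driver sketch showing the intended call order (populate states and actions, sample the model with gameStart(), run policy iteration with strategyStudy(), then query the policy). DynamicState and Action are not included in this patch, so the no-arg DynamicState constructor, the setStateId(int[]) and setFinish(boolean) setters, and the hypothetical MoveAction implementation of Action below are assumptions inferred from the calls in DynamicProgramming, not confirmed API.

    DynamicProgramming dp = new DynamicProgramming();
    dp.setGaMa(0.9f);        // discount factor
    dp.setValueTh(0.0001f);  // stop policy evaluation once the largest value change drops below this
    dp.setMaxTimes(200);     // cap on policy-improvement rounds

    // Register the states of a small 4x4 grid; cell (3,3) is the terminal state.
    // setStateId/setFinish are assumed setters on DynamicState (not defined in this patch).
    for (int x = 0; x < 4; x++) {
        for (int y = 0; y < 4; y++) {
            DynamicState state = new DynamicState();
            state.setStateId(new int[]{x, y});
            state.setFinish(x == 3 && y == 3);
            dp.getDynamicStateList().add(state);
        }
    }

    // Register actions keyed by their id. MoveAction is a hypothetical Action implementation
    // supplying getActionId(), action(int[]) and getProfit(int[]) as used by gameStart().
    Action right = new MoveAction(0, 1, 0);
    Action down = new MoveAction(1, 0, 1);
    dp.getActionMap().put(right.getActionId(), right);
    dp.getActionMap().put(down.getActionId(), down);

    dp.gameStart();        // sample transitions and profits for every non-terminal state
    dp.strategyStudy();    // policy iteration: improve the strategy, then re-evaluate values
    List<Integer> best = dp.getBestAction(new int[]{0, 0});  // best action ids at the start cell
    Matrix values = dp.getValueMatrix();                     // state values laid out on the grid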