Add File
This commit is contained in:
@@ -0,0 +1,298 @@
|
||||
package org.dromara.easyai.gameRobot;
|
||||
|
||||
import org.dromara.easyai.matrixTools.Matrix;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* @author lidapeng
|
||||
* @description 动态规划
|
||||
* @date 10:25 上午 2022/9/12
|
||||
*/
|
||||
public class DynamicProgramming {
|
||||
private final List<DynamicState> dynamicStateList = new ArrayList<>();//状态集合
|
||||
private final Map<Integer, Action> actionMap = new HashMap<>();//动作列表
|
||||
private final List<Integer> bestStrategy = new ArrayList<>();//最佳策略
|
||||
private float gaMa = 0.5F; //贴现因子
|
||||
private float valueTh = 0.0001f;//价值阈值
|
||||
private int maxTimes = 500;//策略改进最大迭代次数
|
||||
|
||||
public void setMaxTimes(int maxTimes) {
|
||||
this.maxTimes = maxTimes;
|
||||
}
|
||||
|
||||
public void setValueTh(float valueTh) {
|
||||
this.valueTh = valueTh;
|
||||
}
|
||||
|
||||
public void setGaMa(float gaMa) {
|
||||
this.gaMa = gaMa;
|
||||
}
|
||||
|
||||
public List<DynamicState> getDynamicStateList() {
|
||||
return dynamicStateList;
|
||||
}
|
||||
|
||||
public Map<Integer, Action> getActionMap() {
|
||||
return actionMap;
|
||||
}
|
||||
|
||||
public void gameStart() {//遍历所有状态
|
||||
for (DynamicState dynamicState : dynamicStateList) {
|
||||
if (!dynamicState.isFinish()) {
|
||||
dynamicState.add();//被执行次数+1
|
||||
Map<Integer, List<DynamicState>> sonStatesMap = dynamicState.getSonStatesMap();//动作-子状态集合 被执行的时候需要修改
|
||||
int[] stateId = dynamicState.getStateId();
|
||||
for (Map.Entry<Integer, Action> actionEntry : actionMap.entrySet()) {
|
||||
Action action = actionEntry.getValue();
|
||||
int actionId = action.getActionId();//动作id
|
||||
List<int[]> stateList = action.action(stateId);
|
||||
for (int[] myStateId : stateList) {
|
||||
DynamicState state = getStateByStateId(myStateId);//经过动作产生的新状态
|
||||
//产生一个新的动作-子状态合集的元素
|
||||
if (sonStatesMap.containsKey(actionId)) {//在动作集合里
|
||||
List<DynamicState> dynamicStates = sonStatesMap.get(actionId);
|
||||
if (!isHere(dynamicStates, state.getStateId())) {
|
||||
dynamicStates.add(state);
|
||||
}
|
||||
} else {//创建动作-子状态集合
|
||||
List<DynamicState> dynamicStateList = new ArrayList<>();
|
||||
dynamicStateList.add(state);
|
||||
sonStatesMap.put(actionId, dynamicStateList);
|
||||
}
|
||||
Map<Integer, Integer> profitMap = state.getProfitMap();//该状态的收益集合,主键是收益,值是次数 被执行的时候需要修改
|
||||
state.add();
|
||||
//产生一个新的收益
|
||||
int profit = action.getProfit(stateId);
|
||||
if (profitMap.containsKey(profit)) {
|
||||
profitMap.put(profit, profitMap.get(profit) + 1);
|
||||
} else {
|
||||
profitMap.put(profit, 1);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Matrix getValueMatrix() throws Exception {//获取价值矩阵
|
||||
int size = dynamicStateList.size();
|
||||
int maxX = 0, maxY = 0;
|
||||
for (int i = 0; i < size; i++) {
|
||||
DynamicState dynamicState = dynamicStateList.get(i);
|
||||
int[] stateId = dynamicState.getStateId();
|
||||
int x = stateId[0];
|
||||
int y = stateId[1];
|
||||
if (x > maxX) {
|
||||
maxX = x;
|
||||
}
|
||||
if (y > maxY) {
|
||||
maxY = y;
|
||||
}
|
||||
}
|
||||
Matrix matrix = new Matrix(maxY + 1, maxX + 1);
|
||||
for (int i = 0; i < size; i++) {
|
||||
DynamicState dynamicState = dynamicStateList.get(i);
|
||||
int[] stateId = dynamicState.getStateId();
|
||||
float value = dynamicState.getValue();
|
||||
matrix.setNub(stateId[1], stateId[0], value);
|
||||
}
|
||||
return matrix;
|
||||
}
|
||||
|
||||
public List<ValueFunction> getValueFunction() {//获取价值函数
|
||||
List<ValueFunction> valueFunctions = new ArrayList<>();
|
||||
int size = dynamicStateList.size();
|
||||
for (int i = 0; i < size; i++) {
|
||||
DynamicState dynamicState = dynamicStateList.get(i);
|
||||
ValueFunction valueFunction = new ValueFunction();
|
||||
valueFunction.setStateId(dynamicState.getStateId());
|
||||
valueFunction.setValue(dynamicState.getValue());
|
||||
valueFunctions.add(valueFunction);
|
||||
}
|
||||
return valueFunctions;
|
||||
}
|
||||
|
||||
public List<Integer> getBestAction(int[] stateId) {//根据当前环境获取策略
|
||||
List<Integer> actions = new ArrayList<>();
|
||||
DynamicState state = getStateByStateId(stateId);//当前的环境
|
||||
if (state != null) {
|
||||
Map<Integer, List<DynamicState>> sonStatesMap = state.getSonStatesMap();
|
||||
float maxValue = 0;//最大价值
|
||||
boolean isFirstOne = true;
|
||||
for (Map.Entry<Integer, List<DynamicState>> entry : sonStatesMap.entrySet()) {
|
||||
List<DynamicState> sonStates = entry.getValue();//子状态
|
||||
float maxValue2 = 0;//actionId 的最大价值
|
||||
boolean isFirstTwo = true;
|
||||
for (DynamicState dynamicState : sonStates) {
|
||||
float myValue = dynamicState.getValue();
|
||||
if (myValue > maxValue2 || isFirstTwo) {
|
||||
isFirstTwo = false;
|
||||
maxValue2 = myValue;
|
||||
}
|
||||
}
|
||||
if (maxValue2 > maxValue || isFirstOne) {
|
||||
isFirstOne = false;
|
||||
maxValue = maxValue2;
|
||||
}
|
||||
}
|
||||
//筛选等价值策略
|
||||
for (Map.Entry<Integer, List<DynamicState>> entry : sonStatesMap.entrySet()) {
|
||||
int actionId = entry.getKey();//动作id
|
||||
List<DynamicState> sonStates = entry.getValue();//子状态
|
||||
for (DynamicState dynamicState : sonStates) {
|
||||
if (dynamicState.getValue() == maxValue) {
|
||||
actions.add(actionId);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return actions;
|
||||
}
|
||||
|
||||
public void strategyStudy() throws Exception {//策略学习
|
||||
//记录当前最佳策略
|
||||
int times = 0;
|
||||
boolean isDifferent = true;//策略是否不同
|
||||
do {
|
||||
int size = dynamicStateList.size();
|
||||
for (int i = 0; i < size; i++) {
|
||||
DynamicState dynamicState = dynamicStateList.get(i);
|
||||
if (!dynamicState.isFinish()) {
|
||||
int actionId = getBestStrategyByPro(dynamicState);
|
||||
dynamicState.setBestActionId(actionId);//通过概率获取的当前状态最佳策略
|
||||
}
|
||||
}
|
||||
if (times > 0) {
|
||||
isDifferent = compareStrategy();
|
||||
}
|
||||
if (isDifferent) {//新老策略不同,重新评估策略
|
||||
updateBestStrategy();//更新新策略
|
||||
strategyEvaluation();
|
||||
}
|
||||
times++;
|
||||
} while (isDifferent && times < maxTimes);
|
||||
}
|
||||
|
||||
private boolean isHere(List<DynamicState> dynamicStates, int[] stateId) {
|
||||
boolean isHere = false;
|
||||
for (DynamicState dynamicState : dynamicStates) {
|
||||
if (Arrays.equals(dynamicState.getStateId(), stateId)) {
|
||||
isHere = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return isHere;
|
||||
}
|
||||
|
||||
private DynamicState getStateByStateId(int[] stateId) {
|
||||
DynamicState state = null;
|
||||
for (DynamicState dynamicState : dynamicStateList) {
|
||||
if (Arrays.equals(dynamicState.getStateId(), stateId)) {
|
||||
state = dynamicState;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return state;
|
||||
}
|
||||
|
||||
private void updateBestStrategy() {//更新最佳策略
|
||||
int size = dynamicStateList.size();
|
||||
for (int i = 0; i < size; i++) {
|
||||
DynamicState dynamicState = dynamicStateList.get(i);
|
||||
if (!dynamicState.isFinish()) {
|
||||
bestStrategy.add(dynamicState.getBestActionId());
|
||||
} else {
|
||||
bestStrategy.add(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean compareStrategy() {//比较新老策略
|
||||
int size = dynamicStateList.size();
|
||||
boolean isDifferent = false;
|
||||
for (int i = 0; i < size; i++) {
|
||||
DynamicState dynamicState = dynamicStateList.get(i);
|
||||
int actionId = bestStrategy.get(i);
|
||||
if (dynamicState.getBestActionId() != actionId) {
|
||||
isDifferent = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return isDifferent;
|
||||
}
|
||||
|
||||
private int getBestStrategyByPro(DynamicState dynamicState) throws Exception {//通过概率获取当前状态下的最佳策略
|
||||
Map<Integer, List<DynamicState>> sonStatesMap = dynamicState.getSonStatesMap();//动作-子状态集合
|
||||
float maxValue = 0;//最大价值
|
||||
boolean isFirst = true;
|
||||
int bestActionId = 0;//最佳动作
|
||||
for (Map.Entry<Integer, List<DynamicState>> entry : sonStatesMap.entrySet()) {
|
||||
int actionId = entry.getKey();//动作id
|
||||
float value = getValueByAction(dynamicState, actionId);//该动作的价值
|
||||
if (value > maxValue || isFirst) {
|
||||
isFirst = false;
|
||||
maxValue = value;
|
||||
bestActionId = actionId;
|
||||
}
|
||||
}
|
||||
//返回最佳动作
|
||||
return bestActionId;
|
||||
}
|
||||
|
||||
private void strategyEvaluation() throws Exception {//策略评估
|
||||
float maxSub;//最大差值
|
||||
do {
|
||||
maxSub = 0;
|
||||
for (DynamicState dynamicState : dynamicStateList) {//当前状态
|
||||
if (!dynamicState.isFinish()) {//非终结态
|
||||
float sub = valueEvaluation(dynamicState);//返回一个差
|
||||
if (sub > maxSub) {
|
||||
maxSub = sub;
|
||||
}
|
||||
}
|
||||
}
|
||||
} while (maxSub >= valueTh);
|
||||
}
|
||||
|
||||
private float getValueByAction(DynamicState dynamicState, int actionId) throws Exception {//通过动作获取价值
|
||||
Map<Integer, List<DynamicState>> sonStatesMap = dynamicState.getSonStatesMap();//动作-子状态集合
|
||||
List<DynamicState> sonStateListByAction = sonStatesMap.get(actionId);//当前状态最优策略动作下的子状态集合
|
||||
if (sonStateListByAction == null) {
|
||||
throw new Exception("该状态无下一步动作!可能该状态属于终结态,但并没有设置为终结态!");
|
||||
}
|
||||
float number = dynamicState.getNumber();//当前状态被执行的次数即所有子状态被执行的总数
|
||||
float updateValue = 0;//更新价值
|
||||
for (DynamicState sonState : sonStateListByAction) {
|
||||
Map<Integer, Integer> profitMap = sonState.getProfitMap();//子状态收益集合
|
||||
float sonNumber = sonState.getNumber();//该子状态执行的次数即该子状态所有收益被执行的总次数
|
||||
float sonPro = sonNumber / number;//当前子状态在当前最优策略动作下被执行的概率
|
||||
float value = sonState.getValue() * gaMa;//该子状态的价值
|
||||
//先对r求和
|
||||
float sigmaR = 0;
|
||||
for (Map.Entry<Integer, Integer> entryProfit : profitMap.entrySet()) {
|
||||
float profit = entryProfit.getKey();//--获取profit的收益
|
||||
float profitNumber = entryProfit.getValue();//--获取profit收益的次数
|
||||
float profitPro = (profitNumber / sonNumber) * sonPro;//在当前策略动作下产生的环境,产生profit收益的概率
|
||||
float v = (value + profit) * profitPro;//价值
|
||||
sigmaR = sigmaR + v;
|
||||
}
|
||||
updateValue = updateValue + sigmaR;
|
||||
}
|
||||
return updateValue;
|
||||
}
|
||||
|
||||
private float valueEvaluation(DynamicState dynamicState) throws Exception {//价值评估
|
||||
float myValue = dynamicState.getValue();//当前价值
|
||||
int bestActionId = dynamicState.getBestActionId();//当前最优策略选择的动作
|
||||
float updateValue = getValueByAction(dynamicState, bestActionId);
|
||||
dynamicState.setValue(updateValue);//更新价值
|
||||
return (float)Math.abs(myValue - updateValue);
|
||||
}
|
||||
//策略迭代
|
||||
//状态是当前环境的向量主键与价值函数
|
||||
//动作(包含动作的规则)是执行完成一个动作之后返回一个新的状态
|
||||
//策略决定执行什么动作,动作执行结束之后的收益是多少
|
||||
}
|
||||
Reference in New Issue
Block a user