use of core.game.StateObservationMulti in project SimpleAsteroids by ljialin.
the class OnePlyOpt method act.
/**
 * Prints a one-step look-ahead score table and then returns a pre-selected action.
 *
 * <p>For every pairing of an action for this player ({@code myId}) with an action for
 * the opponent ({@code oppId}), a copy of {@code obs} is advanced by one step and the
 * resulting score for {@code myId} is printed together with the two action indices.
 * The observation passed in is never advanced itself.
 *
 * @param obs             the current two-player game state
 * @param elapsedCpuTimer not consulted by this method
 * @return the entry of {@code obs.getAvailableActions()} at index {@code action}
 */
public Types.ACTIONS act(StateObservationMulti obs, ElapsedCpuTimer elapsedCpuTimer) {
    ArrayList<Types.ACTIONS> available = obs.getAvailableActions();

    int myIndex = 0;
    for (Types.ACTIONS mine : available) {
        int oppIndex = 0;
        for (Types.ACTIONS theirs : available) {
            // Joint action, indexed by player id.
            Types.ACTIONS[] joint = new Types.ACTIONS[2];
            joint[myId] = mine;
            joint[oppId] = theirs;

            // Simulate one step on a throw-away copy and report our score.
            StateObservationMulti next = obs.copy();
            next.advance(joint);
            double outcome = next.getGameScore(myId);
            System.out.printf("%d\t %d\t %.1f\n", myIndex, oppIndex, outcome);

            oppIndex++;
        }
        myIndex++;
    }

    return obs.getAvailableActions().get(action);
}
use of core.game.StateObservationMulti in project SimpleAsteroids by ljialin.
the class GameActionSpaceAdapterMulti method evaluate.
/**
 * Scores an action sequence by rolling it out on a copy of the stored game state.
 *
 * <p>Each entry of {@code actions} is applied {@code actionRepeat} times for
 * {@code playerID}, paired with a randomly indexed action for {@code opponentID}.
 * The fitness is the change in {@code playerID}'s game score relative to the score
 * before the rollout. When {@code useDiscountFactor} is set, it is instead the
 * discount-weighted average of the per-step score changes (plus the heuristic score
 * of each step when {@code useHeuristic} is set and the state is a
 * {@code SpaceBattleLinkStateTwoPlayer}). Gaussian noise scaled by {@code noiseLevel}
 * is added to the result before it is logged and returned.
 *
 * <p>When {@code visual} is set, the per-step score changes are also recorded in a
 * line plot (the chart and its frame are created on first use).
 *
 * @param actions indices into {@code gvgaiActions}, one per rollout step
 * @return the noisy fitness of the sequence; an empty sequence yields a zero score
 *         change (plus noise) in both the discounted and undiscounted modes
 */
@Override
public double evaluate(int[] actions) {
    // Work on a copy so the stored state is left untouched by the rollout.
    StateObservationMulti obs = stateObservation.copy();

    // We track the change in score, not the absolute score, so note the baseline now.
    double initScore = obs.getGameScore(playerID);
    double discount = 1.0;
    double denom = 0;
    double discountedTot = 0;

    LinePlot linePlot = null;
    if (visual) {
        if (lineChart == null) {
            lineChart = new LineChart().setBG(Color.gray);
            lineChart.xAxis = new LineChartAxis(new double[] { 0, sequenceLength / 2, sequenceLength });
            lineChart.yAxis = new LineChartAxis(new double[] { -50, -25, 0, 25, 50 });
            lineChart.plotBG = Color.white;
            lineChart.setYLabel("Score");
            lineChart.setXLabel("Rollout depth");
            frame = new JEasyFrame(lineChart, "Score versus depth");
        }
        // Cycle the shade of grey with the evaluation count so successive plots differ.
        float grey = (nEvals % 100) / 150.0f;
        linePlot = new LinePlot().setColor(new Color(grey, grey, grey));
    }

    for (int myAction : actions) {
        // advance() takes one action per player, hence the array indexed by player id.
        // NOTE(review): the opponent index is drawn from the size of the opponent's
        // available-action list but looked up in gvgaiActions; this assumes both use
        // the same ordering and length - confirm.
        int opAction = random.nextInt(obs.getAvailableActions(opponentID).size());
        Types.ACTIONS[] acts = new Types.ACTIONS[2];
        acts[playerID] = gvgaiActions[myAction];
        acts[opponentID] = gvgaiActions[opAction];
        for (int k = 0; k < actionRepeat; k++) {
            obs.advance(acts);
        }

        double stepDelta = obs.getGameScore(playerID) - initScore;
        discountedTot += discount * stepDelta;
        if (useHeuristic && obs instanceof SpaceBattleLinkStateTwoPlayer) {
            SpaceBattleLinkStateTwoPlayer state = (SpaceBattleLinkStateTwoPlayer) obs;
            // The heuristic term is added as-is; it is not scaled by the discount.
            discountedTot += state.getHeuristicScore();
        }
        denom += discount;
        discount *= discountFactor;

        if (linePlot != null) {
            linePlot.add(stepDelta);
            deltas.add(stepDelta);
        }
    }

    if (visual) {
        linePlots.add(linePlot);
    }
    nEvals++;

    double delta;
    if (useDiscountFactor) {
        // With no steps taken denom is 0; dividing would give NaN, so report no change.
        delta = (denom != 0) ? discountedTot / denom : 0;
    } else {
        delta = obs.getGameScore(playerID) - initScore;
    }
    delta += noiseLevel * random.nextGaussian();
    logger.log(delta, actions, false);
    return delta;
}
use of core.game.StateObservationMulti in project SimpleAsteroids by ljialin.
the class AgentEvaluator method evaluate.
/**
 * Plays one two-player game between two evolutionary agents and reports the outcome
 * for player 0.
 *
 * <p>Player 0 uses a {@code SlidingMeanEDA} whose history length, discount factor and
 * rollout length are read from {@code solution} via {@code searchSpace}; player 1 uses
 * a {@code SlidingMeanEDA} with history length 2 and rollout length 5. The game runs
 * for at most 500 steps or until the game is over. Parameter, timing and tick
 * statistics are printed to standard output.
 *
 * <p>Side effects visible here: sets {@code BattleGameParameters.loss},
 * {@code BattleGameParameters.thrust} and
 * {@code GameActionSpaceAdapterMulti.useHeuristic}.
 *
 * @param solution the point in the search space to evaluate
 * @return 1 if player 0's final score is positive, -1 if it is negative, otherwise 0
 */
@Override
public double evaluate(int[] solution) {
    System.out.println("Params are:");
    System.out.println(searchSpace.report(solution));

    // Game parameters can also be overridden by setting them directly, as here.
    BattleGameParameters.loss = 0.996;
    BattleGameParameters.thrust = 3;

    SpaceBattleLinkStateTwoPlayer linkState = new SpaceBattleLinkStateTwoPlayer();
    StateObservationMulti multi = linkState;
    GameActionSpaceAdapterMulti.useHeuristic = false;

    ElapsedCpuTimer timer = new ElapsedCpuTimer();
    int idPlayer1 = 0;
    int idPlayer2 = 1;
    int nEvals = 100;

    // Player 1: the agent under evaluation, configured from the solution.
    // (Earlier experiments used SimpleRMHC and NTupleBanditEA here; only the
    // SlidingMeanEDA was ever passed to the agent, so only it is constructed.)
    EvoAlg evoAlg = new SlidingMeanEDA().setHistoryLength(searchSpace.getHistoryLength(solution));
    Agent evoAgent = new controllers.multiPlayer.ea.Agent(linkState, timer, evoAlg, idPlayer1, nEvals);
    evoAgent.setDiscountFactor(searchSpace.getDiscountFactor(solution));
    evoAgent.sequenceLength = searchSpace.getRolloutLength(solution);

    // Player 2: fixed baseline opponent.
    EvoAlg evoAlg2 = new SlidingMeanEDA().setHistoryLength(2);
    Agent player2 = new controllers.multiPlayer.ea.Agent(linkState, timer, evoAlg2, idPlayer2, nEvals);
    player2.sequenceLength = 5;

    // Per-move thinking budget, in milliseconds.
    int thinkingTime = 10;
    int nSteps = 500;

    StatSummary sst1 = new StatSummary("Player 1 Elapsed Time");
    StatSummary sst2 = new StatSummary("Player 2 Elapsed Time");
    StatSummary ssTicks1 = new StatSummary("Player 1 nTicks");
    StatSummary ssTicks2 = new StatSummary("Player 2 nTicks");

    for (int i = 0; i < nSteps && !linkState.isGameOver(); i++) {
        linkState.state = linkState.state.copyState();

        // Keep track of the number of game ticks used by each algorithm.
        int ticks;

        // Each player gets its own fresh timer so that the time spent by player 1
        // is not deducted from player 2's budget.
        ElapsedCpuTimer timer1 = new ElapsedCpuTimer();
        timer1.setMaxTimeMillis(thinkingTime);
        ElapsedTimer t1 = new ElapsedTimer();
        ticks = SpaceBattleLinkStateTwoPlayer.nTicks;
        Types.ACTIONS action1 = evoAgent.act(multi.copy(), timer1);
        sst1.add(t1.elapsed());
        ticks = SpaceBattleLinkStateTwoPlayer.nTicks - ticks;
        ssTicks1.add(ticks);

        ElapsedCpuTimer timer2 = new ElapsedCpuTimer();
        timer2.setMaxTimeMillis(thinkingTime);
        ElapsedTimer t2 = new ElapsedTimer();
        ticks = SpaceBattleLinkStateTwoPlayer.nTicks;
        Types.ACTIONS action2 = player2.act(multi.copy(), timer2);
        sst2.add(t2.elapsed());
        ticks = SpaceBattleLinkStateTwoPlayer.nTicks - ticks;
        ssTicks2.add(ticks);

        multi.advance(new Types.ACTIONS[] { action1, action2 });
    }

    System.out.println(multi.getGameScore());
    System.out.println(multi.isGameOver());
    System.out.println(sst1);
    System.out.println(sst2);
    System.out.println(ssTicks1);
    System.out.println(ssTicks2);

    double score = multi.getGameScore(idPlayer1);
    System.out.println("Game score: " + score);

    // Collapse the score to a win / loss / draw indicator for player 1.
    if (score > 0) {
        return 1;
    }
    if (score < 0) {
        return -1;
    }
    return 0;
}
Aggregations