use of core.game.StateObservation in project SimpleAsteroids by ljialin.
the class SimpleMaxNTest method runOnce.
/**
 * Plays a short episode of SimpleMaxGame and prints the outcome.
 *
 * Several agents are constructed (OLMCTS, discounted OLMCTS, an evolutionary agent and a
 * nested Monte Carlo agent), but only the nested Monte Carlo agent is asked for moves.
 * Each chosen action is applied to the played game and to a second game instance; the score
 * of that second instance is the return value.
 *
 * @return the final score of the second (reference) game instance
 */
public static double runOnce() {
    // Reference instance that mirrors every move. Presumably intended for comparison with a
    // noisy variant of the played game (the original mentions NoisyMaxGame) -- TODO confirm.
    StateObservation reference = new SimpleMaxGame();
    StateObservation game = new SimpleMaxGame();
    System.out.println(game.getGameScore());
    System.out.println(game.copy().getGameScore());

    // timer handed to the agent constructors; each move gets its own timer further down
    ElapsedCpuTimer setupTimer = new ElapsedCpuTimer();
    controllers.singlePlayer.sampleOLMCTS.Agent olmcts =
            new controllers.singlePlayer.sampleOLMCTS.Agent(game, setupTimer);
    controllers.singlePlayer.discountOLMCTS.Agent discountOlmcts =
            new controllers.singlePlayer.discountOLMCTS.Agent(game, setupTimer);
    controllers.singlePlayer.nestedMC.Agent nestedMC =
            new controllers.singlePlayer.nestedMC.Agent(game, setupTimer);

    // evolutionary algorithms that can be plugged into the evolutionary agent
    int resamples = 2;
    EvoAlg hillClimber = new SimpleRMHC(resamples);
    int evalBudget = 1000;
    double exploration = 10;
    int neighbours = 100;
    EvoAlg banditEa = new NTupleBanditEA(exploration, neighbours);

    Agent.useShiftBuffer = false;
    controllers.singlePlayer.ea.Agent.SEQUENCE_LENGTH = 100;
    AbstractPlayer evoPlayer =
            new controllers.singlePlayer.ea.Agent(game, setupTimer, banditEa, evalBudget);

    // the agent that actually plays
    nestedMC.maxRolloutLength = 5;
    nestedMC.nestDepth = 5;
    AbstractPlayer player = nestedMC;

    // per-move thinking budget, in milliseconds
    int thinkingTime = 50;
    // not used by the loop below; retained from the original harness
    Random rng = new Random();

    // upper bound on the number of moves made in the real game
    int maxSteps = 10;
    ElapsedTimer wallClock = new ElapsedTimer();
    int step = 0;
    while (step < maxSteps && !game.isGameOver()) {
        ElapsedCpuTimer moveTimer = new ElapsedCpuTimer();
        moveTimer.setMaxTimeMillis(thinkingTime);
        // the agent plans on a copy so that it cannot alter the real game
        Types.ACTIONS chosen = player.act(game.copy(), moveTimer);
        game.advance(chosen);
        reference.advance(chosen);
        step++;
    }

    System.out.println(game.getGameScore());
    System.out.println(reference.getGameScore());
    System.out.println(game.isGameOver());
    System.out.println(wallClock);
    return reference.getGameScore();
}
use of core.game.StateObservation in project SimpleAsteroids by ljialin.
the class SpaceBattleLinkTest method runTrial.
/**
 * Runs one trial of the space battle game with a nested Monte Carlo agent.
 *
 * The game parameters are injected from {@code SampleEvolvedParams.solutions[1]} and printed,
 * then the agent plays up to 500 steps or until the game is over.
 *
 * @param runVisible if true the game is shown in a JEasyFrame and the loop pauses briefly after
 *                   each step; if false no view is used and the loop runs without pauses
 * @return the game score of the link state when the loop finishes
 */
public static double runTrial(boolean runVisible) {
    // NOTE(review): only referenced by commented-out code in the original; the construction is
    // kept because its side effects (if any) are not visible from here
    StateObservation stateObs = new SimpleMaxGame();

    BattleGameSearchSpace.inject(SampleEvolvedParams.solutions[1]);
    System.out.println("Params are:");
    System.out.println(BattleGameParameters.params);
    // individual parameters can also be overridden directly, e.g. BattleGameParameters.loss = 1.1;

    SpaceBattleLinkState linkState = new SpaceBattleLinkState();

    // experiment settings
    GameActionSpaceAdapter.useHeuristic = false;
    Agent.useShiftBuffer = true;

    ElapsedCpuTimer timer = new ElapsedCpuTimer();
    AbstractPlayer player;
    player = new controllers.singlePlayer.discountOLMCTS.Agent(linkState, timer);

    // evolutionary algorithms, available for the (currently disabled) evolutionary agent:
    // player = new controllers.singlePlayer.ea.Agent(linkState, timer, evoAlg, nEvals);
    int nResamples = 2;
    EvoAlg evoAlg = new SimpleRMHC(nResamples);
    double kExplore = 10;
    int nNeighbours = 100;
    evoAlg = new NTupleBanditEA(kExplore, nNeighbours);

    // the agent that actually plays
    controllers.singlePlayer.nestedMC.Agent nestedMC =
            new controllers.singlePlayer.nestedMC.Agent(linkState, timer);
    nestedMC.maxRolloutLength = 10;
    nestedMC.nestDepth = 2;
    player = nestedMC;

    // both in milliseconds
    int thinkingTime = 50;
    int delay = 10;

    int nSteps = 500;
    ElapsedTimer t = new ElapsedTimer();
    BattleView view = new BattleView(linkState.state);
    // a null view means: run fast with no visuals
    if (!runVisible) {
        view = null;
    }
    if (view != null) {
        new JEasyFrame(view, "Simple Battle Game");
    }

    boolean verbose = false;
    for (int i = 0; i < nSteps && !linkState.isGameOver(); i++) {
        // result only needed by the commented-out random-move line in the original
        ArrayList<Types.ACTIONS> actions = linkState.getAvailableActions();
        timer = new ElapsedCpuTimer();
        timer.setMaxTimeMillis(thinkingTime);
        // the agent plans on a copy so that it cannot alter the real game
        Types.ACTIONS action = player.act(linkState.copy(), timer);
        if (verbose) {
            System.out.println(i + "\t Selected: " + action);
        }
        linkState.advance(action);
        if (view != null) {
            view.repaint();
            try {
                Thread.sleep(delay);
            } catch (InterruptedException e) {
                // Do not swallow the interruption: restore the flag so the caller can react.
                // The trial itself carries on (later sleeps will return immediately).
                Thread.currentThread().interrupt();
            }
        }
        if (verbose) {
            System.out.println(linkState.getGameScore());
        }
    }
    System.out.println("Game score: " + linkState.getGameScore());
    return linkState.getGameScore();
}
use of core.game.StateObservation in project SimpleAsteroids by ljialin.
the class Agent method nested.
/**
 * One level of a nested search over action sequences.
 *
 * While the game is not over and fewer than {@code maxRolloutLength} actions have been played,
 * every action in {@code actions} is tried on a copy of the state. At level 1 the copy is
 * handed to {@code playout}; at higher levels this method recurses one level down. The move
 * sequence of the best-scoring trial seen so far at this level is remembered in
 * {@code bestRollout[nestingLevel]}, and its move at the current position is then applied to
 * {@code stateObservation} and written into {@code moveSeq}.
 *
 * @param stateObservation state to search from; advanced in place
 * @param nestingLevel     current nesting level (1 is the innermost level)
 * @param moveSeq          moves played so far; extended in place
 * @param nActionsPlayed   number of valid entries in {@code moveSeq}
 */
void nested(StateObservation stateObservation, int nestingLevel, Types.ACTIONS[] moveSeq, int nActionsPlayed) {
    // forget whatever this level remembered from an earlier call
    lengthBestRollout[nestingLevel] = -1;
    scoreBestRollout[nestingLevel] = Double.NEGATIVE_INFINITY;

    while (!stateObservation.isGameOver() && nActionsPlayed < maxRolloutLength) {
        for (int a = 0; a < num_actions; a++) {
            // each candidate works on its own state and its own copy of the move sequence
            StateObservation trial = stateObservation.copy();
            Types.ACTIONS[] trialSeq = new Types.ACTIONS[maxRolloutLength];
            System.arraycopy(moveSeq, 0, trialSeq, 0, nActionsPlayed);

            trial.advance(actions[a]);
            trialSeq[nActionsPlayed] = actions[a];
            int trialLength = nActionsPlayed + 1;

            if (nestingLevel == 1) {
                playout(trial, trialSeq, trialLength);
            } else {
                nested(trial, nestingLevel - 1, trialSeq, trialLength);
            }

            // a strictly better score replaces the remembered best sequence for this level
            double trialScore = trial.getGameScore();
            if (trialScore > scoreBestRollout[nestingLevel]) {
                scoreBestRollout[nestingLevel] = trialScore;
                lengthBestRollout[nestingLevel] = maxRolloutLength;
                System.arraycopy(trialSeq, 0, bestRollout[nestingLevel], 0, maxRolloutLength);
            }
        }

        // commit the remembered best move for this position and step forward
        Types.ACTIONS committed = bestRollout[nestingLevel][nActionsPlayed];
        stateObservation.advance(committed);
        moveSeq[nActionsPlayed] = committed;
        nActionsPlayed++;
    }
}
use of core.game.StateObservation in project SimpleAsteroids by ljialin.
the class GameActionSpaceAdapter method evaluate.
/**
 * Evaluates an action sequence by playing it out on a copy of the current game state.
 *
 * The result is the change in game score relative to the score before the first action:
 * either the discounted, normalised sum of the per-step changes (when
 * {@code useDiscountFactor} is set) or the plain change after the last step. Gaussian noise
 * scaled by {@code noiseLevel} is added, and the value is logged before being returned.
 *
 * @param actions indices into {@code gvgaiActions}; the first {@code sequenceLength} entries
 *                are played in order
 * @return the noisy fitness of the sequence
 */
@Override
public double evaluate(int[] actions) {
    // work on a copy so that the real game state is left untouched
    StateObservation obs = stateObservation.copy();
    // remember the starting score: we track the change in score, not the absolute value
    double initScore = obs.getGameScore();
    double discount = 1.0;
    double denom = 0;
    double discountedTot = 0;

    LinePlot linePlot = null;
    if (visual) {
        // Shade cycles with the evaluation count. The division must be floating point:
        // the previous integer division always gave 0, so every plot was drawn in black.
        float grey = (nEvals % 100) / 100f;
        linePlot = new LinePlot().setColor(new Color(grey, grey, grey));
    }

    for (int i = 0; i < sequenceLength; i++) {
        obs.advance(gvgaiActions[actions[i]]);
        discountedTot += discount * (obs.getGameScore() - initScore);
        if (useHeuristic && obs instanceof SpaceBattleLinkState) {
            SpaceBattleLinkState state = (SpaceBattleLinkState) obs;
            // note: the heuristic term is added without the discount weight
            discountedTot += state.getHeuristicScore();
        }
        denom += discount;
        discount *= discountFactor;
        if (linePlot != null) {
            linePlot.add(discountedTot);
        }
    }
    if (visual) {
        linePlots.add(linePlot);
    }
    nEvals++;

    double delta;
    if (useDiscountFactor) {
        // with no steps taken the weights sum to zero; report no change instead of 0/0 = NaN
        delta = denom != 0 ? discountedTot / denom : 0;
    } else {
        delta = obs.getGameScore() - initScore;
    }
    delta += noiseLevel * random.nextGaussian();
    logger.log(delta, actions, false);
    return delta;
}
use of core.game.StateObservation in project SimpleAsteroids by ljialin.
the class SimpleGridTest method runOnce.
/**
 * Plays up to 30 steps of a GridModel game with the evolutionary agent, printing each selected
 * action and the resulting game state.
 *
 * OLMCTS, discounted OLMCTS and nested Monte Carlo agents are constructed as well, but the
 * moves are chosen by the evolutionary agent driven by a SlidingMeanEDA.
 *
 * @return the game score when the loop finishes
 */
public static double runOnce() {
    StateObservation grid = new GridModel();
    System.out.println(grid.getGameScore());
    System.out.println(grid.copy().getGameScore());

    // timer handed to the agent constructors; each move gets its own timer further down
    ElapsedCpuTimer setupTimer = new ElapsedCpuTimer();
    controllers.singlePlayer.sampleOLMCTS.Agent olmcts =
            new controllers.singlePlayer.sampleOLMCTS.Agent(grid, setupTimer);
    controllers.singlePlayer.discountOLMCTS.Agent discountOlmcts =
            new controllers.singlePlayer.discountOLMCTS.Agent(grid, setupTimer);
    controllers.singlePlayer.nestedMC.Agent nestedMC =
            new controllers.singlePlayer.nestedMC.Agent(grid, setupTimer);

    // candidate evolutionary algorithms; the last one is the one handed to the agent
    int resamples = 2;
    EvoAlg hillClimber = new SimpleRMHC(resamples);
    int evalBudget = 2000;
    double exploration = 10;
    int neighbours = 100;
    EvoAlg banditEa = new NTupleBanditEA(exploration, neighbours);
    EvoAlg slidingEda = new SlidingMeanEDA();

    Agent.useShiftBuffer = true;
    Agent.SEQUENCE_LENGTH = 30;
    AbstractPlayer player = new Agent(grid, setupTimer, slidingEda, evalBudget);

    // configured but not playing; swap in with "player = nestedMC" to try it
    nestedMC.maxRolloutLength = 30;
    nestedMC.nestDepth = 3;

    // per-move thinking budget, in milliseconds
    int thinkingTime = 50;
    // neither of these is used by the loop below; retained from the original harness
    Random rng = new Random();
    ElapsedTimer wallClock = new ElapsedTimer();

    // upper bound on the number of moves made in the real game
    int maxSteps = 30;
    int step = 0;
    while (step < maxSteps && !grid.isGameOver()) {
        ElapsedCpuTimer moveTimer = new ElapsedCpuTimer();
        moveTimer.setMaxTimeMillis(thinkingTime);
        // the agent plans on a copy so that it cannot alter the real game
        Types.ACTIONS chosen = player.act(grid.copy(), moveTimer);
        System.out.println();
        System.out.println("Selected: " + chosen);
        grid.advance(chosen);
        System.out.println("Game state: " + grid);
        System.out.println();
        step++;
    }

    System.out.println(grid.getGameScore());
    return grid.getGameScore();
}
Aggregations