Search in sources:

Example 1 with StateObservation

use of core.game.StateObservation in project SimpleAsteroids by ljialin.

From the class SimpleMaxNTest, method runOnce.

/**
 * Plays a single game of SimpleMaxGame with a nested Monte Carlo agent and
 * returns the final score.  Two identical states are advanced in lockstep:
 * the agent plans on {@code stateObs} while {@code noiseFree} replays the
 * same actions (intended as a noise-free reference when a noisy game variant
 * is substituted for {@code stateObs}).
 *
 * @return the final game score of the noise-free state
 */
public static double runOnce() {
    // make an agent to test
    StateObservation noiseFree = new SimpleMaxGame();
    // new NoisyMaxGame();
    StateObservation stateObs = new SimpleMaxGame();
    System.out.println(stateObs.getGameScore());
    System.out.println(stateObs.copy().getGameScore());
    // System.exit(0);
    ElapsedCpuTimer timer = new ElapsedCpuTimer();
    AbstractPlayer player;
    controllers.singlePlayer.sampleOLMCTS.Agent olmcts = new controllers.singlePlayer.sampleOLMCTS.Agent(stateObs, timer);
    controllers.singlePlayer.discountOLMCTS.Agent discountOlmcts = new controllers.singlePlayer.discountOLMCTS.Agent(stateObs, timer);
    controllers.singlePlayer.nestedMC.Agent nestedMC = new controllers.singlePlayer.nestedMC.Agent(stateObs, timer);
    // Alternative agents, kept for easy switching; the nestedMC assignment
    // further down is the one that takes effect.
    // player = olmcts;
    // player = discountOlmcts;
    // for the following we can pass the Evolutionary algorithm to use
    int nResamples = 2;
    EvoAlg evoAlg = new SimpleRMHC(nResamples);
    int nEvals = 1000;
    double kExplore = 10;
    int nNeighbours = 100;
    evoAlg = new NTupleBanditEA(kExplore, nNeighbours);
    // DefaultMutator.totalRandomChaosMutation = true;
    Agent.useShiftBuffer = false;
    controllers.singlePlayer.ea.Agent.SEQUENCE_LENGTH = 100;
    // EA agent is constructed here for demonstration but is immediately
    // superseded by the nestedMC assignment below.
    player = new controllers.singlePlayer.ea.Agent(stateObs, timer, evoAlg, nEvals);
    nestedMC.maxRolloutLength = 5;
    nestedMC.nestDepth = 5;
    player = nestedMC;
    // planning budget per move, in milliseconds
    int thinkingTime = 50;
    // player = new controllers.singlePlayer.sampleRandom.Agent(stateObs, timer);
    // check that we can play the game
    // this is how many steps we'll take in the actual game ...
    int nSteps = 10;
    ElapsedTimer t = new ElapsedTimer();
    for (int i = 0; i < nSteps && !stateObs.isGameOver(); i++) {
        timer = new ElapsedCpuTimer();
        timer.setMaxTimeMillis(thinkingTime);
        // the agent plans on a copy; the real states are advanced only here
        Types.ACTIONS action = player.act(stateObs.copy(), timer);
        // System.out.println("Selected: " + action); //  + "\t " + action.ordinal());
        stateObs.advance(action);
        noiseFree.advance(action);
    // System.out.println(stateObs.getGameScore());
    }
    System.out.println(stateObs.getGameScore());
    System.out.println(noiseFree.getGameScore());
    System.out.println(stateObs.isGameOver());
    System.out.println(t);
    return noiseFree.getGameScore();
}
Also used : Agent(controllers.singlePlayer.ea.Agent) Types(ontology.Types) NTupleBanditEA(ntuple.NTupleBanditEA) Agent(controllers.singlePlayer.ea.Agent) EvoAlg(evodef.EvoAlg) StateObservation(core.game.StateObservation) SimpleRMHC(ga.SimpleRMHC) Random(java.util.Random) SimpleMaxGame(altgame.SimpleMaxGame) AbstractPlayer(core.player.AbstractPlayer) ElapsedTimer(utilities.ElapsedTimer) ElapsedCpuTimer(tools.ElapsedCpuTimer)

Example 2 with StateObservation

use of core.game.StateObservation in project SimpleAsteroids by ljialin.

From the class SpaceBattleLinkTest, method runTrial.

/**
 * Runs one game of the simple space battle through the GVGAI link layer
 * using a nested Monte Carlo agent, and returns the final game score.
 *
 * @param runVisible if true the game is animated in a window; if false it
 *                   runs headless (and therefore much faster)
 * @return the final game score of the played-out state
 */
public static double runTrial(boolean runVisible) {
    // make an agent to test
    // NOTE(review): stateObs is never used below (the agents plan on
    // linkState) — looks like copy-paste leftover; confirm before removing.
    StateObservation stateObs = new SimpleMaxGame();
    // BattleGameSearchSpace.inject(BattleGameSearchSpace.getRandomPoint());
    // SampleEvolvedParams.solutions[1][2] = 5;
    // BattleGameSearchSpace.inject(SampleEvolvedParams.solutions[1]);
    // BattleGameSearchSpace.inject(SampleEvolvedParams.solutions[2]);
    BattleGameSearchSpace.inject(SampleEvolvedParams.solutions[1]);
    System.out.println("Params are:");
    System.out.println(BattleGameParameters.params);
    // can also override parameters by setting them directly as follows:
    // BattleGameParameters.loss = 1.1;
    SpaceBattleLinkState linkState = new SpaceBattleLinkState();
    // set some parameters for the experiment
    GameActionSpaceAdapter.useHeuristic = false;
    Agent.useShiftBuffer = true;
    // DefaultMutator.totalRandomChaosMutation = false;
    // // supercl
    // StateObservation stateObs = linkState;
    ElapsedCpuTimer timer = new ElapsedCpuTimer();
    AbstractPlayer player;
    // controllers.singlePlayer.sampleOLMCTS.Agent olmcts =
    // new controllers.singlePlayer.sampleOLMCTS.Agent(linkState, timer);
    player = new controllers.singlePlayer.discountOLMCTS.Agent(linkState, timer);
    // try the evolutionary players
    int nResamples = 2;
    EvoAlg evoAlg = new SimpleRMHC(nResamples);
    double kExplore = 10;
    int nNeighbours = 100;
    int nEvals = 200;
    evoAlg = new NTupleBanditEA(kExplore, nNeighbours);
    // player = new controllers.singlePlayer.ea.Agent(linkState, timer, evoAlg, nEvals);
    controllers.singlePlayer.nestedMC.Agent nestedMC = new controllers.singlePlayer.nestedMC.Agent(linkState, timer);
    nestedMC.maxRolloutLength = 10;
    nestedMC.nestDepth = 2;
    player = nestedMC;
    // planning budget per move and frame delay, in milliseconds
    int thinkingTime = 50;
    int delay = 10;
    // player = new controllers.singlePlayer.sampleRandom.Agent(stateObs, timer);
    // check that we can play the game
    Random random = new Random();
    int nSteps = 500;
    ElapsedTimer t = new ElapsedTimer();
    BattleView view = new BattleView(linkState.state);
    // set view to null to run fast with no visuals
    if (!runVisible)
        view = null;
    if (view != null) {
        new JEasyFrame(view, "Simple Battle Game");
    }
    boolean verbose = false;
    for (int i = 0; i < nSteps && !linkState.isGameOver(); i++) {
        ArrayList<Types.ACTIONS> actions = linkState.getAvailableActions();
        timer = new ElapsedCpuTimer();
        timer.setMaxTimeMillis(thinkingTime);
        // the agent plans on a copy; the real state is advanced only below
        Types.ACTIONS action = player.act(linkState.copy(), timer);
        // action = actions.get(random.nextInt(actions.size()));
        if (verbose)
            // + "\t " + action.ordinal());
            System.out.println(i + "\t Selected: " + action);
        linkState.advance(action);
        if (view != null) {
            view.repaint();
            try {
                Thread.sleep(delay);
            } catch (InterruptedException e) {
                // restore the interrupt flag instead of silently swallowing it
                Thread.currentThread().interrupt();
            }
        }
        if (verbose)
            System.out.println(linkState.getGameScore());
    }
    System.out.println("Game score: " + linkState.getGameScore());
    return linkState.getGameScore();
}
Also used : Types(ontology.Types) NTupleBanditEA(ntuple.NTupleBanditEA) EvoAlg(evodef.EvoAlg) StateObservation(core.game.StateObservation) Random(java.util.Random) JEasyFrame(utilities.JEasyFrame) SimpleMaxGame(altgame.SimpleMaxGame) AbstractPlayer(core.player.AbstractPlayer) ElapsedTimer(utilities.ElapsedTimer) ElapsedCpuTimer(tools.ElapsedCpuTimer) Agent(controllers.singlePlayer.ea.Agent) BattleView(battle.BattleView) SimpleRMHC(ga.SimpleRMHC)

Example 3 with StateObservation

use of core.game.StateObservation in project SimpleAsteroids by ljialin.

From the class Agent, method nested.

/**
 * Nested Monte Carlo search: from the given state, tries every action; at
 * nesting level 1 each resulting state is scored by a plain playout,
 * otherwise by a recursive nested search one level down.  The best rollout
 * found at this level is recorded in {@code bestRollout[nestingLevel]}, and
 * the state is advanced one step along it; this repeats until the game ends
 * or {@code maxRolloutLength} actions have been played.
 *
 * @param stateObservation state to search from; advanced in place
 * @param nestingLevel     current nesting depth (1 = plain playouts)
 * @param moveSeq          actions played so far; extended in place
 * @param nActionsPlayed   number of valid entries in moveSeq
 */
void nested(StateObservation stateObservation, int nestingLevel, Types.ACTIONS[] moveSeq, int nActionsPlayed) {
    lengthBestRollout[nestingLevel] = -1;
    scoreBestRollout[nestingLevel] = Double.NEGATIVE_INFINITY;
    while (true) {
        if (stateObservation.isGameOver())
            return;
        if (nActionsPlayed >= maxRolloutLength)
            return;
        // evaluate every available action from the current state
        for (int i = 0; i < num_actions; i++) {
            StateObservation state = stateObservation.copy();
            Types.ACTIONS[] moveSeqCopy = new Types.ACTIONS[maxRolloutLength];
            int nActionsCopy = nActionsPlayed;
            for (int j = 0; j < nActionsPlayed; j++) moveSeqCopy[j] = moveSeq[j];
            // common prefix of both branches: play action i and record it
            state.advance(actions[i]);
            moveSeqCopy[nActionsCopy] = actions[i];
            nActionsCopy++;
            if (nestingLevel == 1) {
                playout(state, moveSeqCopy, nActionsCopy);
            } else {
                nested(state, nestingLevel - 1, moveSeqCopy, nActionsCopy);
            }
            double score = state.getGameScore();
            if (score > scoreBestRollout[nestingLevel]) {
                scoreBestRollout[nestingLevel] = score;
                // NOTE(review): records the maximum length, not the actual
                // rollout length — confirm this is intended
                lengthBestRollout[nestingLevel] = maxRolloutLength;
                for (int j = 0; j < maxRolloutLength; j++) bestRollout[nestingLevel][j] = moveSeqCopy[j];
            }
        }
        // commit one step along the best rollout found at this level, then loop
        stateObservation.advance(bestRollout[nestingLevel][nActionsPlayed]);
        moveSeq[nActionsPlayed] = bestRollout[nestingLevel][nActionsPlayed];
        nActionsPlayed++;
    }
}
Also used : StateObservation(core.game.StateObservation)

Example 4 with StateObservation

use of core.game.StateObservation in project SimpleAsteroids by ljialin.

From the class GameActionSpaceAdapter, method evaluate.

/**
 * Evaluates an action sequence by rolling a copy of the current game state
 * forward through every action and accumulating the (optionally discounted)
 * change in score.  Gaussian noise is added to the returned fitness, and the
 * evaluation is logged.
 *
 * @param actions indices into {@code gvgaiActions}, one per step of the
 *                sequence (must have at least {@code sequenceLength} entries)
 * @return the (noisy) fitness delta for this sequence
 */
@Override
public double evaluate(int[] actions) {
    // take a copy of the current game state and accumulate the score as we go along
    StateObservation obs = stateObservation.copy();
    // note the score now - for normalisation reasons
    // we wish to track the change in score, not the absolute score
    double initScore = obs.getGameScore();
    double discount = 1.0;
    double denom = 0;
    double discountedTot = 0;
    // need to do the visual stuff here ...
    LinePlot linePlot = null;
    if (visual) {
        // BUG FIX: (nEvals % 100) / 100 was integer division, which is
        // always 0 (every plot was drawn black); divide by 100f to get a
        // grey level in [0, 1) that cycles with the evaluation count.
        float grey = (nEvals % 100) / 100f;
        linePlot = new LinePlot().setColor(new Color(grey, grey, grey));
    }
    for (int i = 0; i < sequenceLength; i++) {
        obs.advance(gvgaiActions[actions[i]]);
        discountedTot += discount * (obs.getGameScore() - initScore);
        if (useHeuristic && obs instanceof SpaceBattleLinkState) {
            SpaceBattleLinkState state = (SpaceBattleLinkState) obs;
            discountedTot += state.getHeuristicScore();
        }
        denom += discount;
        discount *= discountFactor;
        if (linePlot != null) {
            linePlot.add(discountedTot + Math.random() * 0);
        }
    }
    if (visual) {
        linePlots.add(linePlot);
    }
    nEvals++;
    double delta;
    if (useDiscountFactor) {
        delta = discountedTot / denom;
    } else {
        delta = obs.getGameScore() - initScore;
    }
    delta += noiseLevel * random.nextGaussian();
    logger.log(delta, actions, false);
    return delta;
}
Also used : StateObservation(core.game.StateObservation) LinePlot(plot.LinePlot) SpaceBattleLinkState(gvglink.SpaceBattleLinkState)

Example 5 with StateObservation

use of core.game.StateObservation in project SimpleAsteroids by ljialin.

From the class SimpleGridTest, method runOnce.

/**
 * Plays a single game of the grid model with an evolutionary-algorithm agent
 * (SlidingMeanEDA driving the ea Agent) and returns the final score, printing
 * the selected action and game state at every step.
 *
 * @return the final game score of the grid game
 */
public static double runOnce() {
    // make an agent to test
    StateObservation gridGame = new GridModel();
    System.out.println(gridGame.getGameScore());
    System.out.println(gridGame.copy().getGameScore());
    // System.exit(0);
    ElapsedCpuTimer timer = new ElapsedCpuTimer();
    AbstractPlayer player;
    controllers.singlePlayer.sampleOLMCTS.Agent olmcts = new controllers.singlePlayer.sampleOLMCTS.Agent(gridGame, timer);
    controllers.singlePlayer.discountOLMCTS.Agent discountOlmcts = new controllers.singlePlayer.discountOLMCTS.Agent(gridGame, timer);
    controllers.singlePlayer.nestedMC.Agent nestedMC = new controllers.singlePlayer.nestedMC.Agent(gridGame, timer);
    // Alternative agents, kept for easy switching; the ea Agent assignment
    // further down is the one that takes effect.
    // player = olmcts;
    // player = discountOlmcts;
    // for the following we can pass the Evolutionary algorithm to use
    int nResamples = 2;
    EvoAlg evoAlg = new SimpleRMHC(nResamples);
    int nEvals = 2000;
    double kExplore = 10;
    int nNeighbours = 100;
    evoAlg = new NTupleBanditEA(kExplore, nNeighbours);
    evoAlg = new SlidingMeanEDA();
    // DefaultMutator.totalRandomChaosMutation = false;
    Agent.useShiftBuffer = true;
    Agent.SEQUENCE_LENGTH = 30;
    player = new Agent(gridGame, timer, evoAlg, nEvals);
    // nestedMC is configured but not selected; flip the assignment below to use it
    nestedMC.maxRolloutLength = 30;
    nestedMC.nestDepth = 3;
    // player = nestedMC;
    // planning budget per move, in milliseconds
    int thinkingTime = 50;
    // player = new controllers.singlePlayer.sampleRandom.Agent(stateObs, timer);
    // check that we can play the game
    // this is how many steps we'll take in the actual game ...
    int nSteps = 30;
    ElapsedTimer t = new ElapsedTimer();
    for (int i = 0; i < nSteps && !gridGame.isGameOver(); i++) {
        timer = new ElapsedCpuTimer();
        timer.setMaxTimeMillis(thinkingTime);
        // the agent plans on a copy; the real state is advanced only below
        Types.ACTIONS action = player.act(gridGame.copy(), timer);
        System.out.println();
        // + "\t " + action.ordinal());
        System.out.println("Selected: " + action);
        gridGame.advance(action);
        System.out.println("Game state: " + gridGame);
        System.out.println();
    }
    System.out.println(gridGame.getGameScore());
    return gridGame.getGameScore();
}
Also used : Agent(controllers.singlePlayer.ea.Agent) Types(ontology.Types) GridModel(rl.grid.GridModel) SlidingMeanEDA(ntuple.SlidingMeanEDA) NTupleBanditEA(ntuple.NTupleBanditEA) EvoAlg(evodef.EvoAlg) StateObservation(core.game.StateObservation) SimpleRMHC(ga.SimpleRMHC) Random(java.util.Random) AbstractPlayer(core.player.AbstractPlayer) ElapsedTimer(utilities.ElapsedTimer) ElapsedCpuTimer(tools.ElapsedCpuTimer)

Aggregations

StateObservation (core.game.StateObservation)7 Types (ontology.Types)4 ElapsedCpuTimer (tools.ElapsedCpuTimer)4 Agent (controllers.singlePlayer.ea.Agent)3 AbstractPlayer (core.player.AbstractPlayer)3 EvoAlg (evodef.EvoAlg)3 SimpleRMHC (ga.SimpleRMHC)3 Random (java.util.Random)3 NTupleBanditEA (ntuple.NTupleBanditEA)3 ElapsedTimer (utilities.ElapsedTimer)3 SimpleMaxGame (altgame.SimpleMaxGame)2 BattleView (battle.BattleView)1 SpaceBattleLinkState (gvglink.SpaceBattleLinkState)1 SlidingMeanEDA (ntuple.SlidingMeanEDA)1 LinePlot (plot.LinePlot)1 GridModel (rl.grid.GridModel)1 JEasyFrame (utilities.JEasyFrame)1