Search in sources :

Example 6 with StateObservationMulti

use of core.game.StateObservationMulti in project SimpleAsteroids by ljialin.

In the class OnePlyOpt, method act:

/**
 * Prints the one-step score of every joint action pair, then returns an action.
 *
 * <p>For each combination of this player's action and the opponent's action (both drawn
 * from {@code obs.getAvailableActions()}), a copy of the state is advanced by that pair
 * and this player's resulting game score is written to standard output as a
 * tab-separated line: own action index, opponent action index, score.
 *
 * <p>The printed scores are not used to pick the result: the method returns the
 * available action at index {@code action}, a value defined outside this method.
 *
 * @param obs             current two-player game state; only copies of it are advanced
 * @param elapsedCpuTimer time budget supplied by the caller; not consulted here
 * @return the available action at index {@code action}
 */
public Types.ACTIONS act(StateObservationMulti obs, ElapsedCpuTimer elapsedCpuTimer) {
    ArrayList<Types.ACTIONS> available = obs.getAvailableActions();
    for (int mine = 0; mine < available.size(); mine++) {
        for (int theirs = 0; theirs < available.size(); theirs++) {
            // Build the joint move: one slot per player id.
            Types.ACTIONS[] jointMove = new Types.ACTIONS[2];
            jointMove[myId] = available.get(mine);
            jointMove[oppId] = available.get(theirs);

            // Simulate on a copy so the caller's state is left untouched.
            StateObservationMulti lookahead = obs.copy();
            lookahead.advance(jointMove);

            System.out.format("%d\t %d\t %.1f\n", mine, theirs, lookahead.getGameScore(myId));
        }
    }
    return obs.getAvailableActions().get(action);
}
Also used : StateObservationMulti(core.game.StateObservationMulti)

Example 7 with StateObservationMulti

use of core.game.StateObservationMulti in project SimpleAsteroids by ljialin.

In the class GameActionSpaceAdapterMulti, method evaluate:

/**
 * Scores an action sequence by rolling it out on a copy of the current game state.
 *
 * <p>At each step this player takes {@code actions[i]} while the opponent takes a
 * uniformly random action; the pair is applied {@code actionRepeat} times. The fitness
 * is based on the change in this player's game score relative to the score before the
 * rollout:
 * <ul>
 *   <li>if {@code useDiscountFactor} is set, the discount-weighted average of the
 *       per-step score changes (plus the heuristic score when {@code useHeuristic} is
 *       set and the state is a {@code SpaceBattleLinkStateTwoPlayer});</li>
 *   <li>otherwise, the score change at the end of the rollout.</li>
 * </ul>
 * Gaussian noise scaled by {@code noiseLevel} is added, and the result is logged.
 *
 * <p>Side effects: increments {@code nEvals}; when {@code visual} is set, lazily creates
 * the chart and frame, and records a line plot of score change versus depth.
 *
 * @param actions indices into {@code gvgaiActions}, one per rollout step
 * @return the noisy fitness of the sequence; an empty sequence scores as zero change
 */
@Override
public double evaluate(int[] actions) {
    // take a copy of the current game state and accumulate the score as we go along
    StateObservationMulti obs = stateObservation.copy();
    // note the score now - for normalisation reasons
    // we wish to track the change in score, not the absolute score
    double initScore = obs.getGameScore(playerID);
    double discount = 1.0;
    double denom = 0;
    double discountedTot = 0;
    LinePlot linePlot = null;
    if (visual) {
        if (lineChart == null) {
            // first visual evaluation: build the chart and its window once
            lineChart = new LineChart().setBG(Color.gray);
            lineChart.xAxis = new LineChartAxis(new double[] { 0, sequenceLength / 2, sequenceLength });
            lineChart.yAxis = new LineChartAxis(new double[] { -50, -25, 0, 25, 50 });
            lineChart.plotBG = Color.white;
            lineChart.setYLabel("Score");
            lineChart.setXLabel("Rollout depth");
            frame = new JEasyFrame(lineChart, "Score versus depth");
        }
        // cycle the grey level with the evaluation count so successive plots differ
        float grey = (nEvals % 100) / 150.0f;
        linePlot = new LinePlot().setColor(new Color(grey, grey, grey));
    }
    for (int i = 0; i < actions.length; i++) {
        // advance takes one action per player, hence an array of actions
        int myAction = actions[i];
        int opAction = random.nextInt(obs.getAvailableActions(opponentID).size());
        Types.ACTIONS[] acts = new Types.ACTIONS[2];
        acts[playerID] = gvgaiActions[myAction];
        acts[opponentID] = gvgaiActions[opAction];
        for (int k = 0; k < actionRepeat; k++) {
            obs.advance(acts);
        }
        discountedTot += discount * (obs.getGameScore(playerID) - initScore);
        if (useHeuristic && obs instanceof SpaceBattleLinkStateTwoPlayer) {
            SpaceBattleLinkStateTwoPlayer state = (SpaceBattleLinkStateTwoPlayer) obs;
            // the heuristic term is added undiscounted
            discountedTot += state.getHeuristicScore();
        }
        denom += discount;
        discount *= discountFactor;
        if (linePlot != null) {
            double delta = obs.getGameScore((playerID)) - initScore;
            linePlot.add(delta);
            deltas.add(delta);
        }
    }
    if (visual) {
        linePlots.add(linePlot);
    }
    nEvals++;
    double delta;
    if (useDiscountFactor) {
        // An empty sequence leaves denom at 0; 0.0 / 0.0 would be NaN, so report
        // "no change" instead, matching what the non-discounted branch yields.
        delta = (denom != 0) ? discountedTot / denom : 0;
    } else {
        delta = obs.getGameScore(playerID) - initScore;
    }
    delta += noiseLevel * random.nextGaussian();
    logger.log(delta, actions, false);
    return delta;
}
Also used : LineChartAxis(plot.LineChartAxis) LinePlot(plot.LinePlot) JEasyFrame(utilities.JEasyFrame) StateObservationMulti(core.game.StateObservationMulti) LineChart(plot.LineChart) SpaceBattleLinkStateTwoPlayer(gvglink.SpaceBattleLinkStateTwoPlayer)

Example 8 with StateObservationMulti

use of core.game.StateObservationMulti in project SimpleAsteroids by ljialin.

In the class AgentEvaluator, method evaluate:

/**
 * Evaluates a parameter setting by playing one two-player game and reporting the outcome.
 *
 * <p>Player 1 is an evolutionary agent driven by a {@code SlidingMeanEDA} whose history
 * length, discount factor and rollout length are read from {@code solution} via
 * {@code searchSpace}. Player 2 is a fixed opponent: the same agent type with history
 * length 2 and sequence length 5. The game runs for at most 500 steps or until it is
 * over, and timing and tick statistics for both players are printed at the end.
 *
 * <p>Side effects: overwrites the static {@code BattleGameParameters.loss} and
 * {@code BattleGameParameters.thrust}, sets {@code GameActionSpaceAdapterMulti.useHeuristic}
 * to {@code false}, and writes progress to standard output.
 *
 * @param solution point in the search space selecting the parameters of player 1
 * @return 1 if player 1's final game score is positive, -1 if it is negative, 0 otherwise
 */
@Override
public double evaluate(int[] solution) {
    System.out.println("Params are:");
    System.out.println(searchSpace.report(solution));
    // can also override parameters by setting them directly as follows:
    BattleGameParameters.loss = 0.996;
    BattleGameParameters.thrust = 3;
    SpaceBattleLinkStateTwoPlayer linkState = new SpaceBattleLinkStateTwoPlayer();
    // same object, viewed through the multi-player observation type
    StateObservationMulti multi = linkState;
    GameActionSpaceAdapterMulti.useHeuristic = false;
    ElapsedCpuTimer timer = new ElapsedCpuTimer();
    int idPlayer1 = 0;
    int idPlayer2 = 1;
    int nEvals = 100;

    // player 1: the agent under evaluation, configured from the solution
    EvoAlg evoAlg = new SlidingMeanEDA().setHistoryLength(searchSpace.getHistoryLength(solution));
    Agent evoAgent = new controllers.multiPlayer.ea.Agent(linkState, timer, evoAlg, idPlayer1, nEvals);
    evoAgent.setDiscountFactor(searchSpace.getDiscountFactor(solution));
    evoAgent.sequenceLength = searchSpace.getRolloutLength(solution);

    // player 2: fixed reference opponent
    EvoAlg evoAlg2 = new SlidingMeanEDA().setHistoryLength(2);
    Agent player2 = new controllers.multiPlayer.ea.Agent(linkState, timer, evoAlg2, idPlayer2, nEvals);
    player2.sequenceLength = 5;

    // in milliseconds
    int thinkingTime = 10;
    int nSteps = 500;
    StatSummary sst1 = new StatSummary("Player 1 Elapsed Time");
    StatSummary sst2 = new StatSummary("Player 2 Elapsed Time");
    StatSummary ssTicks1 = new StatSummary("Player 1 nTicks");
    StatSummary ssTicks2 = new StatSummary("Player 2 nTicks");
    for (int i = 0; i < nSteps && !linkState.isGameOver(); i++) {
        linkState.state = linkState.state.copyState();
        // NOTE(review): both players receive this same timer instance within a step;
        // confirm whether player 2 is meant to get a fresh time budget.
        timer = new ElapsedCpuTimer();
        timer.setMaxTimeMillis(thinkingTime);

        // keep track of the number of game ticks used by each algorithm
        ElapsedTimer t1 = new ElapsedTimer();
        int ticksBefore1 = SpaceBattleLinkStateTwoPlayer.nTicks;
        Types.ACTIONS action1 = evoAgent.act(multi.copy(), timer);
        sst1.add(t1.elapsed());
        ssTicks1.add(SpaceBattleLinkStateTwoPlayer.nTicks - ticksBefore1);

        ElapsedTimer t2 = new ElapsedTimer();
        int ticksBefore2 = SpaceBattleLinkStateTwoPlayer.nTicks;
        Types.ACTIONS action2 = player2.act(multi.copy(), timer);
        sst2.add(t2.elapsed());
        ssTicks2.add(SpaceBattleLinkStateTwoPlayer.nTicks - ticksBefore2);

        multi.advance(new Types.ACTIONS[] { action1, action2 });
    }
    System.out.println(multi.getGameScore());
    System.out.println(multi.isGameOver());
    System.out.println(sst1);
    System.out.println(sst2);
    System.out.println(ssTicks1);
    System.out.println(ssTicks2);
    double score = multi.getGameScore(idPlayer1);
    System.out.println("Game score: " + score);
    // collapse the score to a win / loss / draw signal
    if (score > 0) {
        return 1;
    }
    if (score < 0) {
        return -1;
    }
    return 0;
}
Also used : Agent(controllers.multiPlayer.ea.Agent) Types(ontology.Types) SlidingMeanEDA(ntuple.SlidingMeanEDA) NTupleBanditEA(ntuple.NTupleBanditEA) StatSummary(utilities.StatSummary) SimpleRMHC(ga.SimpleRMHC) Random(java.util.Random) StateObservationMulti(core.game.StateObservationMulti) ElapsedTimer(utilities.ElapsedTimer) ElapsedCpuTimer(tools.ElapsedCpuTimer) SpaceBattleLinkStateTwoPlayer(gvglink.SpaceBattleLinkStateTwoPlayer)

Aggregations

StateObservationMulti (core.game.StateObservationMulti): 8; ElapsedCpuTimer (tools.ElapsedCpuTimer): 5; Random (java.util.Random): 4; Types (ontology.Types): 4; ElapsedTimer (utilities.ElapsedTimer): 4; SimpleRMHC (ga.SimpleRMHC): 3; SpaceBattleLinkStateTwoPlayer (gvglink.SpaceBattleLinkStateTwoPlayer): 3; StatSummary (utilities.StatSummary): 3; AbstractMultiPlayer (core.player.AbstractMultiPlayer): 2; EvoAlg (evodef.EvoAlg): 2; NTupleBanditEA (ntuple.NTupleBanditEA): 2; SlidingMeanEDA (ntuple.SlidingMeanEDA): 2; JEasyFrame (utilities.JEasyFrame): 2; BattleView (battle.BattleView): 1; Agent (controllers.multiPlayer.ea.Agent): 1; CompactSlidingModelGA (ntuple.CompactSlidingModelGA): 1; DiffGame (numbergame.DiffGame): 1; LineChart (plot.LineChart): 1; LineChartAxis (plot.LineChartAxis): 1; LinePlot (plot.LinePlot): 1