Use of nars.rl.horde.demons.PredictionDemonVerifier in project narchy by automenta.
From the class HordeTest, method testPredictionDemon.
/**
 * Runs a single-state experiment with a TD learner whose first constructor
 * argument (the value later read back through {@code gamma()}) is 0.0, then
 * checks the demon's prediction against {@code RewardFunction01.reward()}
 * with an absolute tolerance of 1.0.
 */
@Test
public void testPredictionDemon() {
    // NOTE(review): second TD argument (0.1) is presumably the step size — confirm against TD.
    final TD learner = new TD(0.0, 0.1, 1);
    final PredictionDemon demon = new PredictionDemon(RewardFunction01, learner);
    final PredictionDemonVerifier demonVerifier = new PredictionDemonVerifier(learner.gamma(), demon);

    runExperiment(demon, demonVerifier);

    // Both values are read only after the experiment loop has finished.
    final double expectedPrediction = RewardFunction01.reward();
    final double actualPrediction = demon.prediction();
    assertEquals(expectedPrediction, actualPrediction, 1.0);
}
Use of nars.rl.horde.demons.PredictionDemonVerifier in project narchy by automenta.
From the class HordeTest, method testPredictionDemonGamma09MultipleState.
/**
 * Runs the experiment loop with gamma 0.9 over 50 distinct states.
 *
 * <p>The state for a given time step is a vector of length 50 in which only
 * the entry at index {@code time % 50} is set to 1. The run is capped at
 * {@code 1000 * 50} steps; this test performs no assertion of its own beyond
 * what {@code runExperiment} checks.
 */
@Test
public void testPredictionDemonGamma09MultipleState() {
    final int stateCount = 50;

    // The same count sizes the TD learner, the reward function and the state vectors.
    final TD learner = new TD(0.9, 0.1, stateCount);
    final PredictionDemon demon = new PredictionDemon(new CustomRewardFunction(stateCount), learner);
    final PredictionDemonVerifier demonVerifier = new PredictionDemonVerifier(learner.gamma(), demon);

    final TimeToState cyclingStates = new TimeToState() {
        @Override
        public RealVector get(int time) {
            // Fresh vector on every call; the active index wraps around every stateCount steps.
            final RealVector state = new ArrayRealVector(stateCount);
            state.setEntry(time % stateCount, 1);
            return state;
        }
    };

    runExperiment(demon, demonVerifier, cyclingStates, 1000 * stateCount);
}
Use of nars.rl.horde.demons.PredictionDemonVerifier in project narchy by automenta.
From the class HordeTest, method testPredictionDemonGamma09.
/**
 * Runs a single-state experiment with gamma 0.9 and checks that the demon's
 * prediction is within 1.0 of {@code RewardFunction01.reward() / (1 - gamma)}.
 */
@Test
public void testPredictionDemonGamma09() {
    final double discount = 0.9;
    final TD learner = new TD(discount, 0.1, 1);
    final PredictionDemon demon = new PredictionDemon(RewardFunction01, learner);
    final PredictionDemonVerifier demonVerifier = new PredictionDemonVerifier(learner.gamma(), demon);

    runExperiment(demon, demonVerifier);

    // Expected value is the current reward scaled by 1 / (1 - gamma).
    final double expectedPrediction = RewardFunction01.reward() / (1 - discount);
    final double actualPrediction = demon.prediction();
    assertEquals(expectedPrediction, actualPrediction, 1.0);
}
Use of nars.rl.horde.demons.PredictionDemonVerifier in project narchy by automenta.
From the class HordeTest, method runExperiment.
/**
 * Steps the demon and its verifier until the verifier's error monitor has
 * computed an error whose absolute value is below the monitor's precision.
 *
 * <p>On every step the state for the current time is fetched, the demon's
 * reward function (cast to {@code RewardFunctionTest}) is updated with that
 * time, the demon is updated with the previous and new states, and the
 * verifier is updated. An assertion fails if the step count reaches
 * {@code maxStep} before the loop condition is satisfied.
 *
 * @param predictionDemon demon being trained; its reward function must be a {@code RewardFunctionTest}
 * @param demonVerifier   verifier whose error monitor decides when to stop
 * @param timeToState     supplies the state vector for each time step
 * @param maxStep         exclusive upper bound on the number of completed steps
 */
protected void runExperiment(PredictionDemon predictionDemon, PredictionDemonVerifier demonVerifier, TimeToState timeToState, int maxStep) {
    final PredictionDemonVerifier.TDErrorMonitor monitor = demonVerifier.errorMonitor();

    // No previous state exists on the very first update, so null is passed.
    RealVector previousState = null;

    for (int step = 0;
         !monitor.errorComputed() || Math.abs(monitor.error()) >= monitor.precision();
         step++) {
        final RealVector nextState = timeToState.get(step);

        final RewardFunctionTest rewards = (RewardFunctionTest) predictionDemon.rewardFunction();
        rewards.update(step);

        predictionDemon.update(previousState, null, nextState);
        demonVerifier.update(false);
        previousState = nextState;

        // The number of completed steps (step + 1) must stay below the cap.
        assertTrue(step + 1 < maxStep);
    }
}
Aggregations