Use of nars.rl.horde.math.VectorPool in project narchy by automenta.
Class GQ, method update.
/**
 * Runs one update step: computes {@code delta_t}, updates the trace {@code e}, then
 * adjusts the weight vectors {@code v} and {@code w} in place.
 *
 * @param x_t       current feature vector; when {@code null} the call is forwarded to
 *                  {@code initEpisode()} and its result is returned
 * @param rho_t     factor multiplied into the trace update coefficient
 * @param r_tp1     additive term of {@code delta_t} (presumably the reward — confirm with caller)
 * @param x_bar_tp1 next-step feature vector; may be {@code null}, in which case it
 *                  contributes nothing to {@code delta_t} or to the correction term
 * @param z_tp1     term of {@code delta_t} weighted by {@code beta_tp1}
 * @return the value stored in {@code delta_t} by this step
 */
public double update(RealVector x_t, double rho_t, double r_tp1, RealVector x_bar_tp1, double z_tp1) {
    if (x_t == null)
        return initEpisode();
    VectorPool pool = VectorPools.pool(x_t);
    try {
        // x_bar_tp1 is null-checked further down, so guard the dot product the same way
        // instead of dereferencing it unconditionally.
        double vDotXBar = x_bar_tp1 != null ? v.dotProduct(x_bar_tp1) : 0.0;
        delta_t = r_tp1 + beta_tp1 * z_tp1 + (1 - beta_tp1) * vDotXBar - v.dotProduct(x_t);
        e.update((1 - beta_tp1) * lambda_t * rho_t, x_t);
        // delta_e = delta_t * e, held in a pooled copy so the trace itself is not scaled.
        RealVector delta_e = pool.newVector(e.vect()).mapMultiplyToSelf(delta_t);
        // NOTE(review): assumes pool.newVector() hands back a zero vector — confirm.
        ArrayRealVector tdCorrection = pool.newVector();
        if (x_bar_tp1 != null)
            tdCorrection.combineToSelf(0, 1, x_bar_tp1).mapMultiplyToSelf((1 - beta_tp1) * (1 - lambda_t) * e.vect().dotProduct(w));
        // v is updated from a second copy of delta_e because delta_e is reused (and mutated) for w below.
        v.combineToSelf(1, alpha_v, pool.newVector(delta_e).combineToSelf(1, -1, tdCorrection));
        w.combineToSelf(1, alpha_w, delta_e.combineToSelf(1, -1, pool.newVector(x_t).mapMultiplyToSelf(w.dotProduct(x_t))));
        return delta_t;
    } finally {
        // Release pooled vectors even when a vector operation above throws.
        pool.releaseAll();
    }
}
Use of nars.rl.horde.math.VectorPool in project narchy by automenta.
Class GreedyGQ, method update.
/**
 * Computes the ratio {@code rho_t} for the taken action, builds the policy-weighted
 * next state-action vector, and delegates the actual learning step to {@code gq.update}.
 *
 * @param x_t       current state features; may be {@code null}
 * @param a_t       action taken; may be {@code null}, in which case {@code rho_t} stays 0
 * @param r_tp1     passed through to {@code gq.update}
 * @param gamma_tp1 not used by this method
 * @param z_tp1     passed through to {@code gq.update}
 * @param x_tp1     next state features; may be {@code null}
 * @param a_tp1     not used by this method
 * @return the value returned by {@code gq.update}
 */
public double update(RealVector x_t, A a_t, double r_tp1, double gamma_tp1, double z_tp1, RealVector x_tp1, A a_tp1) {
    rho_t = 0.0;
    if (a_t != null && x_t != null) {
        target.update(x_t);
        behaviour.update(x_t);
        // No guard against behaviour.pi(a_t) == 0: rho_t can be infinite or NaN here.
        rho_t = target.pi(a_t) / behaviour.pi(a_t);
    }
    VectorPool pool = VectorPools.pool(prototype, gq.v.getDimension());
    try {
        // Accumulates sum over actions of pi(a) * stateAction(x_tp1, a).
        // NOTE(review): assumes pool.newVector() starts as a zero vector — confirm.
        RealVector sa_bar_tp1 = pool.newVector();
        if (x_t != null && x_tp1 != null) {
            target.update(x_tp1);
            for (A a : actions) {
                double pi = target.pi(a);
                if (pi == 0)
                    continue;
                sa_bar_tp1.combineToSelf(1, pi, toStateAction.stateAction(x_tp1, a));
            }
        }
        // NOTE(review): a_t may still be null here when x_t is not — verify stateAction tolerates that.
        RealVector phi_stat = x_t != null ? toStateAction.stateAction(x_t, a_t) : null;
        return gq.update(phi_stat, rho_t, r_tp1, sa_bar_tp1, z_tp1);
    } finally {
        // Release pooled vectors even when the policy, projector or gq.update throws.
        pool.releaseAll();
    }
}
Use of nars.rl.horde.math.VectorPool in project narchy by automenta.
Class GTDLambda, method update.
/**
 * Runs one update step: computes {@code v_t} and {@code delta_t}, updates and rescales
 * the trace {@code e}, then adjusts the weight vectors {@code v} and {@code w} in place.
 *
 * @param pi_t      numerator of the ratio {@code rho_t = pi_t / b_t}
 * @param b_t       denominator of that ratio; not checked for zero
 * @param x_t       current feature vector; when {@code null} the call is forwarded to
 *                  {@code initEpisode(gamma_tp1)} and its result is returned
 * @param x_tp1     next feature vector; may be {@code null}, in which case it contributes
 *                  nothing to {@code delta_t} or to the correction vector
 * @param r_tp1     additive term of {@code delta_t} (presumably the reward — confirm with caller)
 * @param gamma_tp1 weight on the next-step value; stored into {@code gamma_t} after the step
 * @param z_tp1     term of {@code delta_t} weighted by {@code 1 - gamma_tp1}
 * @return the value stored in {@code delta_t} by this step
 */
@Override
public double update(double pi_t, double b_t, RealVector x_t, RealVector x_tp1, double r_tp1, double gamma_tp1, double z_tp1) {
    if (x_t == null)
        return initEpisode(gamma_tp1);
    VectorPool pool = VectorPools.pool(e.vect());
    try {
        v_t = v.dotProduct(x_t);
        // x_tp1 is null-checked further down, so guard the dot product the same way
        // instead of dereferencing it unconditionally.
        double nextValue = x_tp1 != null ? v.dotProduct(x_tp1) : 0.0;
        delta_t = r_tp1 + (1 - gamma_tp1) * z_tp1 + gamma_tp1 * nextValue - v_t;
        // Update traces: uses the gamma stored by the previous call, then scales by rho_t.
        e.update(gamma_t * lambda, x_t);
        double rho_t = pi_t / b_t;
        e.vect().mapMultiplyToSelf(rho_t);
        // Compute correction.
        // NOTE(review): assumes pool.newVector() starts as a zero vector — confirm.
        ArrayRealVector correctionVector = pool.newVector();
        if (x_tp1 != null) {
            correction = e.vect().dotProduct(w);
            correctionVector.combineToSelf(1, correction * gamma_tp1 * (1 - lambda), x_tp1);
        }
        // Update parameters: v uses a second copy of deltaE because deltaE is mutated for w below.
        RealVector deltaE = pool.newVector(e.vect()).mapMultiplyToSelf(delta_t);
        v.combineToSelf(1, alpha_v, pool.newVector(deltaE).combineToSelf(1, -1, correctionVector));
        w.combineToSelf(1, alpha_w, deltaE.combineToSelf(1, -w.dotProduct(x_t), x_t));
        gamma_t = gamma_tp1;
        return delta_t;
    } finally {
        // Release pooled vectors even when a vector operation above throws.
        pool.releaseAll();
    }
}
Aggregations