Use of structures._QUPair in project IR_Base by Linda-sunshine.
Class L2RMetricLearning, method createTrainingCorpus.
/**
 * Builds the learning-to-rank training queries from the labeled training documents.
 *
 * <p>Each training document is tried as a query. Its candidate list consists of the
 * documents retained by the similarity ranker (constructed with capacity {@code m_topK})
 * plus randomly injected documents. A query is accepted only if its candidate list holds
 * at least one document with the same label and one with a different label, and if
 * accepting it does not violate the positive/negative query balance. For every accepted
 * query a {@code _Query} is appended to {@code m_queries} and its ranking pairs are created.
 * Finally {@code normalize()} is invoked on the collected features.
 *
 * @return the total number of ranking pairs reported by {@code createRankingPairs()}
 *         over all accepted queries
 */
protected int createTrainingCorpus() {
    // pre-compute the similarity between labeled documents
    calcLabeledSimilarities();

    final int trainSize = m_trainSet.size();
    // A query can never have more neighbors than there are *other* training documents.
    // Without this cap the random-injection loop below spins forever on small training sets.
    // NOTE(review): assumes the training documents are pairwise distinct under equals() - confirm.
    final int maxNeighbors = Math.max(0, trainSize - 1);
    // desired candidate-list size: the top-K list plus the share of random "noise" documents
    final double neighborBudget = (1.0 + m_noiseRatio) * m_topK;

    MyPriorityQueue<_RankItem> simRanker = new MyPriorityQueue<_RankItem>(m_topK);
    ArrayList<_Doc> neighbors = new ArrayList<_Doc>();
    int posQ = 0, negQ = 0, pairSize = 0;

    for (int i = 0; i < trainSize; i++) {
        // candidate query document
        _Doc di = m_trainSet.get(i);
        int relevant = 0, irrelevant = 0;

        // using content similarity to construct initial ranking
        for (int j = 0; j < trainSize; j++) {
            if (i != j) {
                simRanker.add(new _RankItem(j, m_LabeledCache[getIndex(i, j)]));
            }
        }

        // collect the documents kept by the ranker (top K by the default similarity measure)
        for (_RankItem it : simRanker) {
            _Doc dj = m_trainSet.get(it.m_index);
            neighbors.add(dj);
            if (di.getYLabel() == dj.getYLabel()) {
                relevant++;
            } else {
                irrelevant++;
            }
        }

        // inject some random neighbors: cycle through the training set and accept each
        // not-yet-selected document with probability 0.02 until the budget is used up
        int j = 0;
        while (neighbors.size() < neighborBudget && neighbors.size() < maxNeighbors) {
            if (i != j) {
                _Doc dj = m_trainSet.get(j);
                if (Math.random() < 0.02 && !neighbors.contains(dj)) {
                    neighbors.add(dj);
                    if (di.getYLabel() == dj.getYLabel()) {
                        relevant++;
                    } else {
                        irrelevant++;
                    }
                }
            }
            j = (j + 1) % trainSize;
        }

        // Reject queries whose candidates all share one relevance level (no ranking pair can
        // be formed), and positive queries while negatives are below 1.1x the positives.
        boolean rejected = relevant == 0 || irrelevant == 0
                || (di.getYLabel() == 1 && negQ < 1.1 * posQ);

        if (!rejected) {
            if (di.getYLabel() == 1) {
                posQ++;
            } else {
                negQ++;
            }

            // accept the query
            _Query q = new _Query();
            m_queries.add(q);

            // construct features for the selected documents with respect to the query di;
            // a neighbor is labeled 1 when it shares di's label and 0 otherwise
            for (_Doc d : neighbors) {
                q.addQUPair(new _QUPair(d.getYLabel() == di.getYLabel() ? 1 : 0, genRankingFV(di, d)));
            }
            pairSize += q.createRankingPairs();
        }

        // clear the cache for next query
        simRanker.clear();
        neighbors.clear();
    }

    // normalize the features by z-score
    normalize();

    System.out.format("Generate %d(%d:%d) ranking pairs for L2R model training...\n", pairSize, posQ, negQ);
    return pairSize;
}
Use of structures._QUPair in project IR_Base by Linda-sunshine.
Class LambdaRank, method evaluate.
/**
 * Evaluates the current model over all queries and prints a one-line progress report.
 *
 * <p>For every query the ranking metric from {@code m_eval} is averaged over the queries
 * that yield a non-negative value. In addition, the pairwise log-likelihood
 * (sum of {@code log(logistic(better - worse))}) and the number of mis-ordered pairs are
 * accumulated from the stored {@code m_score} of each document; the scores are expected
 * to have been computed before this method is called.
 *
 * @return the mean metric value over the evaluable queries, or {@code 0} when no query
 *         is evaluable (previously this case produced {@code NaN} from {@code 0.0 / 0.0})
 */
protected double evaluate() {
    double obj = 0, perf = 0;
    int evaluated = 0, misorder = 0;

    for (_Query query : m_queries) {
        // ranking score should already be calculated; a negative result marks the
        // query as not evaluable, so it is left out of the average
        double r = m_eval.eval(query);
        if (r >= 0) {
            perf += r;
            evaluated++;
        }

        for (_QUPair pair : query.m_docList) {
            if (pair.m_worseURLs != null) {
                for (_QUPair worseURL : pair.m_worseURLs) {
                    // skip the log term when the logistic underflows to 0 (log(0) = -Infinity)
                    r = Utils.logistic(pair.m_score - worseURL.m_score);
                    if (r > 0) {
                        obj += Math.log(r);
                    }
                    if (pair.m_score <= worseURL.m_score) {
                        misorder++;
                    }
                }
            }

            if (pair.m_betterURLs != null) {
                for (_QUPair betterURL : pair.m_betterURLs) {
                    r = Utils.logistic(betterURL.m_score - pair.m_score);
                    if (r > 0) {
                        obj += Math.log(r);
                    }
                    if (pair.m_score >= betterURL.m_score) {
                        misorder++;
                    }
                }
            }
        }
    }

    // guard against 0/0 when no query produced a valid metric value
    if (evaluated > 0) {
        perf /= evaluated;
    }

    // regularized objective, to be maximized
    obj -= 0.5 * m_lambda * Utils.L2Norm(m_weight);

    // misorder is halved for reporting - presumably each pair is visited once through
    // m_worseURLs and once through m_betterURLs; TODO confirm
    System.out.format("%d\t%.2f\t%.4f\n", misorder / 2, obj, perf);
    return perf;
}
Use of structures._QUPair in project IR_Base by Linda-sunshine.
Class LambdaRankWorker, method evaluate.
/**
 * Re-scores this worker's queries with the current weights and refreshes the worker's
 * statistics: {@code m_perf} (sum of non-negative metric values), {@code m_obj}
 * (sum of pairwise log-logistic terms) and {@code m_misorder} (mis-ordered pair count,
 * halved at the end).
 */
protected void evaluate() {
    m_misorder = 0;
    m_perf = 0;
    m_obj = 0;

    for (_Query query : m_queries) {
        // calculate ranking score with latest weight
        for (_QUPair doc : query.m_docList) {
            doc.score(m_weight);
        }

        // only non-negative metric values contribute to the performance sum
        double metric = m_eval.eval(query);
        if (metric >= 0) {
            m_perf += metric;
        }

        for (_QUPair doc : query.m_docList) {
            if (doc.m_worseURLs != null) {
                for (_QUPair worse : doc.m_worseURLs) {
                    double prob = Utils.logistic(doc.m_score - worse.m_score);
                    if (prob > 0) {
                        m_obj += Math.log(prob);
                    }
                    if (doc.m_score <= worse.m_score) {
                        m_misorder++;
                    }
                }
            }

            if (doc.m_betterURLs != null) {
                for (_QUPair better : doc.m_betterURLs) {
                    double prob = Utils.logistic(better.m_score - doc.m_score);
                    if (prob > 0) {
                        m_obj += Math.log(prob);
                    }
                    if (doc.m_score >= better.m_score) {
                        m_misorder++;
                    }
                }
            }
        }
    }

    // the count is halved before being exposed - presumably every pair is seen from both sides
    m_misorder /= 2;
}
Use of structures._QUPair in project IR_Base by Linda-sunshine.
Class NDCG_Evaluator, method updateDeltas.
/**
 * Rebuilds the pairwise delta cache for the current query.
 *
 * <p>After delegating to the superclass, for each of the first {@code min(m_size, m_k)}
 * documents a map is stored in {@code m_deltas} that holds, for every document ranked
 * below it, the absolute gain difference weighted by the discount difference of the two
 * positions and divided by {@code m_iDCG}. For a lower document at position
 * {@code >= m_k} only the upper position's discount is used.
 */
@Override
public void updateDeltas() {
    super.updateDeltas();

    final int cutoff = Math.min(m_size, m_k);
    for (int upper = 0; upper < cutoff; upper++) {
        _QUPair upperDoc = m_query.m_docList.get(upper);
        // per-document cache: lower-ranked document -> normalized delta
        HashMap<_QUPair, Double> swapDeltas = new HashMap<_QUPair, Double>();

        for (int lower = upper + 1; lower < m_size; lower++) {
            _QUPair lowerDoc = m_query.m_docList.get(lower);
            double delta = (m_gain[upperDoc.m_y] - m_gain[lowerDoc.m_y])
                    * (lower >= m_k ? m_discount[upper] : m_discount[upper] - m_discount[lower]);
            swapDeltas.put(lowerDoc, Math.abs(delta) / m_iDCG);
        }

        m_deltas.put(upperDoc, swapDeltas);
    }
}
Use of structures._QUPair in project IR_Base by Linda-sunshine.
Class L2RMetricLearning, method evaluate.
/**
 * Scores every query-document pair with the current weights and returns the mean
 * ranking metric over the training queries.
 *
 * @param otype selects the metric: {@code OT_MAP} uses {@code MAP_Evaluator},
 *              {@code OT_NDCG} uses {@code NDCG_Evaluator} with {@code LambdaRank.NDCG_K},
 *              anything else uses the base {@code Evaluator}
 * @return the mean metric value over the evaluable queries, or {@code 0} when there is
 *         none (an empty {@code m_queries} previously produced {@code NaN})
 */
double evaluate(OptimizationType otype) {
    Evaluator eval;
    if (otype.equals(OptimizationType.OT_MAP))
        eval = new MAP_Evaluator();
    else if (otype.equals(OptimizationType.OT_NDCG))
        eval = new NDCG_Evaluator(LambdaRank.NDCG_K);
    else
        eval = new Evaluator();

    double perf = 0;
    int evaluated = 0;
    for (_Query q : m_queries) {
        // refresh the ranking scores with the latest weights before evaluating
        for (_QUPair qu : q.m_docList) {
            qu.score(m_weights);
        }

        // A negative result is treated as "query not evaluable" and left out of the mean,
        // the same r >= 0 convention the LambdaRank evaluators apply to Evaluator results.
        // NOTE(review): confirm against Evaluator.eval's contract.
        double r = eval.eval(q);
        if (r >= 0) {
            perf += r;
            evaluated++;
        }
    }

    // avoid 0/0 -> NaN when nothing could be evaluated
    return evaluated > 0 ? perf / evaluated : 0;
}
Aggregations