use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.
the class VersionInfo method getMaxVersionFromIndex.
/**
 * Looks up the largest value of the version field that is present in the index.
 *
 * <p>When the version field is indexed, the maximum is taken from the field's terms. Otherwise
 * every document of every leaf is visited and its value is read through the field's value source.
 *
 * @param searcher the searcher whose index is inspected
 * @return the largest version found, or 0L when no larger value is found
 * @throws IOException propagated from the underlying index access
 */
public Long getMaxVersionFromIndex(IndexSearcher searcher) throws IOException {
  final String fieldName = versionField.getName();
  log.debug("Refreshing highest value of {} for {} version buckets from index", fieldName, buckets.length);

  if (!versionField.indexed()) {
    // Not indexed, so there are no terms to consult: walk the documents of each leaf instead.
    final ValueSource valueSource = versionField.getType().getValueSource(versionField, null);
    final Map context = ValueSource.newContext(searcher);
    valueSource.createWeight(context, searcher);
    long highest = 0L;
    // TODO: multi-thread this
    for (LeafReaderContext leaf : searcher.getTopReaderContext().leaves()) {
      final int docLimit = leaf.reader().maxDoc();
      final FunctionValues values = valueSource.getValues(context, leaf);
      for (int docId = 0; docId < docLimit; docId++) {
        final long candidate = values.longVal(docId);
        if (candidate > highest) {
          highest = candidate;
        }
      }
    }
    return highest;
  }

  // Indexed: the terms of the version field give us the maximum directly.
  final Terms terms = SlowCompositeReaderWrapper.wrap(searcher.getIndexReader()).terms(fieldName);
  Long termsMax = null;
  if (terms != null) {
    termsMax = LegacyNumericUtils.getMaxLong(terms);
  }
  if (termsMax == null) {
    log.debug("No terms found for {}, cannot seed version bucket highest value from index", fieldName);
    return 0L;
  }
  final long highest = termsMax.longValue();
  log.debug("Found MAX value {} from Terms for {} in index", highest, fieldName);
  return highest;
}
use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.
the class BooleanPerceptronClassifierTest method testBasicUsageWithQuery.
/**
 * Builds a BooleanPerceptronClassifier over the sample index, passing a TermQuery for the term
 * "of" on the text field, and checks the expected classification of the technology and politics
 * inputs.
 */
@Test
public void testBasicUsageWithQuery() throws Exception {
  TermQuery query = new TermQuery(new Term(textFieldName, "of"));
  MockAnalyzer analyzer = new MockAnalyzer(random());
  // try-with-resources closes the reader on every path and keeps an exception thrown by close()
  // from masking an assertion failure raised inside the block.
  try (LeafReader leafReader = getSampleIndex(analyzer)) {
    BooleanPerceptronClassifier classifier = new BooleanPerceptronClassifier(leafReader, analyzer, query, 1, null, booleanFieldName, textFieldName);
    checkCorrectClassification(classifier, TECHNOLOGY_INPUT, false);
    checkCorrectClassification(classifier, POLITICS_INPUT, true);
  }
}
use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.
the class BooleanPerceptronClassifierTest method testPerformance.
/**
 * Trains a BooleanPerceptronClassifier on a random index and checks that training and evaluation
 * finish within fixed time limits, and that the overall and per-term metrics reported by the
 * confusion matrix all lie in the range [0, 1].
 */
@Test
public void testPerformance() throws Exception {
  MockAnalyzer analyzer = new MockAnalyzer(random());
  // try-with-resources closes the reader on every path and keeps an exception thrown by close()
  // from masking an assertion failure raised inside the block.
  try (LeafReader leafReader = getRandomIndex(analyzer, 100)) {
    // Training is timed around the constructor call.
    long trainStart = System.currentTimeMillis();
    BooleanPerceptronClassifier classifier = new BooleanPerceptronClassifier(leafReader, analyzer, null, 1, null, booleanFieldName, textFieldName);
    long trainEnd = System.currentTimeMillis();
    long trainTime = trainEnd - trainStart;
    assertTrue("training took more than 10s: " + trainTime / 1000 + "s", trainTime < 10000);

    long evaluationStart = System.currentTimeMillis();
    ConfusionMatrixGenerator.ConfusionMatrix confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(leafReader, classifier, booleanFieldName, textFieldName, -1);
    assertNotNull(confusionMatrix);
    long evaluationEnd = System.currentTimeMillis();
    long evaluationTime = evaluationEnd - evaluationStart;
    assertTrue("evaluation took more than 1m: " + evaluationTime / 1000 + "s", evaluationTime < 60000);

    double avgClassificationTime = confusionMatrix.getAvgClassificationTime();
    assertTrue(5000 > avgClassificationTime);

    // Overall metrics must be valid ratios.
    double f1 = confusionMatrix.getF1Measure();
    assertTrue(f1 >= 0d);
    assertTrue(f1 <= 1d);
    double accuracy = confusionMatrix.getAccuracy();
    assertTrue(accuracy >= 0d);
    assertTrue(accuracy <= 1d);
    double recall = confusionMatrix.getRecall();
    assertTrue(recall >= 0d);
    assertTrue(recall <= 1d);
    double precision = confusionMatrix.getPrecision();
    assertTrue(precision >= 0d);
    assertTrue(precision <= 1d);

    // Per-term metrics: one check per term of the class field.
    Terms terms = MultiFields.getTerms(leafReader, booleanFieldName);
    // Fail with a clear message instead of a NullPointerException if the field has no terms.
    assertNotNull("no terms found for field " + booleanFieldName, terms);
    TermsEnum iterator = terms.iterator();
    BytesRef term;
    while ((term = iterator.next()) != null) {
      String s = term.utf8ToString();
      recall = confusionMatrix.getRecall(s);
      assertTrue(recall >= 0d);
      assertTrue(recall <= 1d);
      precision = confusionMatrix.getPrecision(s);
      assertTrue(precision >= 0d);
      assertTrue(precision <= 1d);
      double f1Measure = confusionMatrix.getF1Measure(s);
      assertTrue(f1Measure >= 0d);
      assertTrue(f1Measure <= 1d);
    }
  }
}
use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.
the class BM25NBClassifierTest method testBasicUsage.
/**
 * Builds a BM25NBClassifier over the sample index without a query and checks that the technology
 * input is assigned the expected technology result.
 */
@Test
public void testBasicUsage() throws Exception {
  MockAnalyzer analyzer = new MockAnalyzer(random());
  // try-with-resources closes the reader on every path and keeps an exception thrown by close()
  // from masking an assertion failure raised inside the block.
  try (LeafReader leafReader = getSampleIndex(analyzer)) {
    BM25NBClassifier classifier = new BM25NBClassifier(leafReader, analyzer, null, categoryFieldName, textFieldName);
    checkCorrectClassification(classifier, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
  }
}
use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.
the class BM25NBClassifierTest method testBasicUsageWithQuery.
/**
 * Builds a BM25NBClassifier over the sample index, passing a TermQuery for the term "not" on the
 * text field, and checks that the technology input is assigned the expected technology result.
 */
@Test
public void testBasicUsageWithQuery() throws Exception {
  MockAnalyzer analyzer = new MockAnalyzer(random());
  // try-with-resources closes the reader on every path and keeps an exception thrown by close()
  // from masking an assertion failure raised inside the block.
  try (LeafReader leafReader = getSampleIndex(analyzer)) {
    TermQuery query = new TermQuery(new Term(textFieldName, "not"));
    BM25NBClassifier classifier = new BM25NBClassifier(leafReader, analyzer, query, categoryFieldName, textFieldName);
    checkCorrectClassification(classifier, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
  }
}
Aggregations