Search in sources :

Example 61 with LeafReader

use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.

the class VersionInfo method getMaxVersionFromIndex.

/**
 * Looks up the highest value of the version field in the index behind the given searcher.
 *
 * <p>If the version field is indexed, the maximum is read from the field's terms. Otherwise the
 * field's {@link ValueSource} is evaluated for every document id of every leaf and the largest
 * value seen (never less than 0L) is kept.
 *
 * @param searcher searcher over the index to inspect
 * @return the highest version found, or 0L if no versions can be found in the index
 * @throws IOException if reading from the index fails
 */
public Long getMaxVersionFromIndex(IndexSearcher searcher) throws IOException {
    final String fieldName = versionField.getName();
    log.debug("Refreshing highest value of {} for {} version buckets from index", fieldName, buckets.length);

    if (!versionField.indexed()) {
        // Not indexed: there are no terms to consult, so evaluate the value source per document.
        long highest = 0L;
        ValueSource valueSource = versionField.getType().getValueSource(versionField, null);
        Map context = ValueSource.newContext(searcher);
        valueSource.createWeight(context, searcher);
        // TODO: multi-thread this
        for (LeafReaderContext leaf : searcher.getTopReaderContext().leaves()) {
            final int docLimit = leaf.reader().maxDoc();
            final FunctionValues values = valueSource.getValues(context, leaf);
            for (int docId = 0; docId < docLimit; docId++) {
                highest = Math.max(values.longVal(docId), highest);
            }
        }
        return highest;
    }

    // Indexed: the terms of the field give us the maximum directly.
    LeafReader wrappedReader = SlowCompositeReaderWrapper.wrap(searcher.getIndexReader());
    Terms terms = wrappedReader.terms(fieldName);
    Long termsMax = null;
    if (terms != null) {
        termsMax = LegacyNumericUtils.getMaxLong(terms);
    }
    if (termsMax == null) {
        log.debug("No terms found for {}, cannot seed version bucket highest value from index", fieldName);
        return 0L;
    }
    long found = termsMax.longValue();
    log.debug("Found MAX value {} from Terms for {} in index", found, fieldName);
    return found;
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) ValueSource(org.apache.lucene.queries.function.ValueSource) Terms(org.apache.lucene.index.Terms) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) FunctionValues(org.apache.lucene.queries.function.FunctionValues) Map(java.util.Map)

Example 62 with LeafReader

use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.

the class BooleanPerceptronClassifierTest method testBasicUsageWithQuery.

/**
 * Builds a {@link BooleanPerceptronClassifier} over the sample index, passing a
 * {@link TermQuery} for the term "of" on the text field, and checks the expected
 * boolean classification of the technology and politics inputs.
 */
@Test
public void testBasicUsageWithQuery() throws Exception {
    TermQuery query = new TermQuery(new Term(textFieldName, "of"));
    MockAnalyzer analyzer = new MockAnalyzer(random());
    // try-with-resources closes the reader on every path and, unlike close() in a finally
    // block, keeps a failure from the body as the primary exception instead of replacing it.
    try (LeafReader leafReader = getSampleIndex(analyzer)) {
        BooleanPerceptronClassifier classifier = new BooleanPerceptronClassifier(leafReader, analyzer, query, 1, null, booleanFieldName, textFieldName);
        checkCorrectClassification(classifier, TECHNOLOGY_INPUT, false);
        checkCorrectClassification(classifier, POLITICS_INPUT, true);
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) LeafReader(org.apache.lucene.index.LeafReader) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Term(org.apache.lucene.index.Term) Test(org.junit.Test)

Example 63 with LeafReader

use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.

the class BooleanPerceptronClassifierTest method testPerformance.

/**
 * Trains a {@link BooleanPerceptronClassifier} on a random index and checks that training and
 * evaluation stay within their time budgets and that every reported metric (overall and per
 * term of the boolean field) lies in the range [0, 1].
 */
@Test
public void testPerformance() throws Exception {
    MockAnalyzer analyzer = new MockAnalyzer(random());
    // try-with-resources closes the reader on every path and, unlike close() in a finally
    // block, keeps an assertion failure from the body as the primary exception.
    try (LeafReader leafReader = getRandomIndex(analyzer, 100)) {
        long trainStart = System.currentTimeMillis();
        BooleanPerceptronClassifier classifier = new BooleanPerceptronClassifier(leafReader, analyzer, null, 1, null, booleanFieldName, textFieldName);
        long trainEnd = System.currentTimeMillis();
        long trainTime = trainEnd - trainStart;
        assertTrue("training took more than 10s: " + trainTime / 1000 + "s", trainTime < 10000);

        long evaluationStart = System.currentTimeMillis();
        ConfusionMatrixGenerator.ConfusionMatrix confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(leafReader, classifier, booleanFieldName, textFieldName, -1);
        assertNotNull(confusionMatrix);
        long evaluationEnd = System.currentTimeMillis();
        long evaluationTime = evaluationEnd - evaluationStart;
        assertTrue("evaluation took more than 1m: " + evaluationTime / 1000 + "s", evaluationTime < 60000);

        double avgClassificationTime = confusionMatrix.getAvgClassificationTime();
        assertTrue(5000 > avgClassificationTime);

        // Overall metrics must each be a valid ratio.
        double f1 = confusionMatrix.getF1Measure();
        assertTrue(f1 >= 0d);
        assertTrue(f1 <= 1d);
        double accuracy = confusionMatrix.getAccuracy();
        assertTrue(accuracy >= 0d);
        assertTrue(accuracy <= 1d);
        double recall = confusionMatrix.getRecall();
        assertTrue(recall >= 0d);
        assertTrue(recall <= 1d);
        double precision = confusionMatrix.getPrecision();
        assertTrue(precision >= 0d);
        assertTrue(precision <= 1d);

        // Per-class metrics: one check per term of the boolean field.
        Terms terms = MultiFields.getTerms(leafReader, booleanFieldName);
        // Fail with a clear assertion instead of a bare NullPointerException on terms.iterator().
        assertNotNull("no terms found for field " + booleanFieldName, terms);
        TermsEnum iterator = terms.iterator();
        BytesRef term;
        while ((term = iterator.next()) != null) {
            String s = term.utf8ToString();
            recall = confusionMatrix.getRecall(s);
            assertTrue(recall >= 0d);
            assertTrue(recall <= 1d);
            precision = confusionMatrix.getPrecision(s);
            assertTrue(precision >= 0d);
            assertTrue(precision <= 1d);
            double f1Measure = confusionMatrix.getF1Measure(s);
            assertTrue(f1Measure >= 0d);
            assertTrue(f1Measure <= 1d);
        }
    }
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) LeafReader(org.apache.lucene.index.LeafReader) Terms(org.apache.lucene.index.Terms) ConfusionMatrixGenerator(org.apache.lucene.classification.utils.ConfusionMatrixGenerator) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum) Test(org.junit.Test)

Example 64 with LeafReader

use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.

the class BM25NBClassifierTest method testBasicUsage.

/**
 * Builds a {@link BM25NBClassifier} over the sample index (no query) and checks that the
 * technology input is assigned the expected technology result.
 */
@Test
public void testBasicUsage() throws Exception {
    MockAnalyzer analyzer = new MockAnalyzer(random());
    // try-with-resources closes the reader on every path and, unlike close() in a finally
    // block, keeps a failure from the body as the primary exception instead of replacing it.
    try (LeafReader leafReader = getSampleIndex(analyzer)) {
        BM25NBClassifier classifier = new BM25NBClassifier(leafReader, analyzer, null, categoryFieldName, textFieldName);
        checkCorrectClassification(classifier, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
    }
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Test(org.junit.Test)

Example 65 with LeafReader

use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.

the class BM25NBClassifierTest method testBasicUsageWithQuery.

/**
 * Builds a {@link BM25NBClassifier} over the sample index, passing a {@link TermQuery} for
 * the term "not" on the text field, and checks that the technology input is assigned the
 * expected technology result.
 */
@Test
public void testBasicUsageWithQuery() throws Exception {
    MockAnalyzer analyzer = new MockAnalyzer(random());
    // try-with-resources closes the reader on every path and, unlike close() in a finally
    // block, keeps a failure from the body as the primary exception instead of replacing it.
    try (LeafReader leafReader = getSampleIndex(analyzer)) {
        TermQuery query = new TermQuery(new Term(textFieldName, "not"));
        BM25NBClassifier classifier = new BM25NBClassifier(leafReader, analyzer, query, categoryFieldName, textFieldName);
        checkCorrectClassification(classifier, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) LeafReader(org.apache.lucene.index.LeafReader) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Term(org.apache.lucene.index.Term) Test(org.junit.Test)

Aggregations

LeafReader (org.apache.lucene.index.LeafReader): 168; BytesRef (org.apache.lucene.util.BytesRef): 65; Document (org.apache.lucene.document.Document): 61; LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 58; Directory (org.apache.lucene.store.Directory): 56; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 55; DirectoryReader (org.apache.lucene.index.DirectoryReader): 47; RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 42; Test (org.junit.Test): 36; IndexWriter (org.apache.lucene.index.IndexWriter): 32; Terms (org.apache.lucene.index.Terms): 30; TermsEnum (org.apache.lucene.index.TermsEnum): 28; NumericDocValues (org.apache.lucene.index.NumericDocValues): 24; SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues): 24; SortedDocValues (org.apache.lucene.index.SortedDocValues): 22; IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 21; IndexReader (org.apache.lucene.index.IndexReader): 20; Term (org.apache.lucene.index.Term): 20; SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField): 18; Bits (org.apache.lucene.util.Bits): 18