Search in sources :

Example 46 with LongPoint

use of org.apache.lucene.document.LongPoint in project lucene-solr by apache.

the class TestRangeFacetCounts method testRandomLongs.

public void testRandomLongs() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    int numDocs = atLeast(1000);
    if (VERBOSE) {
        System.out.println("TEST: numDocs=" + numDocs);
    }
    long[] values = new long[numDocs];
    long minValue = Long.MAX_VALUE;
    long maxValue = Long.MIN_VALUE;
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        long v = random().nextLong();
        values[i] = v;
        doc.add(new NumericDocValuesField("field", v));
        doc.add(new LongPoint("field", v));
        w.addDocument(doc);
        minValue = Math.min(minValue, v);
        maxValue = Math.max(maxValue, v);
    }
    IndexReader r = w.getReader();
    IndexSearcher s = newSearcher(r, false);
    FacetsConfig config = new FacetsConfig();
    int numIters = atLeast(10);
    for (int iter = 0; iter < numIters; iter++) {
        if (VERBOSE) {
            System.out.println("TEST: iter=" + iter);
        }
        int numRange = TestUtil.nextInt(random(), 1, 100);
        LongRange[] ranges = new LongRange[numRange];
        int[] expectedCounts = new int[numRange];
        long minAcceptedValue = Long.MAX_VALUE;
        long maxAcceptedValue = Long.MIN_VALUE;
        for (int rangeID = 0; rangeID < numRange; rangeID++) {
            long min;
            if (rangeID > 0 && random().nextInt(10) == 7) {
                // Use an existing boundary:
                LongRange prevRange = ranges[random().nextInt(rangeID)];
                if (random().nextBoolean()) {
                    min = prevRange.min;
                } else {
                    min = prevRange.max;
                }
            } else {
                min = random().nextLong();
            }
            long max;
            if (rangeID > 0 && random().nextInt(10) == 7) {
                // Use an existing boundary:
                LongRange prevRange = ranges[random().nextInt(rangeID)];
                if (random().nextBoolean()) {
                    max = prevRange.min;
                } else {
                    max = prevRange.max;
                }
            } else {
                max = random().nextLong();
            }
            if (min > max) {
                long x = min;
                min = max;
                max = x;
            }
            boolean minIncl;
            boolean maxIncl;
            // NOTE: max - min >= 0 is here to handle the common overflow case!
            if (max - min >= 0 && max - min < 2) {
                // If max == min or max == min+1, we always do inclusive, else we might pass an empty range and hit exc from LongRange's ctor:
                minIncl = true;
                maxIncl = true;
            } else {
                minIncl = random().nextBoolean();
                maxIncl = random().nextBoolean();
            }
            ranges[rangeID] = new LongRange("r" + rangeID, min, minIncl, max, maxIncl);
            if (VERBOSE) {
                System.out.println("  range " + rangeID + ": " + ranges[rangeID]);
            }
            // expected count:
            for (int i = 0; i < numDocs; i++) {
                boolean accept = true;
                if (minIncl) {
                    accept &= values[i] >= min;
                } else {
                    accept &= values[i] > min;
                }
                if (maxIncl) {
                    accept &= values[i] <= max;
                } else {
                    accept &= values[i] < max;
                }
                if (accept) {
                    expectedCounts[rangeID]++;
                    minAcceptedValue = Math.min(minAcceptedValue, values[i]);
                    maxAcceptedValue = Math.max(maxAcceptedValue, values[i]);
                }
            }
        }
        FacetsCollector sfc = new FacetsCollector();
        s.search(new MatchAllDocsQuery(), sfc);
        Query fastMatchQuery;
        if (random().nextBoolean()) {
            if (random().nextBoolean()) {
                fastMatchQuery = LongPoint.newRangeQuery("field", minValue, maxValue);
            } else {
                fastMatchQuery = LongPoint.newRangeQuery("field", minAcceptedValue, maxAcceptedValue);
            }
        } else {
            fastMatchQuery = null;
        }
        LongValuesSource vs = LongValuesSource.fromLongField("field");
        Facets facets = new LongRangeFacetCounts("field", vs, sfc, fastMatchQuery, ranges);
        FacetResult result = facets.getTopChildren(10, "field");
        assertEquals(numRange, result.labelValues.length);
        for (int rangeID = 0; rangeID < numRange; rangeID++) {
            if (VERBOSE) {
                System.out.println("  range " + rangeID + " expectedCount=" + expectedCounts[rangeID]);
            }
            LabelAndValue subNode = result.labelValues[rangeID];
            assertEquals("r" + rangeID, subNode.label);
            assertEquals(expectedCounts[rangeID], subNode.value.intValue());
            LongRange range = ranges[rangeID];
            // Test drill-down:
            DrillDownQuery ddq = new DrillDownQuery(config);
            if (random().nextBoolean()) {
                ddq.add("field", LongPoint.newRangeQuery("field", range.min, range.max));
            } else {
                ddq.add("field", range.getQuery(fastMatchQuery, vs));
            }
            assertEquals(expectedCounts[rangeID], s.search(ddq, 10).totalHits);
        }
    }
    w.close();
    IOUtils.close(r, dir);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) FacetsConfig(org.apache.lucene.facet.FacetsConfig) Query(org.apache.lucene.search.Query) DrillDownQuery(org.apache.lucene.facet.DrillDownQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) MultiFacets(org.apache.lucene.facet.MultiFacets) Facets(org.apache.lucene.facet.Facets) DrillDownQuery(org.apache.lucene.facet.DrillDownQuery) LongPoint(org.apache.lucene.document.LongPoint) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) LabelAndValue(org.apache.lucene.facet.LabelAndValue) LongPoint(org.apache.lucene.document.LongPoint) DoublePoint(org.apache.lucene.document.DoublePoint) FacetsCollector(org.apache.lucene.facet.FacetsCollector) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) FacetResult(org.apache.lucene.facet.FacetResult) LongValuesSource(org.apache.lucene.search.LongValuesSource) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 47 with LongPoint

use of org.apache.lucene.document.LongPoint in project lucene-solr by apache.

the class TestRangeFacetCounts method testMixedRangeAndNonRangeTaxonomy.

/** Tests single request that mixes Range and non-Range
   *  faceting, with DrillSideways and taxonomy. */
public void testMixedRangeAndNonRangeTaxonomy() throws Exception {
    Directory d = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), d);
    Directory td = newDirectory();
    DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, IndexWriterConfig.OpenMode.CREATE);
    FacetsConfig config = new FacetsConfig();
    for (long l = 0; l < 100; l++) {
        Document doc = new Document();
        // For computing range facet counts:
        doc.add(new NumericDocValuesField("field", l));
        // For drill down by numeric range:
        doc.add(new LongPoint("field", l));
        if ((l & 3) == 0) {
            doc.add(new FacetField("dim", "a"));
        } else {
            doc.add(new FacetField("dim", "b"));
        }
        w.addDocument(config.build(tw, doc));
    }
    final IndexReader r = w.getReader();
    final TaxonomyReader tr = new DirectoryTaxonomyReader(tw);
    IndexSearcher s = newSearcher(r, false);
    if (VERBOSE) {
        System.out.println("TEST: searcher=" + s);
    }
    DrillSideways ds = new DrillSideways(s, config, tr) {

        @Override
        protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {
            FacetsCollector dimFC = drillDowns;
            FacetsCollector fieldFC = drillDowns;
            if (drillSideways != null) {
                for (int i = 0; i < drillSideways.length; i++) {
                    String dim = drillSidewaysDims[i];
                    if (dim.equals("field")) {
                        fieldFC = drillSideways[i];
                    } else {
                        dimFC = drillSideways[i];
                    }
                }
            }
            Map<String, Facets> byDim = new HashMap<>();
            byDim.put("field", new LongRangeFacetCounts("field", fieldFC, new LongRange("less than 10", 0L, true, 10L, false), new LongRange("less than or equal to 10", 0L, true, 10L, true), new LongRange("over 90", 90L, false, 100L, false), new LongRange("90 or above", 90L, true, 100L, false), new LongRange("over 1000", 1000L, false, Long.MAX_VALUE, false)));
            byDim.put("dim", getTaxonomyFacetCounts(taxoReader, config, dimFC));
            return new MultiFacets(byDim, null);
        }

        @Override
        protected boolean scoreSubDocsAtOnce() {
            return random().nextBoolean();
        }
    };
    // First search, no drill downs:
    DrillDownQuery ddq = new DrillDownQuery(config);
    DrillSidewaysResult dsr = ds.search(null, ddq, 10);
    assertEquals(100, dsr.hits.totalHits);
    assertEquals("dim=dim path=[] value=100 childCount=2\n  b (75)\n  a (25)\n", dsr.facets.getTopChildren(10, "dim").toString());
    assertEquals("dim=field path=[] value=21 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (0)\n", dsr.facets.getTopChildren(10, "field").toString());
    // Second search, drill down on dim=b:
    ddq = new DrillDownQuery(config);
    ddq.add("dim", "b");
    dsr = ds.search(null, ddq, 10);
    assertEquals(75, dsr.hits.totalHits);
    assertEquals("dim=dim path=[] value=100 childCount=2\n  b (75)\n  a (25)\n", dsr.facets.getTopChildren(10, "dim").toString());
    assertEquals("dim=field path=[] value=16 childCount=5\n  less than 10 (7)\n  less than or equal to 10 (8)\n  over 90 (7)\n  90 or above (8)\n  over 1000 (0)\n", dsr.facets.getTopChildren(10, "field").toString());
    // Third search, drill down on "less than or equal to 10":
    ddq = new DrillDownQuery(config);
    ddq.add("field", LongPoint.newRangeQuery("field", 0L, 10L));
    dsr = ds.search(null, ddq, 10);
    assertEquals(11, dsr.hits.totalHits);
    assertEquals("dim=dim path=[] value=11 childCount=2\n  b (8)\n  a (3)\n", dsr.facets.getTopChildren(10, "dim").toString());
    assertEquals("dim=field path=[] value=21 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (0)\n", dsr.facets.getTopChildren(10, "field").toString());
    w.close();
    IOUtils.close(tw, tr, td, r, d);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) MultiFacets(org.apache.lucene.facet.MultiFacets) Facets(org.apache.lucene.facet.Facets) HashMap(java.util.HashMap) DrillDownQuery(org.apache.lucene.facet.DrillDownQuery) FacetField(org.apache.lucene.facet.FacetField) Document(org.apache.lucene.document.Document) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Directory(org.apache.lucene.store.Directory) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) FacetsConfig(org.apache.lucene.facet.FacetsConfig) TaxonomyReader(org.apache.lucene.facet.taxonomy.TaxonomyReader) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) LongPoint(org.apache.lucene.document.LongPoint) LongPoint(org.apache.lucene.document.LongPoint) DoublePoint(org.apache.lucene.document.DoublePoint) FacetsCollector(org.apache.lucene.facet.FacetsCollector) DrillSidewaysResult(org.apache.lucene.facet.DrillSideways.DrillSidewaysResult) IndexReader(org.apache.lucene.index.IndexReader) DrillSideways(org.apache.lucene.facet.DrillSideways) MultiFacets(org.apache.lucene.facet.MultiFacets) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 48 with LongPoint

use of org.apache.lucene.document.LongPoint in project lucene-solr by apache.

the class TestDocValuesQueries method doTestDuelPointRangeSortedRangeQuery.

private void doTestDuelPointRangeSortedRangeQuery(boolean sortedSet, int maxValuesPerDoc) throws IOException {
    final int iters = atLeast(10);
    for (int iter = 0; iter < iters; ++iter) {
        Directory dir = newDirectory();
        RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
        final int numDocs = atLeast(100);
        for (int i = 0; i < numDocs; ++i) {
            Document doc = new Document();
            final int numValues = TestUtil.nextInt(random(), 0, maxValuesPerDoc);
            for (int j = 0; j < numValues; ++j) {
                final long value = TestUtil.nextLong(random(), -100, 10000);
                byte[] encoded = new byte[Long.BYTES];
                LongPoint.encodeDimension(value, encoded, 0);
                if (sortedSet) {
                    doc.add(new SortedSetDocValuesField("dv", new BytesRef(encoded)));
                } else {
                    doc.add(new SortedDocValuesField("dv", new BytesRef(encoded)));
                }
                doc.add(new LongPoint("idx", value));
            }
            iw.addDocument(doc);
        }
        if (random().nextBoolean()) {
            iw.deleteDocuments(LongPoint.newRangeQuery("idx", 0L, 10L));
        }
        final IndexReader reader = iw.getReader();
        final IndexSearcher searcher = newSearcher(reader, false);
        iw.close();
        for (int i = 0; i < 100; ++i) {
            long min = random().nextBoolean() ? Long.MIN_VALUE : TestUtil.nextLong(random(), -100, 10000);
            long max = random().nextBoolean() ? Long.MAX_VALUE : TestUtil.nextLong(random(), -100, 10000);
            byte[] encodedMin = new byte[Long.BYTES];
            byte[] encodedMax = new byte[Long.BYTES];
            LongPoint.encodeDimension(min, encodedMin, 0);
            LongPoint.encodeDimension(max, encodedMax, 0);
            boolean includeMin = true;
            boolean includeMax = true;
            if (random().nextBoolean()) {
                includeMin = false;
                min++;
            }
            if (random().nextBoolean()) {
                includeMax = false;
                max--;
            }
            final Query q1 = LongPoint.newRangeQuery("idx", min, max);
            final Query q2;
            if (sortedSet) {
                q2 = SortedSetDocValuesField.newRangeQuery("dv", min == Long.MIN_VALUE && random().nextBoolean() ? null : new BytesRef(encodedMin), max == Long.MAX_VALUE && random().nextBoolean() ? null : new BytesRef(encodedMax), includeMin, includeMax);
            } else {
                q2 = SortedDocValuesField.newRangeQuery("dv", min == Long.MIN_VALUE && random().nextBoolean() ? null : new BytesRef(encodedMin), max == Long.MAX_VALUE && random().nextBoolean() ? null : new BytesRef(encodedMax), includeMin, includeMax);
            }
            assertSameMatches(searcher, q1, q2, false);
        }
        reader.close();
        dir.close();
    }
}
Also used : LongPoint(org.apache.lucene.document.LongPoint) Document(org.apache.lucene.document.Document) LongPoint(org.apache.lucene.document.LongPoint) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 49 with LongPoint

use of org.apache.lucene.document.LongPoint in project lucene-solr by apache.

the class IndexFiles method indexDoc.

/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();
        // Add the path of the file as a field named "path".  Use a
        // field that is indexed (i.e. searchable), but don't tokenize 
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);
        // Add the last modified date of the file a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery).  This indexes to milli-second resolution, which
        // is often too fine.  You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongPoint("modified", lastModified));
        // Add the contents of the file to a field named "contents".  Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so 
            // we use updateDocument instead to replace the old one matching the exact 
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
Also used : StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) StringField(org.apache.lucene.document.StringField) BufferedReader(java.io.BufferedReader) TextField(org.apache.lucene.document.TextField) LongPoint(org.apache.lucene.document.LongPoint) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document)

Aggregations

LongPoint (org.apache.lucene.document.LongPoint)49 Document (org.apache.lucene.document.Document)41 Directory (org.apache.lucene.store.Directory)34 IndexReader (org.apache.lucene.index.IndexReader)26 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)26 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)20 DoublePoint (org.apache.lucene.document.DoublePoint)19 FloatPoint (org.apache.lucene.document.FloatPoint)16 IntPoint (org.apache.lucene.document.IntPoint)16 IndexSearcher (org.apache.lucene.search.IndexSearcher)16 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)15 IndexWriter (org.apache.lucene.index.IndexWriter)14 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)8 BinaryPoint (org.apache.lucene.document.BinaryPoint)8 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)8 StoredField (org.apache.lucene.document.StoredField)8 Term (org.apache.lucene.index.Term)7 BytesRef (org.apache.lucene.util.BytesRef)7 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)6 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)5