Examples with BinaryPoint - org.apache.lucene.document.BinaryPoint

Example 36 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestPointValues method testIllegalDimChangeViaAddIndexesDirectory.

public void testIllegalDimChangeViaAddIndexesDirectory() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    IndexWriter w = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new BinaryPoint("dim", new byte[4]));
    w.addDocument(doc);
    w.close();
    Directory dir2 = newDirectory();
    IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(new MockAnalyzer(random())));
    doc = new Document();
    doc.add(new BinaryPoint("dim", new byte[4], new byte[4]));
    w2.addDocument(doc);
    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
        w2.addIndexes(new Directory[] { dir });
    });
    assertEquals("cannot change point dimension count from 2 to 1 for field=\"dim\"", expected.getMessage());
    IOUtils.close(w2, dir, dir2);
}

Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BinaryPoint(org.apache.lucene.document.BinaryPoint) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Example 37 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestPointQueries method verifyBinary.

// verify for byte[][] values
private void verifyBinary(byte[][][] docValues, int[] ids, int numBytesPerDim) throws Exception {
    IndexWriterConfig iwc = newIndexWriterConfig();
    int numDims = docValues[0].length;
    int bytesPerDim = docValues[0][0].length;
    // Else we can get O(N^2) merging:
    int mbd = iwc.getMaxBufferedDocs();
    if (mbd != -1 && mbd < docValues.length / 100) {
        iwc.setMaxBufferedDocs(docValues.length / 100);
    }
    iwc.setCodec(getCodec());
    Directory dir;
    if (docValues.length > 100000) {
        dir = newFSDirectory(createTempDir("TestPointQueries"));
    } else {
        dir = newDirectory();
    }
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    int numValues = docValues.length;
    if (VERBOSE) {
        System.out.println("TEST: numValues=" + numValues + " numDims=" + numDims + " numBytesPerDim=" + numBytesPerDim);
    }
    int missingPct = random().nextInt(100);
    int deletedPct = random().nextInt(100);
    if (VERBOSE) {
        System.out.println("  missingPct=" + missingPct);
        System.out.println("  deletedPct=" + deletedPct);
    }
    BitSet missing = new BitSet();
    BitSet deleted = new BitSet();
    Document doc = null;
    int lastID = -1;
    for (int ord = 0; ord < numValues; ord++) {
        int id = ids[ord];
        if (id != lastID) {
            if (random().nextInt(100) < missingPct) {
                missing.set(id);
                if (VERBOSE) {
                    System.out.println("  missing id=" + id);
                }
            }
            if (doc != null) {
                w.addDocument(doc);
                if (random().nextInt(100) < deletedPct) {
                    int idToDelete = random().nextInt(id);
                    w.deleteDocuments(new Term("id", "" + idToDelete));
                    deleted.set(idToDelete);
                    if (VERBOSE) {
                        System.out.println("  delete id=" + idToDelete);
                    }
                }
            }
            doc = new Document();
            doc.add(newStringField("id", "" + id, Field.Store.NO));
            doc.add(new NumericDocValuesField("id", id));
            lastID = id;
        }
        if (missing.get(id) == false) {
            doc.add(new BinaryPoint("value", docValues[ord]));
            if (VERBOSE) {
                System.out.println("id=" + id);
                for (int dim = 0; dim < numDims; dim++) {
                    System.out.println("  dim=" + dim + " value=" + bytesToString(docValues[ord][dim]));
                }
            }
        }
    }
    w.addDocument(doc);
    if (random().nextBoolean()) {
        if (VERBOSE) {
            System.out.println("  forceMerge(1)");
        }
        w.forceMerge(1);
    }
    final IndexReader r = w.getReader();
    w.close();
    IndexSearcher s = newSearcher(r, false);
    int numThreads = TestUtil.nextInt(random(), 2, 5);
    if (VERBOSE) {
        System.out.println("TEST: use " + numThreads + " query threads; searcher=" + s);
    }
    List<Thread> threads = new ArrayList<>();
    final int iters = atLeast(100);
    final CountDownLatch startingGun = new CountDownLatch(1);
    final AtomicBoolean failed = new AtomicBoolean();
    for (int i = 0; i < numThreads; i++) {
        Thread thread = new Thread() {

            @Override
            public void run() {
                try {
                    _run();
                } catch (Exception e) {
                    failed.set(true);
                    throw new RuntimeException(e);
                }
            }

            private void _run() throws Exception {
                startingGun.await();
                for (int iter = 0; iter < iters && failed.get() == false; iter++) {
                    byte[][] lower = new byte[numDims][];
                    byte[][] upper = new byte[numDims][];
                    for (int dim = 0; dim < numDims; dim++) {
                        lower[dim] = new byte[bytesPerDim];
                        random().nextBytes(lower[dim]);
                        upper[dim] = new byte[bytesPerDim];
                        random().nextBytes(upper[dim]);
                        if (StringHelper.compare(bytesPerDim, lower[dim], 0, upper[dim], 0) > 0) {
                            byte[] x = lower[dim];
                            lower[dim] = upper[dim];
                            upper[dim] = x;
                        }
                    }
                    if (VERBOSE) {
                        System.out.println("\n" + Thread.currentThread().getName() + ": TEST: iter=" + iter);
                        for (int dim = 0; dim < numDims; dim++) {
                            System.out.println("  dim=" + dim + " " + bytesToString(lower[dim]) + " TO " + bytesToString(upper[dim]));
                        }
                    }
                    Query query = BinaryPoint.newRangeQuery("value", lower, upper);
                    if (VERBOSE) {
                        System.out.println(Thread.currentThread().getName() + ":  using query: " + query);
                    }
                    final BitSet hits = new BitSet();
                    s.search(query, new SimpleCollector() {

                        private int docBase;

                        @Override
                        public boolean needsScores() {
                            return false;
                        }

                        @Override
                        protected void doSetNextReader(LeafReaderContext context) throws IOException {
                            docBase = context.docBase;
                        }

                        @Override
                        public void collect(int doc) {
                            hits.set(docBase + doc);
                        }
                    });
                    if (VERBOSE) {
                        System.out.println(Thread.currentThread().getName() + ":  hitCount: " + hits.cardinality());
                    }
                    BitSet expected = new BitSet();
                    for (int ord = 0; ord < numValues; ord++) {
                        int id = ids[ord];
                        if (missing.get(id) == false && deleted.get(id) == false && matches(bytesPerDim, lower, upper, docValues[ord])) {
                            expected.set(id);
                        }
                    }
                    NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
                    int failCount = 0;
                    for (int docID = 0; docID < r.maxDoc(); docID++) {
                        assertEquals(docID, docIDToID.nextDoc());
                        int id = (int) docIDToID.longValue();
                        if (hits.get(docID) != expected.get(id)) {
                            System.out.println("FAIL: iter=" + iter + " id=" + id + " docID=" + docID + " expected=" + expected.get(id) + " but got " + hits.get(docID) + " deleted?=" + deleted.get(id) + " missing?=" + missing.get(id));
                            for (int dim = 0; dim < numDims; dim++) {
                                System.out.println("  dim=" + dim + " range: " + bytesToString(lower[dim]) + " TO " + bytesToString(upper[dim]));
                                failCount++;
                            }
                        }
                    }
                    if (failCount != 0) {
                        fail(failCount + " hits were wrong");
                    }
                }
            }
        };
        thread.setName("T" + i);
        thread.start();
        threads.add(thread);
    }
    startingGun.countDown();
    for (Thread thread : threads) {
        thread.join();
    }
    IOUtils.close(r, dir);
}

Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) BinaryPoint(org.apache.lucene.document.BinaryPoint) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Directory(org.apache.lucene.store.Directory) BitSet(java.util.BitSet) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) CountDownLatch(java.util.concurrent.CountDownLatch) BinaryPoint(org.apache.lucene.document.BinaryPoint) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) NoSuchElementException(java.util.NoSuchElementException) IOException(java.io.IOException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 38 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestPointQueries method testEmptyPointInSetQuery.

public void testEmptyPointInSetQuery() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setCodec(getCodec());
    IndexWriter w = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new IntPoint("int", 17));
    doc.add(new LongPoint("long", 17L));
    doc.add(new FloatPoint("float", 17.0f));
    doc.add(new DoublePoint("double", 17.0));
    doc.add(new BinaryPoint("bytes", new byte[] { 0, 17 }));
    w.addDocument(doc);
    IndexReader r = DirectoryReader.open(w);
    IndexSearcher s = newSearcher(r, false);
    assertEquals(0, s.count(IntPoint.newSetQuery("int")));
    assertEquals(0, s.count(LongPoint.newSetQuery("long")));
    assertEquals(0, s.count(FloatPoint.newSetQuery("float")));
    assertEquals(0, s.count(DoublePoint.newSetQuery("double")));
    assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes")));
    w.close();
    r.close();
    dir.close();
}

Example 39 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestPointQueries method testPointInSetQueryManyEqualValuesWithBigGap.

public void testPointInSetQueryManyEqualValuesWithBigGap() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setCodec(getCodec());
    IndexWriter w = new IndexWriter(dir, iwc);
    int zeroCount = 0;
    for (int i = 0; i < 10000; i++) {
        int x = 200 * random().nextInt(2);
        if (x == 0) {
            zeroCount++;
        }
        Document doc = new Document();
        doc.add(new IntPoint("int", x));
        doc.add(new LongPoint("long", (long) x));
        doc.add(new FloatPoint("float", (float) x));
        doc.add(new DoublePoint("double", (double) x));
        doc.add(new BinaryPoint("bytes", new byte[] { (byte) x }));
        w.addDocument(doc);
    }
    IndexReader r = DirectoryReader.open(w);
    IndexSearcher s = newSearcher(r, false);
    assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0)));
    assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0, -7)));
    assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 7, 0)));
    assertEquals(10000 - zeroCount, s.count(IntPoint.newSetQuery("int", 200)));
    assertEquals(0, s.count(IntPoint.newSetQuery("int", 2)));
    assertEquals(zeroCount, s.count(LongPoint.newSetQuery("long", 0)));
    assertEquals(zeroCount, s.count(LongPoint.newSetQuery("long", 0, -7)));
    assertEquals(zeroCount, s.count(LongPoint.newSetQuery("long", 7, 0)));
    assertEquals(10000 - zeroCount, s.count(LongPoint.newSetQuery("long", 200)));
    assertEquals(0, s.count(LongPoint.newSetQuery("long", 2)));
    assertEquals(zeroCount, s.count(FloatPoint.newSetQuery("float", 0)));
    assertEquals(zeroCount, s.count(FloatPoint.newSetQuery("float", 0, -7)));
    assertEquals(zeroCount, s.count(FloatPoint.newSetQuery("float", 7, 0)));
    assertEquals(10000 - zeroCount, s.count(FloatPoint.newSetQuery("float", 200)));
    assertEquals(0, s.count(FloatPoint.newSetQuery("float", 2)));
    assertEquals(zeroCount, s.count(DoublePoint.newSetQuery("double", 0)));
    assertEquals(zeroCount, s.count(DoublePoint.newSetQuery("double", 0, -7)));
    assertEquals(zeroCount, s.count(DoublePoint.newSetQuery("double", 7, 0)));
    assertEquals(10000 - zeroCount, s.count(DoublePoint.newSetQuery("double", 200)));
    assertEquals(0, s.count(DoublePoint.newSetQuery("double", 2)));
    assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] { 0 })));
    assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] { 0 }, new byte[] { -7 })));
    assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] { 7 }, new byte[] { 0 })));
    assertEquals(10000 - zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] { (byte) 200 })));
    assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes", new byte[] { 2 })));
    w.close();
    r.close();
    dir.close();
}

Also used : IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) BinaryPoint(org.apache.lucene.document.BinaryPoint) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) DoublePoint(org.apache.lucene.document.DoublePoint) IndexReader(org.apache.lucene.index.IndexReader) LongPoint(org.apache.lucene.document.LongPoint) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 40 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestUtil method cloneDocument.

// NOTE: this is likely buggy, and cannot clone fields
// with tokenStreamValues, etc.  Use at your own risk!!
// TODO: is there a pre-existing way to do this!!!
public static Document cloneDocument(Document doc1) {
    final Document doc2 = new Document();
    for (IndexableField f : doc1.getFields()) {
        final Field field1 = (Field) f;
        final Field field2;
        final DocValuesType dvType = field1.fieldType().docValuesType();
        final int dimCount = field1.fieldType().pointDimensionCount();
        if (dvType != DocValuesType.NONE) {
            switch(dvType) {
                case NUMERIC:
                    field2 = new NumericDocValuesField(field1.name(), field1.numericValue().longValue());
                    break;
                case BINARY:
                    field2 = new BinaryDocValuesField(field1.name(), field1.binaryValue());
                    break;
                case SORTED:
                    field2 = new SortedDocValuesField(field1.name(), field1.binaryValue());
                    break;
                default:
                    throw new IllegalStateException("unknown Type: " + dvType);
            }
        } else if (dimCount != 0) {
            BytesRef br = field1.binaryValue();
            byte[] bytes = new byte[br.length];
            System.arraycopy(br.bytes, br.offset, bytes, 0, br.length);
            field2 = new BinaryPoint(field1.name(), bytes, field1.fieldType());
        } else {
            field2 = new Field(field1.name(), field1.stringValue(), field1.fieldType());
        }
        doc2.add(field2);
    }
    return doc2;
}

Also used : IndexableField(org.apache.lucene.index.IndexableField) IndexableField(org.apache.lucene.index.IndexableField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) BinaryPoint(org.apache.lucene.document.BinaryPoint) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) DocValuesType(org.apache.lucene.index.DocValuesType) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BinaryPoint(org.apache.lucene.document.BinaryPoint)

Aggregations

BinaryPoint (org.apache.lucene.document.BinaryPoint)40 Document (org.apache.lucene.document.Document)38 Directory (org.apache.lucene.store.Directory)35 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)20 FSDirectory (org.apache.lucene.store.FSDirectory)18 RAMDirectory (org.apache.lucene.store.RAMDirectory)18 IntPoint (org.apache.lucene.document.IntPoint)17 IndexReader (org.apache.lucene.index.IndexReader)11 DoublePoint (org.apache.lucene.document.DoublePoint)10 FloatPoint (org.apache.lucene.document.FloatPoint)10 LongPoint (org.apache.lucene.document.LongPoint)10 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)10 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)9 BitSet (java.util.BitSet)7 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)7 IndexWriter (org.apache.lucene.index.IndexWriter)7 IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor)7 Relation (org.apache.lucene.index.PointValues.Relation)7 IOException (java.io.IOException)5 FieldType (org.apache.lucene.document.FieldType)4