Search in sources :

Example 71 with IntPoint

use of org.apache.lucene.document.IntPoint in project lucene-solr by apache.

the class TestPointQueries method testRandomPointInSetQuery.

/**
 * Randomized, multi-threaded check of {@code IntPoint.newSetQuery}.
 *
 * <p>Indexes a random number of documents, each carrying a single {@code IntPoint} named
 * {@code "int"} whose value is drawn from a small pool of distinct ints, then has several
 * threads concurrently run set queries and compare {@code IndexSearcher.count} against a
 * brute-force count over the values that were indexed.
 *
 * @throws Exception if indexing, searching or thread coordination fails
 */
public void testRandomPointInSetQuery() throws Exception {
    // Either confine the value pool to a narrow window [valueMin, valueMin + gap] or leave it
    // unbounded (null bounds are handed to randomIntValue; presumably null means "no limit").
    boolean useNarrowRange = random().nextBoolean();
    final Integer valueMin;
    final Integer valueMax;
    int numValues;
    if (useNarrowRange) {
        int gap = random().nextInt(100);
        // Subtracting gap from the bound keeps valueMin + gap from overflowing.
        valueMin = random().nextInt(Integer.MAX_VALUE - gap);
        valueMax = valueMin + gap;
        // At most gap + 1 distinct ints exist in the window, so never ask for more than that;
        // otherwise the de-duplicating loop below could not finish.
        numValues = TestUtil.nextInt(random(), 1, gap + 1);
    } else {
        valueMin = null;
        valueMax = null;
        numValues = TestUtil.nextInt(random(), 1, 100);
    }
    // Keep drawing until we have numValues distinct values.
    final Set<Integer> valuesSet = new HashSet<>();
    while (valuesSet.size() < numValues) {
        valuesSet.add(randomIntValue(valueMin, valueMax));
    }
    int[] values = toArray(valuesSet);
    int numDocs = TestUtil.nextInt(random(), 1, 10000);
    if (VERBOSE) {
        System.out.println("TEST: numValues=" + numValues + " numDocs=" + numDocs);
    }
    Directory dir;
    // NOTE(review): numDocs is requested with an upper bound of 10000 above, so this
    // condition looks unreachable as written — confirm whether 10000 or 100000 was intended.
    if (numDocs > 100000) {
        dir = newFSDirectory(createTempDir("TestPointQueries"));
    } else {
        dir = newDirectory();
    }
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setCodec(getCodec());
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    // docValues[i] records the value given to the i-th added document; it is the ground truth
    // the query threads count against.
    int[] docValues = new int[numDocs];
    for (int i = 0; i < numDocs; i++) {
        int x = values[random().nextInt(values.length)];
        Document doc = new Document();
        doc.add(new IntPoint("int", x));
        docValues[i] = x;
        w.addDocument(doc);
    }
    // Randomly exercise the single-segment case as well.
    if (random().nextBoolean()) {
        if (VERBOSE) {
            System.out.println("  forceMerge(1)");
        }
        w.forceMerge(1);
    }
    final IndexReader r = w.getReader();
    w.close();
    IndexSearcher s = newSearcher(r, false);
    int numThreads = TestUtil.nextInt(random(), 2, 5);
    if (VERBOSE) {
        System.out.println("TEST: use " + numThreads + " query threads; searcher=" + s);
    }
    List<Thread> threads = new ArrayList<>();
    final int iters = atLeast(100);
    // All query threads block on this latch so they start searching at the same moment.
    final CountDownLatch startingGun = new CountDownLatch(1);
    // Set by a thread that hit an exception; the other threads poll it and stop early.
    final AtomicBoolean failed = new AtomicBoolean();
    for (int i = 0; i < numThreads; i++) {
        Thread thread = new Thread() {

            @Override
            public void run() {
                try {
                    _run();
                } catch (Exception e) {
                    // NOTE(review): only Exception is caught here; if assertEquals signals
                    // failure with an AssertionError (an Error), `failed` is not set on that
                    // path — confirm this is intended.
                    failed.set(true);
                    throw new RuntimeException(e);
                }
            }

            private void _run() throws Exception {
                startingGun.await();
                for (int iter = 0; iter < iters && failed.get() == false; iter++) {
                    // Query a random subset of the indexed value pool (strictly fewer than
                    // values.length entries, possibly none) ...
                    int numValidValuesToQuery = random().nextInt(values.length);
                    Set<Integer> valuesToQuery = new HashSet<>();
                    while (valuesToQuery.size() < numValidValuesToQuery) {
                        valuesToQuery.add(values[random().nextInt(values.length)]);
                    }
                    // ... plus up to 19 arbitrary ints. These may happen to coincide with
                    // indexed values; that is fine because the expected count below is
                    // computed from the final set.
                    int numExtraValuesToQuery = random().nextInt(20);
                    while (valuesToQuery.size() < numValidValuesToQuery + numExtraValuesToQuery) {
                        valuesToQuery.add(random().nextInt());
                    }
                    // Brute-force expected hit count: every document whose value is in the set.
                    int expectedCount = 0;
                    for (int value : docValues) {
                        if (valuesToQuery.contains(value)) {
                            expectedCount++;
                        }
                    }
                    if (VERBOSE) {
                        System.out.println("TEST: thread=" + Thread.currentThread() + " values=" + valuesToQuery + " expectedCount=" + expectedCount);
                    }
                    assertEquals(expectedCount, s.count(IntPoint.newSetQuery("int", toArray(valuesToQuery))));
                }
            }
        };
        thread.setName("T" + i);
        thread.start();
        threads.add(thread);
    }
    // Release all query threads at once, then wait for every one of them to finish.
    startingGun.countDown();
    for (Thread thread : threads) {
        thread.join();
    }
    IOUtils.close(r, dir);
}
Also used : ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) CountDownLatch(java.util.concurrent.CountDownLatch) BinaryPoint(org.apache.lucene.document.BinaryPoint) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) NoSuchElementException(java.util.NoSuchElementException) IOException(java.io.IOException) IntPoint(org.apache.lucene.document.IntPoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) HashSet(java.util.HashSet) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 72 with IntPoint

use of org.apache.lucene.document.IntPoint in project lucene-solr by apache.

the class TestIndexSorting method assertNeedsIndexSortMerge.

/**
 * Indexes three batches of documents under an index sort of {@code (sortField, "id")} and,
 * after force-merging each batch, asserts that the asserting codec was invoked at least once.
 *
 * <p>Before each batch the codec's {@code needsIndexSort} flag is set to the value expected for
 * that batch and its {@code numCalls} counter is reset. Presumably the codec itself verifies
 * the flag while merging; that logic is not visible in this method.
 *
 * <p>The directory and writer are managed with try-with-resources so they are closed even when
 * an assertion or an indexing call throws part-way through.
 *
 * @param sortField            primary sort of the index; {@code "id"} is used as the tie-breaker
 * @param defaultValueConsumer applied to documents of the first two batches, either to all of
 *                             them or to none (chosen randomly once)
 * @param randomValueConsumer  applied to every document of the third batch
 * @throws Exception if indexing, committing or merging fails
 */
private static void assertNeedsIndexSortMerge(SortField sortField, Consumer<Document> defaultValueConsumer, Consumer<Document> randomValueConsumer) throws Exception {
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
        AssertingNeedsIndexSortCodec codec = new AssertingNeedsIndexSortCodec();
        iwc.setCodec(codec);
        Sort indexSort = new Sort(sortField, new SortField("id", SortField.Type.INT));
        iwc.setIndexSort(indexSort);
        LogMergePolicy policy = newLogMergePolicy();
        // make sure that merge factor is always > 2
        if (policy.getMergeFactor() <= 2) {
            policy.setMergeFactor(3);
        }
        iwc.setMergePolicy(policy);
        // add already sorted documents
        codec.numCalls = 0;
        codec.needsIndexSort = false;
        try (IndexWriter w = new IndexWriter(dir, iwc)) {
            boolean withValues = random().nextBoolean();
            // Batch 1: ids 100..199 in ascending order, committing every tenth document so
            // that several segments exist.
            for (int i = 100; i < 200; i++) {
                Document doc = new Document();
                doc.add(new StringField("id", Integer.toString(i), Store.YES));
                doc.add(new NumericDocValuesField("id", i));
                doc.add(new IntPoint("point", random().nextInt()));
                if (withValues) {
                    defaultValueConsumer.accept(doc);
                }
                w.addDocument(doc);
                if (i % 10 == 0) {
                    w.commit();
                }
            }
            // Issue a random number (0..19) of delete-by-term requests.
            // NOTE(review): the ids requested here are in 0..99 while only 100..199 have been
            // indexed at this point, so these terms do not appear to match any document yet —
            // confirm the intended id range.
            int num = random().nextInt(20);
            for (int i = 0; i < num; i++) {
                int nextDoc = random().nextInt(100);
                w.deleteDocuments(new Term("id", Integer.toString(nextDoc)));
            }
            w.commit();
            w.waitForMerges();
            w.forceMerge(1);
            assertTrue(codec.numCalls > 0);
            // merge sort is needed
            codec.numCalls = 0;
            codec.needsIndexSort = true;
            // Batch 2: ids 10..0 in descending order, one commit (and thus one flush) per document.
            for (int i = 10; i >= 0; i--) {
                Document doc = new Document();
                doc.add(new StringField("id", Integer.toString(i), Store.YES));
                doc.add(new NumericDocValuesField("id", i));
                doc.add(new IntPoint("point", random().nextInt()));
                if (withValues) {
                    defaultValueConsumer.accept(doc);
                }
                w.addDocument(doc);
                w.commit();
            }
            w.commit();
            w.waitForMerges();
            w.forceMerge(1);
            assertTrue(codec.numCalls > 0);
            // segment sort is needed
            codec.needsIndexSort = true;
            codec.numCalls = 0;
            // Batch 3: ids 201..299, each document always decorated by randomValueConsumer.
            for (int i = 201; i < 300; i++) {
                Document doc = new Document();
                doc.add(new StringField("id", Integer.toString(i), Store.YES));
                doc.add(new NumericDocValuesField("id", i));
                doc.add(new IntPoint("point", random().nextInt()));
                randomValueConsumer.accept(doc);
                w.addDocument(doc);
                if (i % 10 == 0) {
                    w.commit();
                }
            }
            w.commit();
            w.waitForMerges();
            w.forceMerge(1);
            assertTrue(codec.numCalls > 0);
        }
    }
}
Also used : SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) IntPoint(org.apache.lucene.document.IntPoint) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) IntPoint(org.apache.lucene.document.IntPoint) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) Sort(org.apache.lucene.search.Sort) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Example 73 with IntPoint

use of org.apache.lucene.document.IntPoint in project lucene-solr by apache.

the class TestIndexWriterOnDiskFull method addDocWithIndex.

/**
 * Adds one document to {@code writer} whose text fields embed {@code index}.
 *
 * <p>The document carries a {@code "content"} text field ({@code "aaa "} followed by the
 * index), an {@code "id"} text field holding the index, a numeric doc-values field and a
 * one-dimensional and a two-dimensional int point, all with the constant value 1.
 *
 * @param writer the writer that receives the document
 * @param index  number embedded in the {@code "content"} and {@code "id"} fields
 * @throws IOException if the writer fails to add the document
 */
private void addDocWithIndex(IndexWriter writer, int index) throws IOException {
    // Fields are created left to right, in the same order they are added below.
    final Field[] fields = {
        newTextField("content", "aaa " + index, Field.Store.NO),
        newTextField("id", String.valueOf(index), Field.Store.NO),
        new NumericDocValuesField("numericdv", 1),
        new IntPoint("point", 1),
        new IntPoint("point2d", 1, 1)
    };
    final Document document = new Document();
    for (Field field : fields) {
        document.add(field);
    }
    writer.addDocument(document);
}
Also used : IntPoint(org.apache.lucene.document.IntPoint) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Document(org.apache.lucene.document.Document)

Example 74 with IntPoint

use of org.apache.lucene.document.IntPoint in project lucene-solr by apache.

the class TestIndexWriterOnDiskFull method addDoc.

// TODO: these are also in TestIndexWriter... add a simple doc-writing method
// like this to LuceneTestCase?
/**
 * Adds one fixed-content document to {@code writer}: a {@code "content"} text field with the
 * value {@code "aaa"}, a numeric doc-values field and a one-dimensional and a two-dimensional
 * int point, all with the constant value 1.
 *
 * @param writer the writer that receives the document
 * @throws IOException if the writer fails to add the document
 */
private void addDoc(IndexWriter writer) throws IOException {
    // Fields are created left to right, in the same order they are added below.
    final Field[] fields = {
        newTextField("content", "aaa", Field.Store.NO),
        new NumericDocValuesField("numericdv", 1),
        new IntPoint("point", 1),
        new IntPoint("point2d", 1, 1)
    };
    final Document document = new Document();
    for (Field field : fields) {
        document.add(field);
    }
    writer.addDocument(document);
}
Also used : IntPoint(org.apache.lucene.document.IntPoint) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Document(org.apache.lucene.document.Document)

Example 75 with IntPoint

use of org.apache.lucene.document.IntPoint in project carbondata by apache.

the class LuceneDataMapWriter method addField.

/**
 * Adds the value at {@code rowId} of {@code page} to {@code doc} as a Lucene field whose kind
 * is chosen from the page's data type, and additionally as a {@link StoredField} when
 * {@code store} is {@code Field.Store.YES} (for the numeric and boolean types).
 *
 * <p>DATE and TIMESTAMP columns are currently accepted but nothing is added for them.
 *
 * @param doc   document that receives the field(s)
 * @param page  column page supplying the field name, data type and value
 * @param rowId row within {@code page} whose value is indexed
 * @param store whether the original value should also be stored
 * @return always {@code true}
 * @throws RuntimeException if the data type is not one of the handled types
 */
private boolean addField(Document doc, ColumnPage page, int rowId, Field.Store store) {
    // get field name
    String fieldName = page.getColumnSpec().getFieldName();
    // get field type
    DataType type = page.getDataType();
    if (type == DataTypes.BYTE) {
        // byte type , use int range to deal with byte, lucene has no byte type
        byte value = page.getByte(rowId);
        IntRangeField field = new IntRangeField(fieldName, new int[] { Byte.MIN_VALUE }, new int[] { Byte.MAX_VALUE });
        // NOTE(review): Lucene's Field.setIntValue is documented to reject fields whose current
        // value is not an Integer — confirm IntRangeField accepts this call in the Lucene
        // version in use.
        field.setIntValue(value);
        doc.add(field);
        // if need store it , add StoredField
        if (store == Field.Store.YES) {
            doc.add(new StoredField(fieldName, (int) value));
        }
    } else if (type == DataTypes.SHORT) {
        // short type , use int range to deal with short type, lucene has no short type
        short value = page.getShort(rowId);
        IntRangeField field = new IntRangeField(fieldName, new int[] { Short.MIN_VALUE }, new int[] { Short.MAX_VALUE });
        // NOTE(review): same concern as setIntValue in the BYTE branch — confirm
        // setShortValue is valid on an IntRangeField.
        field.setShortValue(value);
        doc.add(field);
        // if need store it , add StoredField
        if (store == Field.Store.YES) {
            doc.add(new StoredField(fieldName, (int) value));
        }
    } else if (type == DataTypes.INT) {
        // int type , use int point to deal with int type
        int value = page.getInt(rowId);
        doc.add(new IntPoint(fieldName, new int[] { value }));
        // if need store it , add StoredField
        if (store == Field.Store.YES) {
            doc.add(new StoredField(fieldName, value));
        }
    } else if (type == DataTypes.LONG) {
        // long type , use long point to deal with long type
        long value = page.getLong(rowId);
        doc.add(new LongPoint(fieldName, new long[] { value }));
        // if need store it , add StoredField
        if (store == Field.Store.YES) {
            doc.add(new StoredField(fieldName, value));
        }
    } else if (type == DataTypes.FLOAT) {
        // float type , use float point to deal with float type
        float value = page.getFloat(rowId);
        doc.add(new FloatPoint(fieldName, new float[] { value }));
        // if need store it , add StoredField (a second FloatPoint would only index the value
        // again without storing it)
        if (store == Field.Store.YES) {
            doc.add(new StoredField(fieldName, value));
        }
    } else if (type == DataTypes.DOUBLE) {
        // double type , use double point to deal with double type
        double value = page.getDouble(rowId);
        doc.add(new DoublePoint(fieldName, new double[] { value }));
        // if need store it , add StoredField (a second DoublePoint would only index the value
        // again without storing it)
        if (store == Field.Store.YES) {
            doc.add(new StoredField(fieldName, value));
        }
    } else if (type == DataTypes.STRING) {
        byte[] value = page.getBytes(rowId);
        // TODO: how to get string value
        // The first two bytes are skipped; presumably they are a length prefix — verify
        // against the page's byte layout.
        String strValue = null;
        try {
            strValue = new String(value, 2, value.length - 2, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
        doc.add(new TextField(fieldName, strValue, store));
    } else if (type == DataTypes.DATE) {
    // TODO: how to get data value
    } else if (type == DataTypes.TIMESTAMP) {
    // TODO: how to get
    } else if (type == DataTypes.BOOLEAN) {
        // boolean type , indexed as 0/1 through an int range field
        boolean value = page.getBoolean(rowId);
        IntRangeField field = new IntRangeField(fieldName, new int[] { 0 }, new int[] { 1 });
        // NOTE(review): same concern as setIntValue in the BYTE branch.
        field.setIntValue(value ? 1 : 0);
        doc.add(field);
        if (store == Field.Store.YES) {
            doc.add(new StoredField(fieldName, value ? 1 : 0));
        }
    } else {
        LOGGER.error("unsupport data type " + type);
        throw new RuntimeException("unsupported data type " + type);
    }
    return true;
}
Also used : IntRangeField(org.apache.lucene.document.IntRangeField) UnsupportedEncodingException(java.io.UnsupportedEncodingException) LongPoint(org.apache.lucene.document.LongPoint) LongPoint(org.apache.lucene.document.LongPoint) DoublePoint(org.apache.lucene.document.DoublePoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) IntPoint(org.apache.lucene.document.IntPoint) StoredField(org.apache.lucene.document.StoredField) FloatPoint(org.apache.lucene.document.FloatPoint) DoublePoint(org.apache.lucene.document.DoublePoint) DataType(org.apache.carbondata.core.metadata.datatype.DataType) TextField(org.apache.lucene.document.TextField)

Aggregations

IntPoint (org.apache.lucene.document.IntPoint): 81, Document (org.apache.lucene.document.Document): 71, Directory (org.apache.lucene.store.Directory): 47, LongPoint (org.apache.lucene.document.LongPoint): 30, DoublePoint (org.apache.lucene.document.DoublePoint): 29, FloatPoint (org.apache.lucene.document.FloatPoint): 28, StoredField (org.apache.lucene.document.StoredField): 27, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 25, NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 24, IndexReader (org.apache.lucene.index.IndexReader): 22, StringField (org.apache.lucene.document.StringField): 21, IndexWriter (org.apache.lucene.index.IndexWriter): 21, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 20, IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 18, BytesRef (org.apache.lucene.util.BytesRef): 18, BinaryPoint (org.apache.lucene.document.BinaryPoint): 17, RAMDirectory (org.apache.lucene.store.RAMDirectory): 16, Field (org.apache.lucene.document.Field): 13, SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField): 13, IndexSearcher (org.apache.lucene.search.IndexSearcher): 12