Search in sources :

Example 1 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestMemoryIndex method testPointValuesDoNotAffectPositionsOrOffset.

/**
 * Adds a text field and two binary points under the same field name ("text") and checks that
 * the text terms still report the expected positions and offsets, and that the point values
 * remain searchable through exact point queries.
 */
public void testPointValuesDoNotAffectPositionsOrOffset() throws Exception {
    MemoryIndex memoryIndex = new MemoryIndex(true, true);
    memoryIndex.addField(new TextField("text", "quick brown fox", Field.Store.NO), analyzer);
    for (String pointValue : new String[] { "quick", "brown" }) {
        memoryIndex.addField(new BinaryPoint("text", pointValue.getBytes(StandardCharsets.UTF_8)), analyzer);
    }

    LeafReader reader = memoryIndex.createSearcher().getIndexReader().leaves().get(0).reader();
    TermsEnum termsEnum = reader.terms("text").iterator();

    // Expected postings, listed in the order the terms enum returns them.
    String[] expectedTerms = { "brown", "fox", "quick" };
    int[] expectedPositions = { 1, 2, 0 };
    int[] expectedStartOffsets = { 6, 12, 0 };
    int[] expectedEndOffsets = { 11, 15, 5 };

    // Starts as null so the first postings() call receives no reusable enum.
    PostingsEnum postings = null;
    for (int i = 0; i < expectedTerms.length; i++) {
        assertEquals(expectedTerms[i], termsEnum.next().utf8ToString());
        postings = termsEnum.postings(postings, PostingsEnum.OFFSETS);
        assertEquals(0, postings.nextDoc());
        assertEquals(1, postings.freq());
        assertEquals(expectedPositions[i], postings.nextPosition());
        assertEquals(expectedStartOffsets[i], postings.startOffset());
        assertEquals(expectedEndOffsets[i], postings.endOffset());
    }

    // The two indexed point values match exactly one document; an unindexed value matches none.
    IndexSearcher searcher = memoryIndex.createSearcher();
    String[] probes = { "quick", "brown", "jumps" };
    int[] expectedHits = { 1, 1, 0 };
    for (int i = 0; i < probes.length; i++) {
        assertEquals(expectedHits[i], searcher.count(BinaryPoint.newExactQuery("text", probes[i].getBytes(StandardCharsets.UTF_8))));
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BinaryPoint(org.apache.lucene.document.BinaryPoint) LeafReader(org.apache.lucene.index.LeafReader) TextField(org.apache.lucene.document.TextField) PostingsEnum(org.apache.lucene.index.PostingsEnum) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 2 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestMemoryIndex method testToStringDebug.

/**
 * Checks the exact text produced by {@code MemoryIndex.toStringDebug()} for an index holding
 * one analyzed field and one field that carries only a point value and binary doc values.
 */
public void testToStringDebug() {
    Analyzer payloadAnalyzer = new MockPayloadAnalyzer();
    MemoryIndex memoryIndex = new MemoryIndex(true, true);
    memoryIndex.addField("analyzedField", "aa bb aa", payloadAnalyzer);

    // Field type with one 4-byte dimension plus binary doc values.
    FieldType pointAndDvType = new FieldType();
    pointAndDvType.setDimensions(1, 4);
    pointAndDvType.setDocValuesType(DocValuesType.BINARY);
    pointAndDvType.freeze();
    byte[] termBytes = "term".getBytes(StandardCharsets.UTF_8);
    memoryIndex.addField(new BinaryPoint("pointAndDvField", termBytes, pointAndDvType), payloadAnalyzer);

    // The point/doc-values field is expected to be listed with zero terms and positions.
    String expectedDump = "analyzedField:\n"
        + "\t'[61 61]':2: [(0, 0, 2, [70 6f 73 3a 20 30]), (1, 6, 8, [70 6f 73 3a 20 32])]\n"
        + "\t'[62 62]':1: [(1, 3, 5, [70 6f 73 3a 20 31])]\n"
        + "\tterms=2, positions=3\n"
        + "pointAndDvField:\n"
        + "\tterms=0, positions=0\n"
        + "\n"
        + "fields=2, terms=2, positions=3";
    assertEquals(expectedDump, memoryIndex.toStringDebug());
}
Also used : BinaryPoint(org.apache.lucene.document.BinaryPoint) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockPayloadAnalyzer(org.apache.lucene.analysis.MockPayloadAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) MockPayloadAnalyzer(org.apache.lucene.analysis.MockPayloadAnalyzer) FieldType(org.apache.lucene.document.FieldType)

Example 3 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class BasePointsFormatTestCase method testBasic.

/**
 * Indexes 20 documents, each with one 4-byte binary point encoding the loop counter,
 * force-merges to a single segment, and checks that an intersect whose cells always cross
 * the query visits all 20 documents and that each visited value decodes to its docID.
 *
 * @throws Exception if indexing or reading fails
 */
public void testBasic() throws Exception {
    Directory dir = getDirectory(20);
    IndexWriterConfig iwc = newIndexWriterConfig();
    // NOTE(review): the docID == value assertion below needs documents to keep insertion
    // order; presumably the log merge policy is chosen for that reason - confirm.
    iwc.setMergePolicy(newLogMergePolicy());
    IndexWriter w = new IndexWriter(dir, iwc);
    // One buffer is re-encoded and reused for every document.
    byte[] point = new byte[4];
    for (int i = 0; i < 20; i++) {
        Document doc = new Document();
        NumericUtils.intToSortableBytes(i, point, 0);
        doc.add(new BinaryPoint("dim", point));
        w.addDocument(doc);
    }
    w.forceMerge(1);
    w.close();
    DirectoryReader r = DirectoryReader.open(dir);
    LeafReader sub = getOnlyLeafReader(r);
    PointValues values = sub.getPointValues("dim");
    // Simple test: make sure intersect can visit every doc:
    BitSet seen = new BitSet();
    values.intersect(new IntersectVisitor() {

        @Override
        public Relation compare(byte[] minPacked, byte[] maxPacked) {
            // Always "crosses", so the value-carrying visit overload is the one expected to run.
            return Relation.CELL_CROSSES_QUERY;
        }

        @Override
        public void visit(int docID) {
            // The value-less overload must never be called, since compare never reports a
            // cell as fully inside the query.
            throw new IllegalStateException();
        }

        @Override
        public void visit(int docID, byte[] packedValue) {
            seen.set(docID);
            assertEquals(docID, NumericUtils.sortableBytesToInt(packedValue, 0));
        }
    });
    assertEquals(20, seen.cardinality());
    IOUtils.close(r, dir);
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BinaryPoint(org.apache.lucene.document.BinaryPoint) BitSet(java.util.BitSet) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) IntPoint(org.apache.lucene.document.IntPoint) Relation(org.apache.lucene.index.PointValues.Relation) Directory(org.apache.lucene.store.Directory)

Example 4 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class BasePointsFormatTestCase method testMerge.

/**
 * Same scenario as a plain 20-document point index, but with a commit after the document
 * with i == 10, so the final {@code forceMerge(1)} runs after an intermediate commit.
 * Afterwards an intersect whose cells always cross the query must visit all 20 documents,
 * and each visited value must decode to its docID.
 *
 * @throws Exception if indexing or reading fails
 */
public void testMerge() throws Exception {
    Directory dir = getDirectory(20);
    IndexWriterConfig iwc = newIndexWriterConfig();
    // NOTE(review): the docID == value assertion below needs documents to keep insertion
    // order across the merge; presumably the log merge policy is chosen for that - confirm.
    iwc.setMergePolicy(newLogMergePolicy());
    IndexWriter w = new IndexWriter(dir, iwc);
    // One buffer is re-encoded and reused for every document.
    byte[] point = new byte[4];
    for (int i = 0; i < 20; i++) {
        Document doc = new Document();
        NumericUtils.intToSortableBytes(i, point, 0);
        doc.add(new BinaryPoint("dim", point));
        w.addDocument(doc);
        if (i == 10) {
            // Commit part-way through indexing, before the final forceMerge.
            w.commit();
        }
    }
    w.forceMerge(1);
    w.close();
    DirectoryReader r = DirectoryReader.open(dir);
    LeafReader sub = getOnlyLeafReader(r);
    PointValues values = sub.getPointValues("dim");
    // Simple test: make sure intersect can visit every doc:
    BitSet seen = new BitSet();
    values.intersect(new IntersectVisitor() {

        @Override
        public Relation compare(byte[] minPacked, byte[] maxPacked) {
            // Always "crosses", so the value-carrying visit overload is the one expected to run.
            return Relation.CELL_CROSSES_QUERY;
        }

        @Override
        public void visit(int docID) {
            // The value-less overload must never be called, since compare never reports a
            // cell as fully inside the query.
            throw new IllegalStateException();
        }

        @Override
        public void visit(int docID, byte[] packedValue) {
            seen.set(docID);
            assertEquals(docID, NumericUtils.sortableBytesToInt(packedValue, 0));
        }
    });
    assertEquals(20, seen.cardinality());
    IOUtils.close(r, dir);
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BinaryPoint(org.apache.lucene.document.BinaryPoint) BitSet(java.util.BitSet) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) IntPoint(org.apache.lucene.document.IntPoint) Relation(org.apache.lucene.index.PointValues.Relation) Directory(org.apache.lucene.store.Directory)

Example 5 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestIndexSorting method testRandom2.

/**
 * Builds one list of documents covering many field kinds (stored/string fields, a
 * pre-built token stream, doc values of several types, norms, term vectors and a binary
 * point), then indexes it twice: once in id order without an index sort, and once shuffled
 * with an index sort on the "numeric" field followed by {@code forceMerge(1)}. The two
 * resulting readers are asserted to be equal.
 *
 * @throws Exception if indexing or reading fails
 */
public void testRandom2() throws Exception {
    int numDocs = atLeast(100);
    // Text field type that also indexes positions and offsets.
    FieldType POSITIONS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
    POSITIONS_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    POSITIONS_TYPE.freeze();
    // Text field type that stores term vectors.
    FieldType TERM_VECTORS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
    TERM_VECTORS_TYPE.setStoreTermVectors(true);
    TERM_VECTORS_TYPE.freeze();
    // Analyzer whose only component is a MockTokenizer (no additional filters).
    Analyzer a = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer();
            return new TokenStreamComponents(tokenizer, tokenizer);
        }
    };
    List<Document> docs = new ArrayList<>();
    for (int i = 0; i < numDocs; i++) {
        // Ids are multiples of 10, so id + 1 (used for the second multi-valued entries below)
        // never collides with another document's id.
        int id = i * 10;
        Document doc = new Document();
        doc.add(new StringField("id", Integer.toString(id), Store.YES));
        doc.add(new StringField("docs", "#all#", Store.NO));
        // NOTE(review): PositionsTokenStream is declared outside this view; presumably its
        // output depends on the id set here - confirm.
        PositionsTokenStream positions = new PositionsTokenStream();
        positions.setId(id);
        doc.add(new Field("positions", positions, POSITIONS_TYPE));
        doc.add(new NumericDocValuesField("numeric", id));
        // The id's decimal string repeated id times, space-separated, so the field length
        // differs per document.
        String value = IntStream.range(0, id).mapToObj(k -> Integer.toString(id)).collect(Collectors.joining(" "));
        TextField norms = new TextField("norms", value, Store.NO);
        doc.add(norms);
        doc.add(new BinaryDocValuesField("binary", new BytesRef(Integer.toString(id))));
        doc.add(new SortedDocValuesField("sorted", new BytesRef(Integer.toString(id))));
        doc.add(new SortedSetDocValuesField("multi_valued_string", new BytesRef(Integer.toString(id))));
        doc.add(new SortedSetDocValuesField("multi_valued_string", new BytesRef(Integer.toString(id + 1))));
        doc.add(new SortedNumericDocValuesField("multi_valued_numeric", id));
        doc.add(new SortedNumericDocValuesField("multi_valued_numeric", id + 1));
        doc.add(new Field("term_vectors", Integer.toString(id), TERM_VECTORS_TYPE));
        // Fresh 4-byte buffer per document holding the sortable encoding of the id.
        byte[] bytes = new byte[4];
        NumericUtils.intToSortableBytes(id, bytes, 0);
        doc.add(new BinaryPoint("points", bytes));
        docs.add(doc);
    }
    // Must use the same seed for both RandomIndexWriters so they behave identically
    long seed = random().nextLong();
    // We add documents already in ID order for the first writer:
    Directory dir1 = newFSDirectory(createTempDir());
    Random random1 = new Random(seed);
    IndexWriterConfig iwc1 = newIndexWriterConfig(random1, a);
    // for testing norms field
    iwc1.setSimilarity(new NormsSimilarity(iwc1.getSimilarity()));
    // preserve docIDs
    iwc1.setMergePolicy(newLogMergePolicy());
    if (VERBOSE) {
        System.out.println("TEST: now index pre-sorted");
    }
    RandomIndexWriter w1 = new RandomIndexWriter(random1, dir1, iwc1);
    for (Document doc : docs) {
        // Re-apply the document's id to its token stream before each add.
        // NOTE(review): presumably needed because the same stream instance is reused for
        // both writers - verify against PositionsTokenStream.
        ((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id")));
        w1.addDocument(doc);
    }
    // We shuffle documents, but set index sort, for the second writer:
    Directory dir2 = newFSDirectory(createTempDir());
    Random random2 = new Random(seed);
    IndexWriterConfig iwc2 = newIndexWriterConfig(random2, a);
    // for testing norms field
    iwc2.setSimilarity(new NormsSimilarity(iwc2.getSimilarity()));
    // Sort the second index by the "numeric" doc-values field, which holds each document's id.
    Sort sort = new Sort(new SortField("numeric", SortField.Type.INT));
    iwc2.setIndexSort(sort);
    Collections.shuffle(docs, random());
    if (VERBOSE) {
        System.out.println("TEST: now index with index-time sorting");
    }
    RandomIndexWriter w2 = new RandomIndexWriter(random2, dir2, iwc2);
    int count = 0;
    // Commit once at a random point, after at least one and before the last document.
    int commitAtCount = TestUtil.nextInt(random(), 1, numDocs - 1);
    for (Document doc : docs) {
        // Same id re-application as for the first writer.
        ((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id")));
        if (count++ == commitAtCount) {
            // Ensure forceMerge really does merge
            w2.commit();
        }
        w2.addDocument(doc);
    }
    if (VERBOSE) {
        System.out.println("TEST: now force merge");
    }
    w2.forceMerge(1);
    DirectoryReader r1 = w1.getReader();
    DirectoryReader r2 = w2.getReader();
    if (VERBOSE) {
        System.out.println("TEST: now compare r1=" + r1 + " r2=" + r2);
    }
    // The merged segment must report the configured index sort, and its contents must match
    // the index that was fed in id order.
    assertEquals(sort, getOnlyLeafReader(r2).getMetaData().getSort());
    assertReaderEquals("left: sorted by hand; right: sorted by Lucene", r1, r2);
    IOUtils.close(w1, w2, r1, r2, dir1, dir2);
}
Also used : Query(org.apache.lucene.search.Query) ScoreDoc(org.apache.lucene.search.ScoreDoc) BinaryPoint(org.apache.lucene.document.BinaryPoint) FieldType(org.apache.lucene.document.FieldType) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericUtils(org.apache.lucene.util.NumericUtils) Random(java.util.Random) StoredField(org.apache.lucene.document.StoredField) FieldDoc(org.apache.lucene.search.FieldDoc) FilterCodec(org.apache.lucene.codecs.FilterCodec) NO_MORE_DOCS(org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS) Document(org.apache.lucene.document.Document) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Directory(org.apache.lucene.store.Directory) SortField(org.apache.lucene.search.SortField) EarlyTerminatingSortingCollector(org.apache.lucene.search.EarlyTerminatingSortingCollector) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) Sort(org.apache.lucene.search.Sort) BytesRef(org.apache.lucene.util.BytesRef) Set(java.util.Set) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) Collectors(java.util.stream.Collectors) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) PointsFormat(org.apache.lucene.codecs.PointsFormat) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) LuceneTestCase(org.apache.lucene.util.LuceneTestCase) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) IndexSearcher(org.apache.lucene.search.IndexSearcher) IntStream(java.util.stream.IntStream) 
PointsReader(org.apache.lucene.codecs.PointsReader) Tokenizer(org.apache.lucene.analysis.Tokenizer) StringField(org.apache.lucene.document.StringField) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) TestUtil(org.apache.lucene.util.TestUtil) HashMap(java.util.HashMap) FixedBitSet(org.apache.lucene.util.FixedBitSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Similarity(org.apache.lucene.search.similarities.Similarity) Store(org.apache.lucene.document.Field.Store) IntPoint(org.apache.lucene.document.IntPoint) TermStatistics(org.apache.lucene.search.TermStatistics) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) TopDocs(org.apache.lucene.search.TopDocs) TokenStream(org.apache.lucene.analysis.TokenStream) PayloadAttribute(org.apache.lucene.analysis.tokenattributes.PayloadAttribute) Analyzer(org.apache.lucene.analysis.Analyzer) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) IOUtils(org.apache.lucene.util.IOUtils) IOException(java.io.IOException) Consumer(java.util.function.Consumer) PointsWriter(org.apache.lucene.codecs.PointsWriter) CollectionStatistics(org.apache.lucene.search.CollectionStatistics) TermQuery(org.apache.lucene.search.TermQuery) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) Collections(java.util.Collections) BinaryPoint(org.apache.lucene.document.BinaryPoint) ArrayList(java.util.ArrayList) SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StoredField(org.apache.lucene.document.StoredField) 
SortField(org.apache.lucene.search.SortField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) StringField(org.apache.lucene.document.StringField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Random(java.util.Random) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) TextField(org.apache.lucene.document.TextField) Sort(org.apache.lucene.search.Sort) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BinaryPoint(org.apache.lucene.document.BinaryPoint) IntPoint(org.apache.lucene.document.IntPoint) FieldType(org.apache.lucene.document.FieldType) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StringField(org.apache.lucene.document.StringField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField)

Aggregations

BinaryPoint (org.apache.lucene.document.BinaryPoint)40 Document (org.apache.lucene.document.Document)38 Directory (org.apache.lucene.store.Directory)35 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)20 FSDirectory (org.apache.lucene.store.FSDirectory)18 RAMDirectory (org.apache.lucene.store.RAMDirectory)18 IntPoint (org.apache.lucene.document.IntPoint)17 IndexReader (org.apache.lucene.index.IndexReader)11 DoublePoint (org.apache.lucene.document.DoublePoint)10 FloatPoint (org.apache.lucene.document.FloatPoint)10 LongPoint (org.apache.lucene.document.LongPoint)10 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)10 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)9 BitSet (java.util.BitSet)7 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)7 IndexWriter (org.apache.lucene.index.IndexWriter)7 IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor)7 Relation (org.apache.lucene.index.PointValues.Relation)7 IOException (java.io.IOException)5 FieldType (org.apache.lucene.document.FieldType)4