Search in sources :

Example 81 with StoredField

use of org.apache.lucene.document.StoredField in project lucene-solr by apache.

the class TestTermAutomatonQuery method testRandom.

/**
 * Randomized equivalence test: for many random sets of short token strings, builds a
 * {@code TermAutomatonQuery} and a {@code BooleanQuery} of {@code MultiPhraseQuery} clauses
 * from the same strings and asserts that both match the same documents.
 *
 * <p>Documents contain random sequences of the tokens {@code a}, {@code b} and {@code c};
 * a {@code '*'} in a generated string stands for "any term at this position".
 */
public void testRandom() throws Exception {
    int numDocs = atLeast(100);
    Directory dir = newDirectory();
    // Adds occasional random synonyms:
    Analyzer analyzer = new Analyzer() {

        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true, 100);
            tokenizer.setEnableChecks(true);
            TokenFilter filt = new MockTokenFilter(tokenizer, MockTokenFilter.EMPTY_STOPSET);
            filt = new RandomSynonymFilter(filt);
            return new TokenStreamComponents(tokenizer, filt);
        }
    };
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    // Index numDocs documents, each holding at least 10 space-separated single-letter tokens:
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        int numTokens = atLeast(10);
        StringBuilder sb = new StringBuilder();
        for (int j = 0; j < numTokens; j++) {
            sb.append(' ');
            // 97 is 'a', so this appends one of 'a', 'b' or 'c':
            sb.append((char) (97 + random().nextInt(3)));
        }
        String contents = sb.toString();
        doc.add(newTextField("field", contents, Field.Store.NO));
        // Stored-only id, used to report which documents matched:
        doc.add(new StoredField("id", "" + i));
        if (VERBOSE) {
            System.out.println("  doc " + i + " -> " + contents);
        }
        w.addDocument(doc);
    }
    IndexReader r = w.getReader();
    IndexSearcher s = newSearcher(r);
    // Used to match ANY using MultiPhraseQuery:
    Term[] allTerms = new Term[] { new Term("field", "a"), new Term("field", "b"), new Term("field", "c") };
    int numIters = atLeast(1000);
    for (int iter = 0; iter < numIters; iter++) {
        // Build the TermAutomatonQuery (from a finite automaton; '*' labels
        // become "any" transitions) and also the "equivalent" BooleanQuery,
        // and make sure they match the same docs:
        BooleanQuery.Builder bq = new BooleanQuery.Builder();
        int count = TestUtil.nextInt(random(), 1, 5);
        Set<BytesRef> strings = new HashSet<>();
        for (int i = 0; i < count; i++) {
            StringBuilder sb = new StringBuilder();
            int numTokens = TestUtil.nextInt(random(), 1, 5);
            for (int j = 0; j < numTokens; j++) {
                // Wildcards are only generated at interior positions (never first or last):
                if (j > 0 && j < numTokens - 1 && random().nextInt(5) == 3) {
                    sb.append('*');
                } else {
                    sb.append((char) (97 + random().nextInt(3)));
                }
            }
            String string = sb.toString();
            // One phrase position per character; '*' accepts any of the three terms:
            MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder();
            for (int j = 0; j < string.length(); j++) {
                if (string.charAt(j) == '*') {
                    mpqb.add(allTerms);
                } else {
                    mpqb.add(new Term("field", "" + string.charAt(j)));
                }
            }
            bq.add(mpqb.build(), BooleanClause.Occur.SHOULD);
            strings.add(new BytesRef(string));
        }
        // De-duplicated (via the set) and sorted before building the union automaton:
        List<BytesRef> stringsList = new ArrayList<>(strings);
        Collections.sort(stringsList);
        Automaton a = Automata.makeStringUnion(stringsList);
        // Translate automaton to query:
        TermAutomatonQuery q = new TermAutomatonQuery("field");
        int numStates = a.getNumStates();
        // Mirror every automaton state (and its accept flag) in the query:
        for (int i = 0; i < numStates; i++) {
            q.createState();
            q.setAccept(i, a.isAccept(i));
        }
        Transition t = new Transition();
        for (int i = 0; i < numStates; i++) {
            int transCount = a.initTransition(i, t);
            for (int j = 0; j < transCount; j++) {
                a.getNextTransition(t);
                // A transition covers a label range; expand it one label at a time:
                for (int label = t.min; label <= t.max; label++) {
                    if ((char) label == '*') {
                        q.addAnyTransition(t.source, t.dest);
                    } else {
                        q.addTransition(t.source, t.dest, "" + (char) label);
                    }
                }
            }
        }
        q.finish();
        if (VERBOSE) {
            System.out.println("TEST: iter=" + iter);
            for (BytesRef string : stringsList) {
                System.out.println("  string: " + string.utf8ToString());
            }
            System.out.println(q.toDot());
        }
        Query q1 = q;
        Query q2 = bq.build();
        // Occasionally wrap both queries with the same random filter instance:
        if (random().nextInt(5) == 1) {
            if (VERBOSE) {
                System.out.println("  use random filter");
            }
            RandomQuery filter = new RandomQuery(random().nextLong(), random().nextFloat());
            q1 = new BooleanQuery.Builder().add(q1, Occur.MUST).add(filter, Occur.FILTER).build();
            q2 = new BooleanQuery.Builder().add(q2, Occur.MUST).add(filter, Occur.FILTER).build();
        }
        TopDocs hits1 = s.search(q1, numDocs);
        TopDocs hits2 = s.search(q2, numDocs);
        // NOTE(review): toDocIDs is defined elsewhere; presumably it maps each hit to its stored "id" value - confirm.
        Set<String> hits1Docs = toDocIDs(s, hits1);
        Set<String> hits2Docs = toDocIDs(s, hits2);
        try {
            assertEquals(hits2.totalHits, hits1.totalHits);
            assertEquals(hits2Docs, hits1Docs);
        } catch (AssertionError ae) {
            // Print the per-document differences before rethrowing, to ease debugging:
            System.out.println("FAILED:");
            for (String id : hits1Docs) {
                if (hits2Docs.contains(id) == false) {
                    System.out.println(String.format(Locale.ROOT, "  id=%3s matched but should not have", id));
                }
            }
            for (String id : hits2Docs) {
                if (hits1Docs.contains(id) == false) {
                    System.out.println(String.format(Locale.ROOT, "  id=%3s did not match but should have", id));
                }
            }
            throw ae;
        }
    }
    IOUtils.close(w, r, dir, analyzer);
}
Also used : MockTokenFilter(org.apache.lucene.analysis.MockTokenFilter) ArrayList(java.util.ArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) StoredField(org.apache.lucene.document.StoredField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) MockTokenFilter(org.apache.lucene.analysis.MockTokenFilter) TokenFilter(org.apache.lucene.analysis.TokenFilter) HashSet(java.util.HashSet) Automaton(org.apache.lucene.util.automaton.Automaton) Term(org.apache.lucene.index.Term) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) IndexReader(org.apache.lucene.index.IndexReader) Transition(org.apache.lucene.util.automaton.Transition) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 82 with StoredField

use of org.apache.lucene.document.StoredField in project lucene-solr by apache.

the class TestFunctionQuerySort method testSearchAfterWhenSortingByFunctionValues.

/**
 * Indexes five documents with values 5..1, sorts a match-all query by a function of
 * that value, and checks both the plain sorted search and a follow-up
 * {@code searchAfter} call starting from the second hit.
 */
public void testSearchAfterWhenSortingByFunctionValues() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(null);
    // depends on docid order
    config.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

    // A single Document instance is reused; only the two field values change per add.
    Document doc = new Document();
    Field storedValue = new StoredField("value", 0);
    Field docValue = new NumericDocValuesField("value", 0);
    doc.add(storedValue);
    doc.add(docValue);

    // Write the documents in decreasing value order (n, n-1, ..., 1)
    final int numVals = 5;
    int next = numVals;
    while (next > 0) {
        storedValue.setIntValue(next);
        docValue.setLongValue(next);
        writer.addDocument(doc);
        next--;
    }

    // Open the index for searching
    IndexReader reader = writer.getReader();
    writer.close();
    IndexSearcher searcher = newSearcher(reader);

    // Trivial ValueSource function that bypasses single field ValueSource sort optimization
    ValueSource[] operands = new ValueSource[] { new IntFieldSource("value"), new DoubleConstValueSource(1.0D) };
    ValueSource function = new SumFloatFunction(operands);
    // ...turned into the (only) sort criterion
    Sort orderBy = new Sort(function.getSortField(false).rewrite(searcher));

    // All documents, ordered by ascending function value
    Query matchAll = new MatchAllDocsQuery();
    TopDocs hits = searcher.search(matchAll, reader.maxDoc(), orderBy);
    assertEquals(numVals, hits.scoreDocs.length);

    // Sorting works in general: the k-th hit (1-based) must carry stored value k
    for (int rank = 0; rank < hits.scoreDocs.length; rank++) {
        String stored = reader.document(hits.scoreDocs[rank].doc).get("value");
        assertEquals(rank + 1, Integer.parseInt(stored));
    }

    // Now page past hit #2 using IS.searchAfter()
    int afterIdx = 1;
    FieldDoc afterHit = (FieldDoc) hits.scoreDocs[afterIdx];
    hits = searcher.searchAfter(afterHit, matchAll, reader.maxDoc(), orderBy);

    // Everything up to and including the "after" hit must be gone
    int expectedRemaining = numVals - (afterIdx + 1);
    assertEquals(expectedRemaining, hits.scoreDocs.length);

    // Every remaining hit must really come "after" the anchor hit
    int afterValue = ((Double) afterHit.fields[0]).intValue();
    for (ScoreDoc hit : hits.scoreDocs) {
        int stored = Integer.parseInt(reader.document(hit.doc).get("value"));
        assertTrue(stored >= afterValue);
        assertFalse(hit.doc == afterHit.doc);
    }

    reader.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) FieldDoc(org.apache.lucene.search.FieldDoc) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ScoreDoc(org.apache.lucene.search.ScoreDoc) DoubleConstValueSource(org.apache.lucene.queries.function.valuesource.DoubleConstValueSource) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) SortField(org.apache.lucene.search.SortField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Field(org.apache.lucene.document.Field) StoredField(org.apache.lucene.document.StoredField) IntFieldSource(org.apache.lucene.queries.function.valuesource.IntFieldSource) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) DoubleConstValueSource(org.apache.lucene.queries.function.valuesource.DoubleConstValueSource) IndexReader(org.apache.lucene.index.IndexReader) SumFloatFunction(org.apache.lucene.queries.function.valuesource.SumFloatFunction) Sort(org.apache.lucene.search.Sort) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 83 with StoredField

use of org.apache.lucene.document.StoredField in project lucene-solr by apache.

the class FunctionTestSetup method addDoc.

/**
 * Builds the test document for index {@code i} and adds it to {@code iw}.
 *
 * <p>The document's score/id value is {@code i + 1}. Every numeric field is written both
 * as a stored field and as doc values; the multi-valued "MIN" fields hold the value and
 * value + 1, the multi-valued "MAX" fields hold the value and value - 1.
 */
private static void addDoc(RandomIndexWriter iw, int i) throws Exception {
    Document d = new Document();
    int scoreAndID = i + 1;

    // id: stored, untokenized, no norms (for debug purposes), plus sorted doc values
    FieldType idType = new FieldType(TextField.TYPE_STORED);
    idType.setTokenized(false);
    idType.setOmitNorms(true);
    d.add(newField(ID_FIELD, id2String(scoreAndID), idType));
    d.add(new SortedDocValuesField(ID_FIELD, new BytesRef(id2String(scoreAndID))));

    // text: not stored, no norms (for regular search)
    FieldType textType = new FieldType(TextField.TYPE_NOT_STORED);
    textType.setOmitNorms(true);
    d.add(newField(TEXT_FIELD, "text of doc" + scoreAndID + textLine(i), textType));

    // single-valued int (for function scoring)
    d.add(new StoredField(INT_FIELD, scoreAndID));
    d.add(new NumericDocValuesField(INT_FIELD, scoreAndID));

    // single-valued float (for function scoring); doc value holds the raw float bits
    d.add(new StoredField(FLOAT_FIELD, scoreAndID));
    d.add(new NumericDocValuesField(FLOAT_FIELD, Float.floatToRawIntBits(scoreAndID)));

    // multi-valued int: value and value + 1
    d.add(new StoredField(INT_FIELD_MV_MIN, scoreAndID));
    d.add(new StoredField(INT_FIELD_MV_MIN, scoreAndID + 1));
    d.add(new SortedNumericDocValuesField(INT_FIELD_MV_MIN, scoreAndID));
    d.add(new SortedNumericDocValuesField(INT_FIELD_MV_MIN, scoreAndID + 1));

    // multi-valued int: value and value - 1
    d.add(new StoredField(INT_FIELD_MV_MAX, scoreAndID));
    d.add(new StoredField(INT_FIELD_MV_MAX, scoreAndID - 1));
    d.add(new SortedNumericDocValuesField(INT_FIELD_MV_MAX, scoreAndID));
    d.add(new SortedNumericDocValuesField(INT_FIELD_MV_MAX, scoreAndID - 1));

    // multi-valued float: value and value + 1, doc values in sortable-int encoding
    d.add(new StoredField(FLOAT_FIELD_MV_MIN, scoreAndID));
    d.add(new StoredField(FLOAT_FIELD_MV_MIN, scoreAndID + 1));
    d.add(new SortedNumericDocValuesField(FLOAT_FIELD_MV_MIN, NumericUtils.floatToSortableInt(scoreAndID)));
    d.add(new SortedNumericDocValuesField(FLOAT_FIELD_MV_MIN, NumericUtils.floatToSortableInt(scoreAndID + 1)));

    // multi-valued float: value and value - 1, doc values in sortable-int encoding
    d.add(new StoredField(FLOAT_FIELD_MV_MAX, scoreAndID));
    d.add(new StoredField(FLOAT_FIELD_MV_MAX, scoreAndID - 1));
    d.add(new SortedNumericDocValuesField(FLOAT_FIELD_MV_MAX, NumericUtils.floatToSortableInt(scoreAndID)));
    d.add(new SortedNumericDocValuesField(FLOAT_FIELD_MV_MAX, NumericUtils.floatToSortableInt(scoreAndID - 1)));

    iw.addDocument(d);
    log("added: " + d);
}
Also used : NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StoredField(org.apache.lucene.document.StoredField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) StoredField(org.apache.lucene.document.StoredField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Document(org.apache.lucene.document.Document) BytesRef(org.apache.lucene.util.BytesRef) FieldType(org.apache.lucene.document.FieldType)

Example 84 with StoredField

use of org.apache.lucene.document.StoredField in project lucene-solr by apache.

the class BaseDocValuesFormatTestCase method doTestSortedSetVsStoredFields.

/**
 * Indexes random documents whose values are written both as stored fields ("stored",
 * in sorted order) and as sorted-set doc values ("dv", in shuffled order), deletes a
 * random subset, and verifies that the doc values of every document enumerate exactly
 * the stored values. The comparison is run once on the reader as-is and once more
 * after force-merging to a single segment.
 *
 * @param numDocs number of documents to index
 * @param minLength minimum length of a generated value
 * @param maxLength maximum length of a generated value
 * @param maxValuesPerDoc upper bound on the number of values picked per document
 * @param maxUniqueValues upper bound on the size of the pool of distinct values
 */
protected void doTestSortedSetVsStoredFields(int numDocs, int minLength, int maxLength, int maxValuesPerDoc, int maxUniqueValues) throws Exception {
    Directory dir = newFSDirectory(createTempDir("dvduel"));
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
    // Build the pool of distinct values; the attempt count is capped so that this
    // terminates even if the length range cannot yield maxUniqueValues distinct strings.
    Set<String> valueSet = new HashSet<>();
    for (int i = 0; i < 10000 && valueSet.size() < maxUniqueValues; ++i) {
        final int length = TestUtil.nextInt(random(), minLength, maxLength);
        valueSet.add(TestUtil.randomSimpleString(random(), length));
    }
    String[] uniqueValues = valueSet.toArray(new String[0]);
    // index some docs
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
        doc.add(idField);
        int numValues = TestUtil.nextInt(random(), 0, maxValuesPerDoc);
        // create a random set of strings (TreeSet: de-duplicated and sorted)
        Set<String> values = new TreeSet<>();
        for (int v = 0; v < numValues; v++) {
            values.add(RandomPicks.randomFrom(random(), uniqueValues));
        }
        // add ordered to the stored field
        for (String v : values) {
            doc.add(new StoredField("stored", v));
        }
        // add in any order to the dv field
        ArrayList<String> unordered = new ArrayList<>(values);
        Collections.shuffle(unordered, random());
        for (String v : unordered) {
            doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
        }
        writer.addDocument(doc);
        if (random().nextInt(31) == 0) {
            writer.commit();
        }
    }
    // delete some docs; Random.nextInt requires a positive bound, so skip the draw
    // entirely when there are fewer than 10 docs
    int maxDeletions = numDocs / 10;
    int numDeletions = maxDeletions > 0 ? random().nextInt(maxDeletions) : 0;
    for (int i = 0; i < numDeletions; i++) {
        int id = random().nextInt(numDocs);
        writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }
    // compare
    DirectoryReader ir = writer.getReader();
    TestUtil.checkReader(ir);
    assertSortedSetMatchesStoredFields(ir);
    ir.close();
    writer.forceMerge(1);
    // compare again
    ir = writer.getReader();
    TestUtil.checkReader(ir);
    assertSortedSetMatchesStoredFields(ir);
    ir.close();
    writer.close();
    dir.close();
}

/**
 * For every leaf of {@code ir}, asserts that the "dv" sorted-set doc values of each
 * document that has "stored" values yield exactly those values, in the same order.
 */
private void assertSortedSetMatchesStoredFields(DirectoryReader ir) throws Exception {
    for (LeafReaderContext context : ir.leaves()) {
        LeafReader r = context.reader();
        SortedSetDocValues docValues = r.getSortedSetDocValues("dv");
        if (docValues == null) {
            // This leaf exposes no "dv" doc values, so there is nothing to iterate.
            continue;
        }
        for (int i = 0; i < r.maxDoc(); i++) {
            String[] stringValues = r.document(i).getValues("stored");
            // Advance the iterator only while it is still behind the current doc.
            if (docValues.docID() < i) {
                docValues.nextDoc();
            }
            if (stringValues.length > 0) {
                assertEquals(i, docValues.docID());
                for (int j = 0; j < stringValues.length; j++) {
                    long ord = docValues.nextOrd();
                    assert ord != NO_MORE_ORDS;
                    BytesRef scratch = docValues.lookupOrd(ord);
                    assertEquals(stringValues[j], scratch.utf8ToString());
                }
                // No extra ords beyond the stored values:
                assertEquals(NO_MORE_ORDS, docValues.nextOrd());
            }
        }
    }
}
Also used : ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StoredField(org.apache.lucene.document.StoredField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) StringField(org.apache.lucene.document.StringField) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) TreeSet(java.util.TreeSet) StringField(org.apache.lucene.document.StringField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Example 85 with StoredField

use of org.apache.lucene.document.StoredField in project lucene-solr by apache.

the class BaseDocValuesFormatTestCase method testEmptyBinaryValueOnPageSizes.

// LUCENE-5218
public void testEmptyBinaryValueOnPageSizes() throws Exception {
    // For growing power-of-two sizes: index four binary values of that size,
    // followed by an empty value, and read all five back:
    for (int shift = 0; shift < 20; shift++) {
        // The larger sizes are only tried when the codec accepts huge binary values.
        if (shift > 14 && !codecAcceptsHugeBinaryValues("field")) {
            break;
        }
        final int size = 1 << shift;
        Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir);

        // One zero-filled buffer of the current size, shared by the four large docs.
        BytesRef bytes = new BytesRef(new byte[size]);
        for (int n = 0; n < 4; n++) {
            Document largeDoc = new Document();
            largeDoc.add(new BinaryDocValuesField("field", bytes));
            w.addDocument(largeDoc);
        }

        // Fifth document: an id plus an empty binary value.
        Document emptyDoc = new Document();
        emptyDoc.add(new StoredField("id", "5"));
        emptyDoc.add(new BinaryDocValuesField("field", new BytesRef()));
        w.addDocument(emptyDoc);

        IndexReader r = w.getReader();
        w.close();

        // Every one of the five docs must have a value that is either empty or full-sized.
        BinaryDocValues values = MultiDocValues.getBinaryValues(r, "field");
        for (int docId = 0; docId < 5; docId++) {
            assertEquals(docId, values.nextDoc());
            int length = values.binaryValue().length;
            assertTrue(length == 0 || length == size);
        }

        r.close();
        dir.close();
    }
}
Also used : StoredField(org.apache.lucene.document.StoredField) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Aggregations

StoredField (org.apache.lucene.document.StoredField)109 Document (org.apache.lucene.document.Document)97 Directory (org.apache.lucene.store.Directory)72 StringField (org.apache.lucene.document.StringField)43 Field (org.apache.lucene.document.Field)40 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)39 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)36 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)34 BytesRef (org.apache.lucene.util.BytesRef)34 TextField (org.apache.lucene.document.TextField)30 IndexReader (org.apache.lucene.index.IndexReader)29 IndexSearcher (org.apache.lucene.search.IndexSearcher)26 IntPoint (org.apache.lucene.document.IntPoint)24 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)23 TopDocs (org.apache.lucene.search.TopDocs)23 SortField (org.apache.lucene.search.SortField)22 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)21 Sort (org.apache.lucene.search.Sort)21 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)18 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)18