Search in sources :

Example 31 with SortedDocValuesField

use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.

the class ICUCollationField method createFields.

@Override
public List<IndexableField> createFields(SchemaField field, Object value) {
    if (field.hasDocValues()) {
        List<IndexableField> fields = new ArrayList<>();
        fields.add(createField(field, value));
        final BytesRef bytes = getCollationKey(field.getName(), value.toString());
        if (field.multiValued()) {
            fields.add(new SortedSetDocValuesField(field.getName(), bytes));
        } else {
            fields.add(new SortedDocValuesField(field.getName(), bytes));
        }
        return fields;
    } else {
        return Collections.singletonList(createField(field, value));
    }
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) ArrayList(java.util.ArrayList) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef)

Example 32 with SortedDocValuesField

use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.

the class BoolFieldSource method createFields.

@Override
public List<IndexableField> createFields(SchemaField field, Object value) {
    IndexableField fval = createField(field, value);
    if (field.hasDocValues()) {
        IndexableField docval;
        final BytesRef bytes = new BytesRef(toInternal(value.toString()));
        if (field.multiValued()) {
            docval = new SortedSetDocValuesField(field.getName(), bytes);
        } else {
            docval = new SortedDocValuesField(field.getName(), bytes);
        }
        // Only create a list of we have 2 values...
        if (fval != null) {
            List<IndexableField> fields = new ArrayList<>(2);
            fields.add(fval);
            fields.add(docval);
            return fields;
        }
        fval = docval;
    }
    return Collections.singletonList(fval);
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) ArrayList(java.util.ArrayList) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef)

Example 33 with SortedDocValuesField

use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.

the class CollationField method createFields.

@Override
public List<IndexableField> createFields(SchemaField field, Object value) {
    if (field.hasDocValues()) {
        List<IndexableField> fields = new ArrayList<>();
        fields.add(createField(field, value));
        final BytesRef bytes = getCollationKey(field.getName(), value.toString());
        if (field.multiValued()) {
            fields.add(new SortedSetDocValuesField(field.getName(), bytes));
        } else {
            fields.add(new SortedDocValuesField(field.getName(), bytes));
        }
        return fields;
    } else {
        return Collections.singletonList(createField(field, value));
    }
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) ArrayList(java.util.ArrayList) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef)

Example 34 with SortedDocValuesField

use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.

the class DistinctValuesCollectorTest method createIndexContext.

private IndexContext createIndexContext() throws Exception {
    Random random = random();
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
    int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;
    String[] groupValues = new String[numDocs / 5];
    String[] countValues = new String[numDocs / 10];
    for (int i = 0; i < groupValues.length; i++) {
        groupValues[i] = generateRandomNonEmptyString();
    }
    for (int i = 0; i < countValues.length; i++) {
        countValues[i] = generateRandomNonEmptyString();
    }
    List<String> contentStrings = new ArrayList<>();
    Map<String, Map<String, Set<String>>> searchTermToGroupCounts = new HashMap<>();
    for (int i = 1; i <= numDocs; i++) {
        String groupValue = random.nextInt(23) == 14 ? null : groupValues[random.nextInt(groupValues.length)];
        String countValue = random.nextInt(21) == 13 ? null : countValues[random.nextInt(countValues.length)];
        String content = "random" + random.nextInt(numDocs / 20);
        Map<String, Set<String>> groupToCounts = searchTermToGroupCounts.get(content);
        if (groupToCounts == null) {
            // Groups sort always DOCID asc...
            searchTermToGroupCounts.put(content, groupToCounts = new LinkedHashMap<>());
            contentStrings.add(content);
        }
        Set<String> countsVals = groupToCounts.get(groupValue);
        if (countsVals == null) {
            groupToCounts.put(groupValue, countsVals = new HashSet<>());
        }
        countsVals.add(countValue);
        Document doc = new Document();
        doc.add(new StringField("id", String.format(Locale.ROOT, "%09d", i), Field.Store.YES));
        doc.add(new SortedDocValuesField("id", new BytesRef(String.format(Locale.ROOT, "%09d", i))));
        if (groupValue != null) {
            addField(doc, GROUP_FIELD, groupValue);
        }
        if (countValue != null) {
            addField(doc, COUNT_FIELD, countValue);
        }
        doc.add(new TextField("content", content, Field.Store.YES));
        w.addDocument(doc);
    }
    DirectoryReader reader = w.getReader();
    if (VERBOSE) {
        for (int docID = 0; docID < reader.maxDoc(); docID++) {
            Document doc = reader.document(docID);
            System.out.println("docID=" + docID + " id=" + doc.get("id") + " content=" + doc.get("content") + " author=" + doc.get("author") + " publisher=" + doc.get("publisher"));
        }
    }
    w.close();
    return new IndexContext(dir, reader, searchTermToGroupCounts, contentStrings.toArray(new String[contentStrings.size()]));
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DirectoryReader(org.apache.lucene.index.DirectoryReader) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) LinkedHashMap(java.util.LinkedHashMap) Random(java.util.Random) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringField(org.apache.lucene.document.StringField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) TextField(org.apache.lucene.document.TextField) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Example 35 with SortedDocValuesField

use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.

the class AllGroupHeadsCollectorTest method testBasic.

public void testBasic() throws Exception {
    final String groupField = "author";
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    DocValuesType valueType = DocValuesType.SORTED;
    // 0
    Document doc = new Document();
    addGroupField(doc, groupField, "author1", valueType);
    doc.add(newTextField("content", "random text", Field.Store.NO));
    doc.add(new NumericDocValuesField("id_1", 1));
    doc.add(new SortedDocValuesField("id_2", new BytesRef("1")));
    w.addDocument(doc);
    // 1
    doc = new Document();
    addGroupField(doc, groupField, "author1", valueType);
    doc.add(newTextField("content", "some more random text blob", Field.Store.NO));
    doc.add(new NumericDocValuesField("id_1", 2));
    doc.add(new SortedDocValuesField("id_2", new BytesRef("2")));
    w.addDocument(doc);
    // 2
    doc = new Document();
    addGroupField(doc, groupField, "author1", valueType);
    doc.add(newTextField("content", "some more random textual data", Field.Store.NO));
    doc.add(new NumericDocValuesField("id_1", 3));
    doc.add(new SortedDocValuesField("id_2", new BytesRef("3")));
    w.addDocument(doc);
    // To ensure a second segment
    w.commit();
    // 3
    doc = new Document();
    addGroupField(doc, groupField, "author2", valueType);
    doc.add(newTextField("content", "some random text", Field.Store.NO));
    doc.add(new NumericDocValuesField("id_1", 4));
    doc.add(new SortedDocValuesField("id_2", new BytesRef("4")));
    w.addDocument(doc);
    // 4
    doc = new Document();
    addGroupField(doc, groupField, "author3", valueType);
    doc.add(newTextField("content", "some more random text", Field.Store.NO));
    doc.add(new NumericDocValuesField("id_1", 5));
    doc.add(new SortedDocValuesField("id_2", new BytesRef("5")));
    w.addDocument(doc);
    // 5
    doc = new Document();
    addGroupField(doc, groupField, "author3", valueType);
    doc.add(newTextField("content", "random blob", Field.Store.NO));
    doc.add(new NumericDocValuesField("id_1", 6));
    doc.add(new SortedDocValuesField("id_2", new BytesRef("6")));
    w.addDocument(doc);
    // 6 -- no author field
    doc = new Document();
    doc.add(newTextField("content", "random word stuck in alot of other text", Field.Store.NO));
    doc.add(new NumericDocValuesField("id_1", 6));
    doc.add(new SortedDocValuesField("id_2", new BytesRef("6")));
    w.addDocument(doc);
    // 7 -- no author field
    doc = new Document();
    doc.add(newTextField("content", "random word stuck in alot of other text", Field.Store.NO));
    doc.add(new NumericDocValuesField("id_1", 7));
    doc.add(new SortedDocValuesField("id_2", new BytesRef("7")));
    w.addDocument(doc);
    IndexReader reader = w.getReader();
    IndexSearcher indexSearcher = newSearcher(reader);
    w.close();
    int maxDoc = reader.maxDoc();
    Sort sortWithinGroup = new Sort(new SortField("id_1", SortField.Type.INT, true));
    AllGroupHeadsCollector<?> allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup);
    indexSearcher.search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
    assertTrue(arrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.retrieveGroupHeads()));
    assertTrue(openBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.retrieveGroupHeads(maxDoc), maxDoc));
    allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup);
    indexSearcher.search(new TermQuery(new Term("content", "some")), allGroupHeadsCollector);
    assertTrue(arrayContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.retrieveGroupHeads()));
    assertTrue(openBitSetContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.retrieveGroupHeads(maxDoc), maxDoc));
    allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup);
    indexSearcher.search(new TermQuery(new Term("content", "blob")), allGroupHeadsCollector);
    assertTrue(arrayContains(new int[] { 1, 5 }, allGroupHeadsCollector.retrieveGroupHeads()));
    assertTrue(openBitSetContains(new int[] { 1, 5 }, allGroupHeadsCollector.retrieveGroupHeads(maxDoc), maxDoc));
    // STRING sort type triggers different implementation
    Sort sortWithinGroup2 = new Sort(new SortField("id_2", SortField.Type.STRING, true));
    allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup2);
    indexSearcher.search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
    assertTrue(arrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.retrieveGroupHeads()));
    assertTrue(openBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.retrieveGroupHeads(maxDoc), maxDoc));
    Sort sortWithinGroup3 = new Sort(new SortField("id_2", SortField.Type.STRING, false));
    allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup3);
    indexSearcher.search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
    // 7 b/c higher doc id wins, even if order of field is in not in reverse.
    assertTrue(arrayContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.retrieveGroupHeads()));
    assertTrue(openBitSetContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.retrieveGroupHeads(maxDoc), maxDoc));
    indexSearcher.getIndexReader().close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) SortField(org.apache.lucene.search.SortField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) DocValuesType(org.apache.lucene.index.DocValuesType) Sort(org.apache.lucene.search.Sort) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Aggregations

SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)153 BytesRef (org.apache.lucene.util.BytesRef)152 Document (org.apache.lucene.document.Document)137 Directory (org.apache.lucene.store.Directory)109 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)87 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)66 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)53 SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField)35 StringField (org.apache.lucene.document.StringField)33 TextField (org.apache.lucene.document.TextField)31 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)30 Field (org.apache.lucene.document.Field)30 IndexReader (org.apache.lucene.index.IndexReader)30 Term (org.apache.lucene.index.Term)28 ArrayList (java.util.ArrayList)27 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)25 IndexSearcher (org.apache.lucene.search.IndexSearcher)25 TermQuery (org.apache.lucene.search.TermQuery)21 IntPoint (org.apache.lucene.document.IntPoint)20 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)18