Examples with SortedSetDocValuesField - org.apache.lucene.document.SortedSetDocValuesField

Example 66 with SortedSetDocValuesField

Use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.

In the class TestJoinUtil, method addLinkFields.

/**
 * Adds the join link value to {@code document} in every representation this test joins on:
 * a text field, int/float/long/double point fields, and the matching doc-values fields.
 *
 * @param random                    source of randomness handed to {@code newTextField}
 * @param document                  document that receives the fields
 * @param fieldName                 base field name; numeric variants use the suffixes
 *                                  {@code INT}, {@code FLOAT}, {@code LONG} and {@code DOUBLE}
 * @param linkValue                 link value, parsed as an unsigned hexadecimal int
 * @param multipleValuesPerDocument if true, multi-valued doc-values field types
 *                                  (sorted-set / sorted-numeric) are used; otherwise the
 *                                  single-valued field types are used
 * @param globalOrdinalJoin         if true, the link value is also added as a
 *                                  {@code SortedDocValuesField} named {@code join_field}
 * @throws NumberFormatException if {@code linkValue} is not a valid unsigned hex int
 */
private void addLinkFields(final Random random, Document document, final String fieldName, String linkValue, boolean multipleValuesPerDocument, boolean globalOrdinalJoin) {
    document.add(newTextField(random, fieldName, linkValue, Field.Store.NO));
    final int linkInt = Integer.parseUnsignedInt(linkValue, 16);
    document.add(new IntPoint(fieldName + "INT", linkInt));
    document.add(new FloatPoint(fieldName + "FLOAT", linkInt));
    // Widen to long BEFORE shifting: an int shift distance is taken modulo 32, so
    // "linkInt << 32" was a no-op and the long value merely repeated the int.
    // The low half is masked so a negative int does not sign-extend over the high half.
    final long linkLong = ((long) linkInt << 32) | (linkInt & 0xFFFFFFFFL);
    document.add(new LongPoint(fieldName + "LONG", linkLong));
    document.add(new DoublePoint(fieldName + "DOUBLE", linkLong));
    if (multipleValuesPerDocument) {
        document.add(new SortedSetDocValuesField(fieldName, new BytesRef(linkValue)));
        document.add(new SortedNumericDocValuesField(fieldName + "INT", linkInt));
        // Float/double values are stored as their raw bit patterns in the numeric doc values.
        document.add(new SortedNumericDocValuesField(fieldName + "FLOAT", Float.floatToRawIntBits(linkInt)));
        document.add(new SortedNumericDocValuesField(fieldName + "LONG", linkLong));
        document.add(new SortedNumericDocValuesField(fieldName + "DOUBLE", Double.doubleToRawLongBits(linkLong)));
    } else {
        document.add(new SortedDocValuesField(fieldName, new BytesRef(linkValue)));
        document.add(new NumericDocValuesField(fieldName + "INT", linkInt));
        document.add(new FloatDocValuesField(fieldName + "FLOAT", linkInt));
        document.add(new NumericDocValuesField(fieldName + "LONG", linkLong));
        document.add(new DoubleDocValuesField(fieldName + "DOUBLE", linkLong));
    }
    if (globalOrdinalJoin) {
        document.add(new SortedDocValuesField("join_field", new BytesRef(linkValue)));
    }
}

Also used : FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) LongPoint(org.apache.lucene.document.LongPoint) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) IntPoint(org.apache.lucene.document.IntPoint) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) FloatPoint(org.apache.lucene.document.FloatPoint) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) DoublePoint(org.apache.lucene.document.DoublePoint) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef)

Example 67 with SortedSetDocValuesField

Use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.

In the class TestOrdinalMap, method testRamBytesUsed.

/**
 * Indexes a random number of documents carrying optional "sdv" (sorted) and "ssdv"
 * (sorted-set) doc values, then asserts that each ordinal map exposed through
 * {@code MultiDocValues} reports the same RAM usage via {@code ramBytesUsed()} as
 * {@code RamUsageTester.sizeOf} measures for it.
 *
 * @throws IOException if indexing or reading fails
 */
public void testRamBytesUsed() throws IOException {
    Directory directory = newDirectory();
    IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random())).setCodec(TestUtil.alwaysDocValuesFormat(TestUtil.getDefaultDocValuesFormat()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);
    final int docCount = TestUtil.nextInt(random(), 10, 1000);
    final int termLengthLimit = TestUtil.nextInt(random(), 1, 4);

    int docsAdded = 0;
    while (docsAdded < docCount) {
        Document document = new Document();
        // Roughly half of the documents get a single-valued sorted field.
        if (random().nextBoolean()) {
            String sortedTerm = TestUtil.randomSimpleString(random(), termLengthLimit);
            document.add(new SortedDocValuesField("sdv", new BytesRef(sortedTerm)));
        }
        // Zero, one or two sorted-set values per document.
        for (int remaining = random().nextInt(3); remaining > 0; remaining--) {
            String setTerm = TestUtil.randomSimpleString(random(), termLengthLimit);
            document.add(new SortedSetDocValuesField("ssdv", new BytesRef(setTerm)));
        }
        writer.addDocument(document);
        // Occasionally open and immediately close a reader while indexing.
        if (rarely()) {
            writer.getReader().close();
        }
        docsAdded++;
    }
    writer.commit();

    DirectoryReader reader = writer.getReader();

    // The ordinal map is only reachable when the multi-segment wrapper type is returned.
    SortedDocValues sortedValues = MultiDocValues.getSortedValues(reader, "sdv");
    if (sortedValues instanceof MultiDocValues.MultiSortedDocValues) {
        MultiDocValues.MultiSortedDocValues multiSorted = (MultiDocValues.MultiSortedDocValues) sortedValues;
        OrdinalMap sortedMap = multiSorted.mapping;
        assertEquals(RamUsageTester.sizeOf(sortedMap, ORDINAL_MAP_ACCUMULATOR), sortedMap.ramBytesUsed());
    }

    SortedSetDocValues sortedSetValues = MultiDocValues.getSortedSetValues(reader, "ssdv");
    if (sortedSetValues instanceof MultiDocValues.MultiSortedSetDocValues) {
        MultiDocValues.MultiSortedSetDocValues multiSortedSet = (MultiDocValues.MultiSortedSetDocValues) sortedSetValues;
        OrdinalMap sortedSetMap = multiSortedSet.mapping;
        assertEquals(RamUsageTester.sizeOf(sortedSetMap, ORDINAL_MAP_ACCUMULATOR), sortedSetMap.ramBytesUsed());
    }

    writer.close();
    reader.close();
    directory.close();
}

Also used : Document(org.apache.lucene.document.Document) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 68 with SortedSetDocValuesField

Use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.

In the class TestMinShouldMatch2, method addSome.

/**
 * Adds a random, non-empty selection of {@code values} to {@code doc}; each selected value is
 * added both as a {@code StringField} named "field" and as a {@code SortedSetDocValuesField}
 * named "dv".
 *
 * <p>{@code Arrays.asList} returns a view backed by the array, so the shuffle reorders
 * {@code values} itself.
 *
 * @param doc    document that receives the fields
 * @param values candidate values; shuffled in place
 */
private static void addSome(Document doc, String[] values) {
    final List<String> shuffled = Arrays.asList(values);
    Collections.shuffle(shuffled, random());
    // Take between one and all of the shuffled values.
    final int selectedCount = TestUtil.nextInt(random(), 1, shuffled.size());
    for (String value : shuffled.subList(0, selectedCount)) {
        doc.add(new StringField("field", value, Field.Store.NO));
        doc.add(new SortedSetDocValuesField("dv", new BytesRef(value)));
    }
}

Also used : StringField(org.apache.lucene.document.StringField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef)

Example 69 with SortedSetDocValuesField

Use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.

In the class GroupFacetCollectorTest, method createIndexContext.

/**
 * Builds a random index of grouped, faceted documents together with the bookkeeping needed
 * to verify grouped-facet results.
 *
 * <p>Each document gets a "group" value, one or more "facet" values and a "content" value.
 * While indexing, the method records, per content value, which groups occurred under which
 * facet value, the sorted set of all facet values used, and the largest number of distinct
 * groups seen for any single (content, facet) pair.
 *
 * <p>The field instances are created once and mutated for every document before it is added.
 *
 * @param multipleFacetValuesPerDocument if false, each document gets exactly one facet value,
 *        indexed as a string field plus a {@code SortedDocValuesField}; if true, each document
 *        gets between 2 and 7 {@code SortedSetDocValuesField} facet values
 * @return the {@code IndexContext} wrapping the recorded expectations, an open reader, the
 *         directory and the generated content values
 * @throws IOException if indexing fails
 */
private IndexContext createIndexContext(boolean multipleFacetValuesPerDocument) throws IOException {
    final Random random = random();
    final int numDocs = TestUtil.nextInt(random, 138, 1145) * RANDOM_MULTIPLIER;
    final int numGroups = TestUtil.nextInt(random, 1, numDocs / 4);
    final int numFacets = TestUtil.nextInt(random, 1, numDocs / 6);
    if (VERBOSE) {
        System.out.println("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
    }
    final List<String> groups = new ArrayList<>();
    for (int i = 0; i < numGroups; i++) {
        groups.add(generateRandomNonEmptyString());
    }
    final List<String> facetValues = new ArrayList<>();
    for (int i = 0; i < numFacets; i++) {
        facetValues.add(generateRandomNonEmptyString());
    }
    final String[] contentBrs = new String[TestUtil.nextInt(random, 2, 20)];
    if (VERBOSE) {
        System.out.println("TEST: create fake content");
    }
    for (int contentIDX = 0; contentIDX < contentBrs.length; contentIDX++) {
        contentBrs[contentIDX] = generateRandomNonEmptyString();
        if (VERBOSE) {
            System.out.println("  content=" + contentBrs[contentIDX]);
        }
    }
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(new MockAnalyzer(random)));
    // Four document templates sharing the same Field instances; which one is indexed
    // depends on whether the current row has a group and/or facet values.
    Document doc = new Document();
    Document docNoGroup = new Document();
    Document docNoFacet = new Document();
    Document docNoGroupNoFacet = new Document();
    Field group = newStringField("group", "", Field.Store.NO);
    Field groupDc = new SortedDocValuesField("group", new BytesRef());
    doc.add(groupDc);
    docNoFacet.add(groupDc);
    doc.add(group);
    docNoFacet.add(group);
    Field[] facetFields;
    if (multipleFacetValuesPerDocument == false) {
        facetFields = new Field[2];
        facetFields[0] = newStringField("facet", "", Field.Store.NO);
        doc.add(facetFields[0]);
        docNoGroup.add(facetFields[0]);
        facetFields[1] = new SortedDocValuesField("facet", new BytesRef());
        doc.add(facetFields[1]);
        docNoGroup.add(facetFields[1]);
    } else {
        // This branch is only reached when multipleFacetValuesPerDocument is true, so the
        // array always holds 2..7 sorted-set fields.
        facetFields = new Field[2 + random.nextInt(6)];
        for (int i = 0; i < facetFields.length; i++) {
            facetFields[i] = new SortedSetDocValuesField("facet", new BytesRef());
            doc.add(facetFields[i]);
            docNoGroup.add(facetFields[i]);
        }
    }
    Field content = newStringField("content", "", Field.Store.NO);
    doc.add(content);
    docNoGroup.add(content);
    docNoFacet.add(content);
    docNoGroupNoFacet.add(content);
    // Natural String order, with null (if it ever occurs) sorted before everything else.
    final Comparator<String> nullsFirstOrder = Comparator.nullsFirst(Comparator.<String>naturalOrder());
    NavigableSet<String> uniqueFacetValues = new TreeSet<>(nullsFirstOrder);
    Map<String, Map<String, Set<String>>> searchTermToFacetToGroups = new HashMap<>();
    int facetWithMostGroups = 0;
    for (int i = 0; i < numDocs; i++) {
        final String groupValue;
        if (random.nextInt(24) == 17) {
            // So we test the "doc doesn't have the group'd
            // field" case:
            groupValue = "";
        } else {
            groupValue = groups.get(random.nextInt(groups.size()));
        }
        String contentStr = contentBrs[random.nextInt(contentBrs.length)];
        Map<String, Set<String>> facetToGroups = searchTermToFacetToGroups.computeIfAbsent(contentStr, key -> new HashMap<>());
        List<String> facetVals = new ArrayList<>();
        if (multipleFacetValuesPerDocument == false) {
            String facetValue = facetValues.get(random.nextInt(facetValues.size()));
            uniqueFacetValues.add(facetValue);
            facetWithMostGroups = Math.max(facetWithMostGroups, recordGroupForFacet(facetToGroups, facetValue, groupValue));
            facetFields[0].setStringValue(facetValue);
            facetFields[1].setBytesValue(new BytesRef(facetValue));
            facetVals.add(facetValue);
        } else {
            for (Field facetField : facetFields) {
                String facetValue = facetValues.get(random.nextInt(facetValues.size()));
                uniqueFacetValues.add(facetValue);
                facetWithMostGroups = Math.max(facetWithMostGroups, recordGroupForFacet(facetToGroups, facetValue, groupValue));
                facetField.setBytesValue(new BytesRef(facetValue));
                facetVals.add(facetValue);
            }
        }
        if (VERBOSE) {
            System.out.println("  doc content=" + contentStr + " group=" + (groupValue == null ? "null" : groupValue) + " facetVals=" + facetVals);
        }
        // NOTE(review): groupValue is only ever assigned "" or an element of groups above, and
        // facetVals always receives at least one value, so the null / empty branches below look
        // unreachable unless generateRandomNonEmptyString() can return null - confirm.
        if (groupValue != null) {
            groupDc.setBytesValue(new BytesRef(groupValue));
            group.setStringValue(groupValue);
        } else {
            // TODO: not true
            // DV cannot have missing values:
            groupDc.setBytesValue(new BytesRef());
        }
        content.setStringValue(contentStr);
        if (groupValue == null && facetVals.isEmpty()) {
            writer.addDocument(docNoGroupNoFacet);
        } else if (facetVals.isEmpty()) {
            writer.addDocument(docNoFacet);
        } else if (groupValue == null) {
            writer.addDocument(docNoGroup);
        } else {
            writer.addDocument(doc);
        }
    }
    DirectoryReader reader = writer.getReader();
    writer.close();
    return new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues);
}

/**
 * Records that {@code groupValue} occurred under {@code facetValue}, creating the group set
 * on first use.
 *
 * @return the number of distinct groups now recorded for {@code facetValue}
 */
private static int recordGroupForFacet(Map<String, Set<String>> facetToGroups, String facetValue, String groupValue) {
    Set<String> groupsInFacet = facetToGroups.computeIfAbsent(facetValue, key -> new HashSet<>());
    groupsInFacet.add(groupValue);
    return groupsInFacet.size();
}

Also used : TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) Set(java.util.Set) NavigableSet(java.util.NavigableSet) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) StringField(org.apache.lucene.document.StringField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) Random(java.util.Random) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) TreeSet(java.util.TreeSet) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) DirectoryReader(org.apache.lucene.index.DirectoryReader) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) HashMap(java.util.HashMap) Map(java.util.Map) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 70 with SortedSetDocValuesField

Use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.

In the class GroupFacetCollectorTest, method testMVGroupedFacetingWithDeletes.

/**
 * Indexes eight documents (numbered 0-7 in the comments below), some with a "hotel" group
 * and an "airport" sorted-set doc value, issues a delete-by-query between commits, and then
 * asserts the grouped facet counts for "airport": total count 3, missing count 1, and the
 * entries "ams" (2) followed by "dus" (1).
 *
 * @throws Exception if indexing or searching fails
 */
public void testMVGroupedFacetingWithDeletes() throws Exception {
    final String groupField = "hotel";
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
    boolean useDv = true;
    // Cannot assert this since we use NoMergePolicy:
    w.setDoRandomForceMergeAssert(false);
    // 0: no group, no airport
    Document doc = new Document();
    doc.add(new StringField("x", "x", Field.Store.NO));
    w.addDocument(doc);
    // 1
    doc = new Document();
    addField(doc, groupField, "a", useDv);
    doc.add(new SortedSetDocValuesField("airport", new BytesRef("ams")));
    w.addDocument(doc);
    w.commit();
    // NOTE(review): within this method "airport" is only added as doc values; confirm that
    // this delete-by-term is expected to match a document.
    w.deleteDocuments(new TermQuery(new Term("airport", "ams")));
    // 2-6: {hotel, airport} pairs, indexed in this order after the delete
    final String[][] hotelAirportPairs = { { "a", "ams" }, { "a", "dus" }, { "b", "ams" }, { "b", "ams" }, { "b", "ams" } };
    for (String[] pair : hotelAirportPairs) {
        doc = new Document();
        addField(doc, groupField, pair[0], useDv);
        doc.add(new SortedSetDocValuesField("airport", new BytesRef(pair[1])));
        w.addDocument(doc);
    }
    w.commit();
    // 7: no group, no airport
    doc = new Document();
    doc.add(new StringField("x", "x", Field.Store.NO));
    w.addDocument(doc);
    w.commit();
    w.close();
    IndexSearcher indexSearcher = newSearcher(DirectoryReader.open(dir));
    GroupFacetCollector groupedAirportFacetCollector = createRandomCollector(groupField + "_dv", "airport", null, true);
    indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
    TermGroupFacetCollector.GroupedFacetResult airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false);
    assertEquals(3, airportResult.getTotalCount());
    assertEquals(1, airportResult.getTotalMissingCount());
    List<TermGroupFacetCollector.FacetEntry> entries = airportResult.getFacetEntries(0, 10);
    assertEquals(2, entries.size());
    assertEquals("ams", entries.get(0).getValue().utf8ToString());
    assertEquals(2, entries.get(0).getCount());
    assertEquals("dus", entries.get(1).getValue().utf8ToString());
    assertEquals(1, entries.get(1).getCount());
    indexSearcher.getIndexReader().close();
    dir.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) FieldType(org.apache.lucene.document.FieldType) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringField(org.apache.lucene.document.StringField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Aggregations

SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField): 98; BytesRef (org.apache.lucene.util.BytesRef): 96; Document (org.apache.lucene.document.Document): 82; Directory (org.apache.lucene.store.Directory): 74; RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 38; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 36; SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField): 33; NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 27; IndexReader (org.apache.lucene.index.IndexReader): 27; StringField (org.apache.lucene.document.StringField): 23; BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField): 22; SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField): 20; ArrayList (java.util.ArrayList): 18; Analyzer (org.apache.lucene.analysis.Analyzer): 14; IndexableField (org.apache.lucene.index.IndexableField): 13; Field (org.apache.lucene.document.Field): 12; DirectoryReader (org.apache.lucene.index.DirectoryReader): 11; LeafReader (org.apache.lucene.index.LeafReader): 11; IntPoint (org.apache.lucene.document.IntPoint): 10; StoredField (org.apache.lucene.document.StoredField): 10