Usage example of org.apache.lucene.document.SortedSetDocValuesField from the Apache lucene-solr project:
class TestJoinUtil, method addLinkFields.
/**
 * Adds {@code linkValue} to {@code document} under {@code fieldName} in every encoding the
 * join tests query against: a text field, int/float/long/double point fields, and the
 * matching doc-values fields (multi-valued or single-valued depending on
 * {@code multipleValuesPerDocument}).
 *
 * @param random                    source of randomness forwarded to {@code newTextField}
 * @param document                  document to add the fields to (mutated in place)
 * @param fieldName                 base field name; numeric variants get a type suffix
 * @param linkValue                 join key, parsed as unsigned hex for the numeric variants
 * @param multipleValuesPerDocument if true use SORTED_SET/SORTED_NUMERIC doc values,
 *                                  otherwise the single-valued equivalents
 * @param globalOrdinalJoin         if true also add the shared "join_field" used by
 *                                  global-ordinal joins
 */
private void addLinkFields(final Random random, Document document, final String fieldName, String linkValue, boolean multipleValuesPerDocument, boolean globalOrdinalJoin) {
  document.add(newTextField(random, fieldName, linkValue, Field.Store.NO));
  final int linkInt = Integer.parseUnsignedInt(linkValue, 16);
  document.add(new IntPoint(fieldName + "INT", linkInt));
  document.add(new FloatPoint(fieldName + "FLOAT", linkInt));
  // Widen to long BEFORE shifting: shifting an int by 32 is a no-op in Java (the shift
  // distance is taken mod 32), which silently made linkLong == linkInt. Mask the low half
  // so a negative linkInt (parseUnsignedInt may return one) does not sign-extend over the
  // high 32 bits.
  final long linkLong = (long) linkInt << 32 | (linkInt & 0xFFFFFFFFL);
  document.add(new LongPoint(fieldName + "LONG", linkLong));
  document.add(new DoublePoint(fieldName + "DOUBLE", linkLong));
  if (multipleValuesPerDocument) {
    document.add(new SortedSetDocValuesField(fieldName, new BytesRef(linkValue)));
    document.add(new SortedNumericDocValuesField(fieldName + "INT", linkInt));
    document.add(new SortedNumericDocValuesField(fieldName + "FLOAT", Float.floatToRawIntBits(linkInt)));
    document.add(new SortedNumericDocValuesField(fieldName + "LONG", linkLong));
    document.add(new SortedNumericDocValuesField(fieldName + "DOUBLE", Double.doubleToRawLongBits(linkLong)));
  } else {
    document.add(new SortedDocValuesField(fieldName, new BytesRef(linkValue)));
    document.add(new NumericDocValuesField(fieldName + "INT", linkInt));
    document.add(new FloatDocValuesField(fieldName + "FLOAT", linkInt));
    document.add(new NumericDocValuesField(fieldName + "LONG", linkLong));
    document.add(new DoubleDocValuesField(fieldName + "DOUBLE", linkLong));
  }
  if (globalOrdinalJoin) {
    document.add(new SortedDocValuesField("join_field", new BytesRef(linkValue)));
  }
}
Usage example of org.apache.lucene.document.SortedSetDocValuesField from the Apache lucene-solr project:
class TestOrdinalMap, method testRamBytesUsed.
/**
 * Verifies that {@code OrdinalMap#ramBytesUsed()} agrees with the size measured by
 * {@code RamUsageTester} for both the sorted ("sdv") and sorted-set ("ssdv") fields of a
 * randomly built, potentially multi-segment index.
 */
public void testRamBytesUsed() throws IOException {
  Directory directory = newDirectory();
  IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random())).setCodec(TestUtil.alwaysDocValuesFormat(TestUtil.getDefaultDocValuesFormat()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);
  final int docCount = TestUtil.nextInt(random(), 10, 1000);
  final int termLengthLimit = TestUtil.nextInt(random(), 1, 4);
  int docIndex = 0;
  while (docIndex < docCount) {
    Document document = new Document();
    // Roughly half the docs carry the single-valued field.
    if (random().nextBoolean()) {
      document.add(new SortedDocValuesField("sdv", new BytesRef(TestUtil.randomSimpleString(random(), termLengthLimit))));
    }
    // Zero, one or two values for the multi-valued field.
    int remaining = random().nextInt(3);
    while (remaining > 0) {
      document.add(new SortedSetDocValuesField("ssdv", new BytesRef(TestUtil.randomSimpleString(random(), termLengthLimit))));
      remaining--;
    }
    writer.addDocument(document);
    // Occasionally open (and immediately close) a reader to flush new segments.
    if (rarely()) {
      writer.getReader().close();
    }
    docIndex++;
  }
  writer.commit();
  DirectoryReader reader = writer.getReader();
  SortedDocValues sorted = MultiDocValues.getSortedValues(reader, "sdv");
  // The OrdinalMap only exists when the view actually spans multiple segments.
  if (sorted instanceof MultiDocValues.MultiSortedDocValues) {
    OrdinalMap ordinalMap = ((MultiDocValues.MultiSortedDocValues) sorted).mapping;
    assertEquals(RamUsageTester.sizeOf(ordinalMap, ORDINAL_MAP_ACCUMULATOR), ordinalMap.ramBytesUsed());
  }
  SortedSetDocValues sortedSet = MultiDocValues.getSortedSetValues(reader, "ssdv");
  if (sortedSet instanceof MultiDocValues.MultiSortedSetDocValues) {
    OrdinalMap ordinalMap = ((MultiDocValues.MultiSortedSetDocValues) sortedSet).mapping;
    assertEquals(RamUsageTester.sizeOf(ordinalMap, ORDINAL_MAP_ACCUMULATOR), ordinalMap.ramBytesUsed());
  }
  writer.close();
  reader.close();
  directory.close();
}
Usage example of org.apache.lucene.document.SortedSetDocValuesField from the Apache lucene-solr project:
class TestMinShouldMatch2, method addSome.
/**
 * Adds between 1 and {@code values.length} randomly chosen entries from {@code values} to
 * {@code doc}, each as both an indexed StringField ("field") and a
 * SortedSetDocValuesField ("dv").
 *
 * @param doc    document to add the fields to (mutated in place)
 * @param values candidate values; the array itself is left untouched
 */
private static void addSome(Document doc, String[] values) {
  // Shuffle a clone: Arrays.asList returns a view backed by the caller's array, so
  // shuffling it directly would reorder the caller's data as a hidden side effect.
  List<String> list = Arrays.asList(values.clone());
  Collections.shuffle(list, random());
  int howMany = TestUtil.nextInt(random(), 1, list.size());
  for (int i = 0; i < howMany; i++) {
    doc.add(new StringField("field", list.get(i), Field.Store.NO));
    doc.add(new SortedSetDocValuesField("dv", new BytesRef(list.get(i))));
  }
}
Usage example of org.apache.lucene.document.SortedSetDocValuesField from the Apache lucene-solr project:
class GroupFacetCollectorTest, method createIndexContext.
/**
 * Builds a randomized index for group-facet testing and records, per search term, which
 * facet values map to which groups, so tests can verify collector output against it.
 * Field instances are reused across documents (values are reset per doc), and four
 * document "shapes" exist so missing-group and missing-facet cases are exercised.
 *
 * @param multipleFacetValuesPerDocument if true the "facet" field is multi-valued
 *                                       (SORTED_SET doc values), otherwise single-valued
 *                                       (SORTED doc values plus an indexed string field)
 * @return an IndexContext holding the reader and the expected term/facet/group mappings
 * @throws IOException on index writing failure
 */
private IndexContext createIndexContext(boolean multipleFacetValuesPerDocument) throws IOException {
  final Random random = random();
  // Randomized sizes so many segment/group/facet shapes get covered across runs.
  final int numDocs = TestUtil.nextInt(random, 138, 1145) * RANDOM_MULTIPLIER;
  final int numGroups = TestUtil.nextInt(random, 1, numDocs / 4);
  final int numFacets = TestUtil.nextInt(random, 1, numDocs / 6);
  if (VERBOSE) {
    System.out.println("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
  }
  final List<String> groups = new ArrayList<>();
  for (int i = 0; i < numGroups; i++) {
    groups.add(generateRandomNonEmptyString());
  }
  final List<String> facetValues = new ArrayList<>();
  for (int i = 0; i < numFacets; i++) {
    facetValues.add(generateRandomNonEmptyString());
  }
  final String[] contentBrs = new String[TestUtil.nextInt(random, 2, 20)];
  if (VERBOSE) {
    System.out.println("TEST: create fake content");
  }
  for (int contentIDX = 0; contentIDX < contentBrs.length; contentIDX++) {
    contentBrs[contentIDX] = generateRandomNonEmptyString();
    if (VERBOSE) {
      System.out.println("  content=" + contentBrs[contentIDX]);
    }
  }
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(new MockAnalyzer(random)));
  // Four reusable document shapes: with/without group field x with/without facet field.
  Document doc = new Document();
  Document docNoGroup = new Document();
  Document docNoFacet = new Document();
  Document docNoGroupNoFacet = new Document();
  Field group = newStringField("group", "", Field.Store.NO);
  Field groupDc = new SortedDocValuesField("group", new BytesRef());
  doc.add(groupDc);
  docNoFacet.add(groupDc);
  doc.add(group);
  docNoFacet.add(group);
  Field[] facetFields;
  if (!multipleFacetValuesPerDocument) {
    // Single-valued: one indexed string field plus one SORTED doc-values field.
    facetFields = new Field[2];
    facetFields[0] = newStringField("facet", "", Field.Store.NO);
    doc.add(facetFields[0]);
    docNoGroup.add(facetFields[0]);
    facetFields[1] = new SortedDocValuesField("facet", new BytesRef());
    doc.add(facetFields[1]);
    docNoGroup.add(facetFields[1]);
  } else {
    // Multi-valued: 2-7 SORTED_SET doc-values instances on the same field name.
    // (The condition is known true in this branch, so no ternary is needed here.)
    facetFields = new Field[2 + random.nextInt(6)];
    for (int i = 0; i < facetFields.length; i++) {
      facetFields[i] = new SortedSetDocValuesField("facet", new BytesRef());
      doc.add(facetFields[i]);
      docNoGroup.add(facetFields[i]);
    }
  }
  Field content = newStringField("content", "", Field.Store.NO);
  doc.add(content);
  docNoGroup.add(content);
  docNoFacet.add(content);
  docNoGroupNoFacet.add(content);
  // null-safe comparator: nulls sort first, otherwise natural String order.
  NavigableSet<String> uniqueFacetValues = new TreeSet<>(new Comparator<String>() {
    @Override
    public int compare(String a, String b) {
      if (a == b) {
        return 0;
      } else if (a == null) {
        return -1;
      } else if (b == null) {
        return 1;
      } else {
        return a.compareTo(b);
      }
    }
  });
  // searchTerm -> facetValue -> set of groups; this is the expected result model.
  Map<String, Map<String, Set<String>>> searchTermToFacetToGroups = new HashMap<>();
  int facetWithMostGroups = 0;
  for (int i = 0; i < numDocs; i++) {
    final String groupValue;
    if (random.nextInt(24) == 17) {
      // So we test the "doc doesn't have the group'd
      // field" case:
      groupValue = "";
    } else {
      groupValue = groups.get(random.nextInt(groups.size()));
    }
    String contentStr = contentBrs[random.nextInt(contentBrs.length)];
    if (!searchTermToFacetToGroups.containsKey(contentStr)) {
      searchTermToFacetToGroups.put(contentStr, new HashMap<String, Set<String>>());
    }
    Map<String, Set<String>> facetToGroups = searchTermToFacetToGroups.get(contentStr);
    List<String> facetVals = new ArrayList<>();
    if (!multipleFacetValuesPerDocument) {
      String facetValue = facetValues.get(random.nextInt(facetValues.size()));
      uniqueFacetValues.add(facetValue);
      if (!facetToGroups.containsKey(facetValue)) {
        facetToGroups.put(facetValue, new HashSet<String>());
      }
      Set<String> groupsInFacet = facetToGroups.get(facetValue);
      groupsInFacet.add(groupValue);
      if (groupsInFacet.size() > facetWithMostGroups) {
        facetWithMostGroups = groupsInFacet.size();
      }
      facetFields[0].setStringValue(facetValue);
      facetFields[1].setBytesValue(new BytesRef(facetValue));
      facetVals.add(facetValue);
    } else {
      // One independently chosen value per reusable facet field instance.
      for (Field facetField : facetFields) {
        String facetValue = facetValues.get(random.nextInt(facetValues.size()));
        uniqueFacetValues.add(facetValue);
        if (!facetToGroups.containsKey(facetValue)) {
          facetToGroups.put(facetValue, new HashSet<String>());
        }
        Set<String> groupsInFacet = facetToGroups.get(facetValue);
        groupsInFacet.add(groupValue);
        if (groupsInFacet.size() > facetWithMostGroups) {
          facetWithMostGroups = groupsInFacet.size();
        }
        facetField.setBytesValue(new BytesRef(facetValue));
        facetVals.add(facetValue);
      }
    }
    if (VERBOSE) {
      System.out.println("  doc content=" + contentStr + " group=" + (groupValue == null ? "null" : groupValue) + " facetVals=" + facetVals);
    }
    if (groupValue != null) {
      groupDc.setBytesValue(new BytesRef(groupValue));
      group.setStringValue(groupValue);
    } else {
      // TODO: not true
      // DV cannot have missing values:
      groupDc.setBytesValue(new BytesRef());
    }
    content.setStringValue(contentStr);
    // Pick the document shape matching which fields this doc actually carries.
    if (groupValue == null && facetVals.isEmpty()) {
      writer.addDocument(docNoGroupNoFacet);
    } else if (facetVals.isEmpty()) {
      writer.addDocument(docNoFacet);
    } else if (groupValue == null) {
      writer.addDocument(docNoGroup);
    } else {
      writer.addDocument(doc);
    }
  }
  DirectoryReader reader = writer.getReader();
  writer.close();
  return new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues);
}
Usage example of org.apache.lucene.document.SortedSetDocValuesField from the Apache lucene-solr project:
class GroupFacetCollectorTest, method testMVGroupedFacetingWithDeletes.
/**
 * Verifies grouped faceting on a multi-valued SORTED_SET facet field ("airport") when the
 * index contains deleted documents, multiple segments (NoMergePolicy keeps commits apart),
 * and documents that lack the facet field entirely (counted as "missing").
 */
public void testMVGroupedFacetingWithDeletes() throws Exception {
  final String groupField = "hotel";
  Directory dir = newDirectory();
  // NoMergePolicy keeps each commit in its own segment, so the collector must merge
  // per-segment results -- the code path under test.
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
  boolean useDv = true;
  // Cannot assert this since we use NoMergePolicy:
  w.setDoRandomForceMergeAssert(false);
  // 0: no group, no facet -> contributes to the "missing" count
  Document doc = new Document();
  doc.add(new StringField("x", "x", Field.Store.NO));
  w.addDocument(doc);
  // 1: added then deleted below; must not influence the facet counts
  doc = new Document();
  addField(doc, groupField, "a", useDv);
  doc.add(new SortedSetDocValuesField("airport", new BytesRef("ams")));
  w.addDocument(doc);
  w.commit();
  w.deleteDocuments(new TermQuery(new Term("airport", "ams")));
  // 2
  doc = new Document();
  addField(doc, groupField, "a", useDv);
  doc.add(new SortedSetDocValuesField("airport", new BytesRef("ams")));
  w.addDocument(doc);
  // 3
  doc = new Document();
  addField(doc, groupField, "a", useDv);
  doc.add(new SortedSetDocValuesField("airport", new BytesRef("dus")));
  w.addDocument(doc);
  // 4
  doc = new Document();
  addField(doc, groupField, "b", useDv);
  doc.add(new SortedSetDocValuesField("airport", new BytesRef("ams")));
  w.addDocument(doc);
  // 5
  doc = new Document();
  addField(doc, groupField, "b", useDv);
  doc.add(new SortedSetDocValuesField("airport", new BytesRef("ams")));
  w.addDocument(doc);
  // 6
  doc = new Document();
  addField(doc, groupField, "b", useDv);
  doc.add(new SortedSetDocValuesField("airport", new BytesRef("ams")));
  w.addDocument(doc);
  w.commit();
  // 7: another no-group/no-facet doc in its own segment
  doc = new Document();
  doc.add(new StringField("x", "x", Field.Store.NO));
  w.addDocument(doc);
  w.commit();
  w.close();
  IndexSearcher indexSearcher = newSearcher(DirectoryReader.open(dir));
  GroupFacetCollector groupedAirportFacetCollector = createRandomCollector(groupField + "_dv", "airport", null, true);
  indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
  TermGroupFacetCollector.GroupedFacetResult airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false);
  // Counts are per group (not per doc): groups "a" and "b" each facet on "ams",
  // group "a" also on "dus"; one group has no facet value at all.
  assertEquals(3, airportResult.getTotalCount());
  assertEquals(1, airportResult.getTotalMissingCount());
  List<TermGroupFacetCollector.FacetEntry> entries = airportResult.getFacetEntries(0, 10);
  assertEquals(2, entries.size());
  assertEquals("ams", entries.get(0).getValue().utf8ToString());
  assertEquals(2, entries.get(0).getCount());
  assertEquals("dus", entries.get(1).getValue().utf8ToString());
  assertEquals(1, entries.get(1).getCount());
  indexSearcher.getIndexReader().close();
  dir.close();
}
End of aggregated SortedSetDocValuesField usage examples.