use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.
the class ICUCollationField method createFields.
@Override
public List<IndexableField> createFields(SchemaField field, Object value) {
if (field.hasDocValues()) {
List<IndexableField> fields = new ArrayList<>();
fields.add(createField(field, value));
final BytesRef bytes = getCollationKey(field.getName(), value.toString());
if (field.multiValued()) {
fields.add(new SortedSetDocValuesField(field.getName(), bytes));
} else {
fields.add(new SortedDocValuesField(field.getName(), bytes));
}
return fields;
} else {
return Collections.singletonList(createField(field, value));
}
}
use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.
the class BoolFieldSource method createFields.
@Override
public List<IndexableField> createFields(SchemaField field, Object value) {
IndexableField fval = createField(field, value);
if (field.hasDocValues()) {
IndexableField docval;
final BytesRef bytes = new BytesRef(toInternal(value.toString()));
if (field.multiValued()) {
docval = new SortedSetDocValuesField(field.getName(), bytes);
} else {
docval = new SortedDocValuesField(field.getName(), bytes);
}
// Only create a list of we have 2 values...
if (fval != null) {
List<IndexableField> fields = new ArrayList<>(2);
fields.add(fval);
fields.add(docval);
return fields;
}
fval = docval;
}
return Collections.singletonList(fval);
}
use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.
the class CollationField method createFields.
@Override
public List<IndexableField> createFields(SchemaField field, Object value) {
if (field.hasDocValues()) {
List<IndexableField> fields = new ArrayList<>();
fields.add(createField(field, value));
final BytesRef bytes = getCollationKey(field.getName(), value.toString());
if (field.multiValued()) {
fields.add(new SortedSetDocValuesField(field.getName(), bytes));
} else {
fields.add(new SortedDocValuesField(field.getName(), bytes));
}
return fields;
} else {
return Collections.singletonList(createField(field, value));
}
}
use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.
the class DistinctValuesCollectorTest method createIndexContext.
private IndexContext createIndexContext() throws Exception {
Random random = random();
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;
String[] groupValues = new String[numDocs / 5];
String[] countValues = new String[numDocs / 10];
for (int i = 0; i < groupValues.length; i++) {
groupValues[i] = generateRandomNonEmptyString();
}
for (int i = 0; i < countValues.length; i++) {
countValues[i] = generateRandomNonEmptyString();
}
List<String> contentStrings = new ArrayList<>();
Map<String, Map<String, Set<String>>> searchTermToGroupCounts = new HashMap<>();
for (int i = 1; i <= numDocs; i++) {
String groupValue = random.nextInt(23) == 14 ? null : groupValues[random.nextInt(groupValues.length)];
String countValue = random.nextInt(21) == 13 ? null : countValues[random.nextInt(countValues.length)];
String content = "random" + random.nextInt(numDocs / 20);
Map<String, Set<String>> groupToCounts = searchTermToGroupCounts.get(content);
if (groupToCounts == null) {
// Groups sort always DOCID asc...
searchTermToGroupCounts.put(content, groupToCounts = new LinkedHashMap<>());
contentStrings.add(content);
}
Set<String> countsVals = groupToCounts.get(groupValue);
if (countsVals == null) {
groupToCounts.put(groupValue, countsVals = new HashSet<>());
}
countsVals.add(countValue);
Document doc = new Document();
doc.add(new StringField("id", String.format(Locale.ROOT, "%09d", i), Field.Store.YES));
doc.add(new SortedDocValuesField("id", new BytesRef(String.format(Locale.ROOT, "%09d", i))));
if (groupValue != null) {
addField(doc, GROUP_FIELD, groupValue);
}
if (countValue != null) {
addField(doc, COUNT_FIELD, countValue);
}
doc.add(new TextField("content", content, Field.Store.YES));
w.addDocument(doc);
}
DirectoryReader reader = w.getReader();
if (VERBOSE) {
for (int docID = 0; docID < reader.maxDoc(); docID++) {
Document doc = reader.document(docID);
System.out.println("docID=" + docID + " id=" + doc.get("id") + " content=" + doc.get("content") + " author=" + doc.get("author") + " publisher=" + doc.get("publisher"));
}
}
w.close();
return new IndexContext(dir, reader, searchTermToGroupCounts, contentStrings.toArray(new String[contentStrings.size()]));
}
use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.
the class AllGroupHeadsCollectorTest method testBasic.
public void testBasic() throws Exception {
final String groupField = "author";
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
DocValuesType valueType = DocValuesType.SORTED;
// 0
Document doc = new Document();
addGroupField(doc, groupField, "author1", valueType);
doc.add(newTextField("content", "random text", Field.Store.NO));
doc.add(new NumericDocValuesField("id_1", 1));
doc.add(new SortedDocValuesField("id_2", new BytesRef("1")));
w.addDocument(doc);
// 1
doc = new Document();
addGroupField(doc, groupField, "author1", valueType);
doc.add(newTextField("content", "some more random text blob", Field.Store.NO));
doc.add(new NumericDocValuesField("id_1", 2));
doc.add(new SortedDocValuesField("id_2", new BytesRef("2")));
w.addDocument(doc);
// 2
doc = new Document();
addGroupField(doc, groupField, "author1", valueType);
doc.add(newTextField("content", "some more random textual data", Field.Store.NO));
doc.add(new NumericDocValuesField("id_1", 3));
doc.add(new SortedDocValuesField("id_2", new BytesRef("3")));
w.addDocument(doc);
// To ensure a second segment
w.commit();
// 3
doc = new Document();
addGroupField(doc, groupField, "author2", valueType);
doc.add(newTextField("content", "some random text", Field.Store.NO));
doc.add(new NumericDocValuesField("id_1", 4));
doc.add(new SortedDocValuesField("id_2", new BytesRef("4")));
w.addDocument(doc);
// 4
doc = new Document();
addGroupField(doc, groupField, "author3", valueType);
doc.add(newTextField("content", "some more random text", Field.Store.NO));
doc.add(new NumericDocValuesField("id_1", 5));
doc.add(new SortedDocValuesField("id_2", new BytesRef("5")));
w.addDocument(doc);
// 5
doc = new Document();
addGroupField(doc, groupField, "author3", valueType);
doc.add(newTextField("content", "random blob", Field.Store.NO));
doc.add(new NumericDocValuesField("id_1", 6));
doc.add(new SortedDocValuesField("id_2", new BytesRef("6")));
w.addDocument(doc);
// 6 -- no author field
doc = new Document();
doc.add(newTextField("content", "random word stuck in alot of other text", Field.Store.NO));
doc.add(new NumericDocValuesField("id_1", 6));
doc.add(new SortedDocValuesField("id_2", new BytesRef("6")));
w.addDocument(doc);
// 7 -- no author field
doc = new Document();
doc.add(newTextField("content", "random word stuck in alot of other text", Field.Store.NO));
doc.add(new NumericDocValuesField("id_1", 7));
doc.add(new SortedDocValuesField("id_2", new BytesRef("7")));
w.addDocument(doc);
IndexReader reader = w.getReader();
IndexSearcher indexSearcher = newSearcher(reader);
w.close();
int maxDoc = reader.maxDoc();
Sort sortWithinGroup = new Sort(new SortField("id_1", SortField.Type.INT, true));
AllGroupHeadsCollector<?> allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup);
indexSearcher.search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
assertTrue(arrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.retrieveGroupHeads()));
assertTrue(openBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.retrieveGroupHeads(maxDoc), maxDoc));
allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup);
indexSearcher.search(new TermQuery(new Term("content", "some")), allGroupHeadsCollector);
assertTrue(arrayContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.retrieveGroupHeads()));
assertTrue(openBitSetContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.retrieveGroupHeads(maxDoc), maxDoc));
allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup);
indexSearcher.search(new TermQuery(new Term("content", "blob")), allGroupHeadsCollector);
assertTrue(arrayContains(new int[] { 1, 5 }, allGroupHeadsCollector.retrieveGroupHeads()));
assertTrue(openBitSetContains(new int[] { 1, 5 }, allGroupHeadsCollector.retrieveGroupHeads(maxDoc), maxDoc));
// STRING sort type triggers different implementation
Sort sortWithinGroup2 = new Sort(new SortField("id_2", SortField.Type.STRING, true));
allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup2);
indexSearcher.search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
assertTrue(arrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.retrieveGroupHeads()));
assertTrue(openBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.retrieveGroupHeads(maxDoc), maxDoc));
Sort sortWithinGroup3 = new Sort(new SortField("id_2", SortField.Type.STRING, false));
allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup3);
indexSearcher.search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
// 7 b/c higher doc id wins, even if order of field is in not in reverse.
assertTrue(arrayContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.retrieveGroupHeads()));
assertTrue(openBitSetContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.retrieveGroupHeads(maxDoc), maxDoc));
indexSearcher.getIndexReader().close();
dir.close();
}
Aggregations