use of org.apache.lucene.search.spell.Dictionary in project lucene-solr by apache.
the class DocumentValueSourceDictionaryTest method testValueSourceBasic.
@Test
public void testValueSourceBasic() throws IOException {
Directory dir = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
Map<String, Document> docs = generateIndexDocuments(atLeast(100));
for (Document doc : docs.values()) {
writer.addDocument(doc);
}
writer.commit();
writer.close();
IndexReader ir = DirectoryReader.open(dir);
LongValuesSource s = sum(WEIGHT_FIELD_NAME_1, WEIGHT_FIELD_NAME_2, WEIGHT_FIELD_NAME_3);
Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, s, PAYLOAD_FIELD_NAME);
InputIterator inputIterator = dictionary.getEntryIterator();
BytesRef f;
while ((f = inputIterator.next()) != null) {
Document doc = docs.remove(f.utf8ToString());
long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
assertEquals(inputIterator.weight(), (w1 + w2 + w3));
IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
if (payloadField == null)
assertTrue(inputIterator.payload().length == 0);
else
assertEquals(inputIterator.payload(), payloadField.binaryValue());
}
assertTrue(docs.isEmpty());
IOUtils.close(ir, analyzer, dir);
}
use of org.apache.lucene.search.spell.Dictionary in project lucene-solr by apache.
the class DocumentDictionaryTest method testWithContexts.
@Test
public void testWithContexts() throws IOException {
Directory dir = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), true);
Map<String, Document> docs = res.getValue();
List<String> invalidDocTerms = res.getKey();
for (Document doc : docs.values()) {
writer.addDocument(doc);
}
writer.commit();
writer.close();
IndexReader ir = DirectoryReader.open(dir);
Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
InputIterator inputIterator = dictionary.getEntryIterator();
BytesRef f;
while ((f = inputIterator.next()) != null) {
Document doc = docs.remove(f.utf8ToString());
assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME);
assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0);
IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
if (payloadField == null)
assertTrue(inputIterator.payload().length == 0);
else
assertEquals(inputIterator.payload(), payloadField.binaryValue());
Set<BytesRef> oriCtxs = new HashSet<>();
Set<BytesRef> contextSet = inputIterator.contexts();
for (IndexableField ctxf : doc.getFields(CONTEXT_FIELD_NAME)) {
oriCtxs.add(ctxf.binaryValue());
}
assertEquals(oriCtxs.size(), contextSet.size());
}
for (String invalidTerm : invalidDocTerms) {
assertNotNull(docs.remove(invalidTerm));
}
assertTrue(docs.isEmpty());
IOUtils.close(ir, analyzer, dir);
}
use of org.apache.lucene.search.spell.Dictionary in project lucene-solr by apache.
the class DocumentValueSourceDictionaryTest method testLongValuesSourceWithDeletions.
@Test
public void testLongValuesSourceWithDeletions() throws IOException {
Directory dir = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
Map<String, Document> docs = generateIndexDocuments(atLeast(100));
Random rand = random();
List<String> termsToDel = new ArrayList<>();
for (Document doc : docs.values()) {
if (rand.nextBoolean() && termsToDel.size() < docs.size() - 1) {
termsToDel.add(doc.get(FIELD_NAME));
}
writer.addDocument(doc);
}
writer.commit();
Term[] delTerms = new Term[termsToDel.size()];
for (int i = 0; i < termsToDel.size(); i++) {
delTerms[i] = new Term(FIELD_NAME, termsToDel.get(i));
}
for (Term delTerm : delTerms) {
writer.deleteDocuments(delTerm);
}
writer.commit();
writer.close();
for (String termToDel : termsToDel) {
assertTrue(null != docs.remove(termToDel));
}
IndexReader ir = DirectoryReader.open(dir);
assertTrue("NumDocs should be > 0 but was " + ir.numDocs(), ir.numDocs() > 0);
assertEquals(ir.numDocs(), docs.size());
LongValuesSource sumValues = sum(WEIGHT_FIELD_NAME_1, WEIGHT_FIELD_NAME_2);
Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, sumValues, PAYLOAD_FIELD_NAME);
InputIterator inputIterator = dictionary.getEntryIterator();
BytesRef f;
while ((f = inputIterator.next()) != null) {
Document doc = docs.remove(f.utf8ToString());
long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
assertEquals(inputIterator.weight(), w2 + w1);
IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
if (payloadField == null)
assertTrue(inputIterator.payload().length == 0);
else
assertEquals(inputIterator.payload(), payloadField.binaryValue());
}
assertTrue(docs.isEmpty());
IOUtils.close(ir, analyzer, dir);
}
use of org.apache.lucene.search.spell.Dictionary in project lucene-solr by apache.
the class DocumentValueSourceDictionaryTest method testLongValuesSourceBasic.
@Test
public void testLongValuesSourceBasic() throws IOException {
Directory dir = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
Map<String, Document> docs = generateIndexDocuments(atLeast(100));
for (Document doc : docs.values()) {
writer.addDocument(doc);
}
writer.commit();
writer.close();
IndexReader ir = DirectoryReader.open(dir);
LongValuesSource sumValueSource = sum(WEIGHT_FIELD_NAME_1, WEIGHT_FIELD_NAME_2, WEIGHT_FIELD_NAME_3);
Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, sumValueSource, PAYLOAD_FIELD_NAME);
InputIterator inputIterator = dictionary.getEntryIterator();
BytesRef f;
while ((f = inputIterator.next()) != null) {
Document doc = docs.remove(f.utf8ToString());
long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
assertEquals(inputIterator.weight(), (w1 + w2 + w3));
IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
if (payloadField == null)
assertTrue(inputIterator.payload().length == 0);
else
assertEquals(inputIterator.payload(), payloadField.binaryValue());
}
assertTrue(docs.isEmpty());
IOUtils.close(ir, analyzer, dir);
}
use of org.apache.lucene.search.spell.Dictionary in project lucene-solr by apache.
the class DocumentDictionaryTest method testWithoutPayload.
@Test
public void testWithoutPayload() throws IOException {
Directory dir = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false);
Map<String, Document> docs = res.getValue();
List<String> invalidDocTerms = res.getKey();
for (Document doc : docs.values()) {
writer.addDocument(doc);
}
writer.commit();
writer.close();
IndexReader ir = DirectoryReader.open(dir);
Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
InputIterator inputIterator = dictionary.getEntryIterator();
BytesRef f;
while ((f = inputIterator.next()) != null) {
Document doc = docs.remove(f.utf8ToString());
assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME);
assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0);
assertNull(inputIterator.payload());
}
for (String invalidTerm : invalidDocTerms) {
assertNotNull(docs.remove(invalidTerm));
}
assertTrue(docs.isEmpty());
IOUtils.close(ir, analyzer, dir);
}
Aggregations