use of org.apache.lucene.document.StoredField in project lucene-solr by apache.
the class TestSuggestField method testReturnedDocID.
/**
 * Indexes {@code num} documents, each carrying a suggestion {@code "abc_" + i} and a stored
 * {@code int_field} holding {@code i}, then checks that every suggestion returned for the
 * prefix {@code abc_} resolves to the document whose stored value matches the number encoded
 * in the suggestion key.
 *
 * @throws Exception if indexing or searching fails
 */
@Test
public void testReturnedDocID() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // try-with-resources: the writer (and the reader below) are released even when an assertion
  // fails, so a failing run does not leave handles open on the shared directory.
  try (RandomIndexWriter iw =
      new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"))) {
    int num = Math.min(1000, atLeast(10));
    for (int i = 0; i < num; i++) {
      Document document = new Document();
      document.add(new SuggestField("suggest_field", "abc_" + i, num));
      document.add(new StoredField("int_field", i));
      iw.addDocument(document);
      // Commit at random points so the documents are not all flushed in one go.
      if (random().nextBoolean()) {
        iw.commit();
      }
    }

    // Resources close in reverse order of acquisition: reader first, then writer.
    try (DirectoryReader reader = iw.getReader()) {
      SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
      PrefixCompletionQuery query =
          new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
      TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
      assertEquals(num, suggest.totalHits);
      for (SuggestScoreDoc suggestScoreDoc : suggest.scoreLookupDocs()) {
        String key = suggestScoreDoc.key.toString();
        assertTrue(key.startsWith("abc_"));
        // The part after the 4-character "abc_" prefix is the value stored in int_field.
        String substring = key.substring(4);
        int fieldValue = Integer.parseInt(substring);
        Document doc = reader.document(suggestScoreDoc.doc);
        assertEquals(doc.getField("int_field").numericValue().intValue(), fieldValue);
      }
    }
  }
}
use of org.apache.lucene.document.StoredField in project lucene-solr by apache.
the class TestSuggestField method testMultipleSegments.
/**
 * Indexes documents of which only a random subset carries a suggest field, committing after
 * most of them, and checks that a prefix query for {@code abc_} returns exactly the suggestions
 * that were indexed.
 *
 * @throws Exception if indexing or searching fails
 */
@Test
public void testMultipleSegments() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // try-with-resources: the writer (and the reader below) are released even when an assertion
  // fails, so a failing run does not leave handles open on the shared directory.
  try (RandomIndexWriter iw =
      new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"))) {
    int num = Math.min(1000, atLeast(10));
    List<Entry> entries = new ArrayList<>();
    // ensure at least some segments have no suggest field
    for (int i = num; i > 0; i--) {
      Document document = new Document();
      if (random().nextInt(4) == 1) {
        document.add(new SuggestField("suggest_field", "abc_" + i, i));
        entries.add(new Entry("abc_" + i, i));
      }
      document.add(new StoredField("weight_fld", i));
      iw.addDocument(document);
      if (usually()) {
        iw.commit();
      }
    }

    // Resources close in reverse order of acquisition: reader first, then writer.
    try (DirectoryReader reader = iw.getReader()) {
      SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
      PrefixCompletionQuery query =
          new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
      // Always ask for at least one hit, even if no document received a suggest field.
      TopSuggestDocs suggest = indexSearcher.suggest(query, Math.max(1, entries.size()), false);
      assertSuggestions(suggest, entries.toArray(new Entry[0]));
    }
  }
}
use of org.apache.lucene.document.StoredField in project lucene-solr by apache.
the class TestCompressingStoredFieldsFormat method testDeletePartiallyWrittenFilesIfAbort.
/**
 * Commits one valid document, then adds a document whose stored field yields a {@code null}
 * string value. If the expected {@link IllegalArgumentException} is raised it must be the
 * writer's tragic exception, and in all cases the writer must end up closed.
 *
 * @throws IOException if the directory or writer fails unexpectedly
 */
public void testDeletePartiallyWrittenFilesIfAbort() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
  config.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
  config.setCodec(CompressingCodec.randomInstance(random()));
  // File names are inspected by this test, so compound files must stay disabled.
  config.setMergePolicy(newLogMergePolicy(false));
  config.setUseCompoundFile(false);
  // A plain IndexWriter is used rather than RandomIndexWriter so that CFS stays off.
  IndexWriter writer = new IndexWriter(dir, config);

  final Document goodDoc = new Document();
  goodDoc.add(new IntPoint("id", 0));
  goodDoc.add(new StoredField("id", 0));
  writer.addDocument(goodDoc);
  writer.commit();

  // Build a stored field with no usable value so that writing it fails and triggers an abort.
  final Document badDoc = new Document();
  FieldType storedOnly = new FieldType();
  storedOnly.setStored(true);
  Field valuelessField = new Field("invalid", storedOnly) {
    @Override
    public String stringValue() {
      // Returning null is what aborts the segment (known wart; ideally this would be fixed).
      return null;
    }
  };
  badDoc.add(valuelessField);

  try {
    writer.addDocument(badDoc);
    writer.commit();
  } catch (IllegalArgumentException expected) {
    // The failure must have been recorded as the writer's tragic event.
    assertEquals(expected, writer.getTragicException());
  }

  // The tragedy is expected to have closed the writer.
  assertFalse(writer.isOpen());
  dir.close();
}
use of org.apache.lucene.document.StoredField in project Anserini by castorini.
the class ObjectTriplesLuceneDocumentGenerator method createDocument.
/**
 * Converts a set of triples sharing one subject into a Lucene document.
 *
 * <p>The cleaned subject URI is added as a stored {@code StringField}. Each predicate value is
 * then added under the cleaned predicate name as:
 * <ul>
 *   <li>a stored {@code StringField} when the predicate is indexed and the value is a URI;</li>
 *   <li>a stored {@code TextField} when the predicate is indexed and the value is not a URI;</li>
 *   <li>a {@code StoredField} (stored only) when the predicate is not indexed.</li>
 * </ul>
 * {@code src} is cleared before the document is returned.
 *
 * @param src the triples to convert; cleared as a side effect
 * @return the populated Lucene document
 */
public Document createDocument(ObjectTriples src) {
  Document doc = new Document();

  // The subject is a StringField so that it can be searched by exact value.
  doc.add(new StringField(FIELD_SUBJECT, cleanUri(src.getSubject()), Field.Store.YES));

  for (Map.Entry<String, List<String>> predicateEntry : src.getPredicateValues().entrySet()) {
    String predicate = cleanUri(predicateEntry.getKey());
    for (String rawValue : predicateEntry.getValue()) {
      // The object type is derived from the raw value, before normalization.
      String valueType = getObjectType(rawValue);
      String normalized = normalizeObjectValue(rawValue);

      final Field field;
      if (!isIndexedPredicate(predicate)) {
        // Non-indexed predicates are kept as stored-only values.
        field = new StoredField(predicate, normalized);
      } else if (valueType.equals(VALUE_TYPE_URI)) {
        // URIs of indexed predicates always go into a StringField.
        field = new StringField(predicate, normalized, Field.Store.YES);
      } else {
        // Other values of indexed predicates go into a stored TextField (indexed as text).
        field = new TextField(predicate, normalized, Field.Store.YES);
      }
      doc.add(field);
    }
  }

  src.clear();
  return doc;
}
use of org.apache.lucene.document.StoredField in project Anserini by castorini.
the class LuceneDocumentGenerator method createDocument.
/**
 * Builds a Lucene document from a source document.
 *
 * <p>The body text is the source content, passed through {@code transform} when one is
 * configured. The document always gets a stored id field; the raw content is stored as well
 * when {@code args.storeRawDocs} is set. The body field's stored flag, term vectors and index
 * options follow {@code args}.
 *
 * @param src the source document to convert
 * @return the Lucene document, or {@code null} if obtaining the text threw an exception or the
 *     text is empty after trimming (the matching counter is incremented in either case)
 */
public Document createDocument(SourceDocument src) {
  final String id = src.id();

  String contents;
  try {
    // Apply the transform when one is configured; otherwise use the content as is.
    if (transform != null) {
      contents = transform.apply(src.content());
    } else {
      contents = src.content();
    }
  } catch (Exception e) {
    LOG.error("Error extracting document text, skipping document: " + id, e);
    counters.errors.incrementAndGet();
    return null;
  }

  if (contents.trim().isEmpty()) {
    LOG.info("Empty document: " + id);
    counters.emptyDocuments.incrementAndGet();
    return null;
  }

  // Describe how the body field is stored and indexed.
  FieldType bodyType = new FieldType();
  bodyType.setStored(args.storeTransformedDocs);
  // Document vectors, with positions, only on request.
  if (args.storeDocvectors) {
    bodyType.setStoreTermVectors(true);
    bodyType.setStoreTermVectorPositions(true);
  }
  // "Positional" index when positions are requested, otherwise a "count" index.
  bodyType.setIndexOptions(
      args.storePositions
          ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS
          : IndexOptions.DOCS_AND_FREQS);

  // Assemble the document: id, optional raw content, then the body.
  Document document = new Document();
  document.add(new StringField(FIELD_ID, id, Field.Store.YES));
  if (args.storeRawDocs) {
    document.add(new StoredField(FIELD_RAW, src.content()));
  }
  document.add(new Field(FIELD_BODY, contents, bodyType));
  return document;
}
Aggregations