use of org.apache.lucene.document.Document in project elasticsearch by elastic.
the class XMoreLikeThis method retrieveTerms.
/**
* Find words for a more-like-this query former.
*
* @param docNum the id of the lucene document from which to find terms
*/
private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
Map<String, Int> termFreqMap = new HashMap<>();
for (String fieldName : fieldNames) {
final Fields vectors = ir.getTermVectors(docNum);
final Terms vector;
if (vectors != null) {
vector = vectors.terms(fieldName);
} else {
vector = null;
}
// field does not store term vector info
if (vector == null) {
Document d = ir.document(docNum);
IndexableField[] fields = d.getFields(fieldName);
for (IndexableField field : fields) {
final String stringValue = field.stringValue();
if (stringValue != null) {
addTermFrequencies(new FastStringReader(stringValue), termFreqMap, fieldName);
}
}
} else {
addTermFrequencies(termFreqMap, vector, fieldName);
}
}
return createQueue(termFreqMap);
}
use of org.apache.lucene.document.Document in project elasticsearch by elastic.
the class AbstractTermVectorsTestCase method indexDocsWithLucene.
protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {
Map<String, Analyzer> mapping = new HashMap<>();
for (TestFieldSetting field : testDocs[0].fieldSettings) {
if (field.storedPayloads) {
mapping.put(field.name, new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new StandardTokenizer();
TokenFilter filter = new LowerCaseFilter(tokenizer);
filter = new TypeAsPayloadTokenFilter(filter);
return new TokenStreamComponents(tokenizer, filter);
}
});
}
}
PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET), mapping);
Directory dir = new RAMDirectory();
IndexWriterConfig conf = new IndexWriterConfig(wrapper);
conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
IndexWriter writer = new IndexWriter(dir, conf);
for (TestDoc doc : testDocs) {
Document d = new Document();
d.add(new Field("id", doc.id, StringField.TYPE_STORED));
for (int i = 0; i < doc.fieldContent.length; i++) {
FieldType type = new FieldType(TextField.TYPE_STORED);
TestFieldSetting fieldSetting = doc.fieldSettings[i];
type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
type.setStoreTermVectorPositions(fieldSetting.storedPositions || fieldSetting.storedPayloads || fieldSetting.storedOffset);
type.setStoreTermVectors(true);
type.freeze();
d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
}
writer.updateDocument(new Term("id", doc.id), d);
writer.commit();
}
writer.close();
return DirectoryReader.open(dir);
}
use of org.apache.lucene.document.Document in project elasticsearch by elastic.
the class TermVectorsUnitTests method writeEmptyTermVector.
private void writeEmptyTermVector(TermVectorsResponse outResponse) throws IOException {
Directory dir = newDirectory();
IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
conf.setOpenMode(OpenMode.CREATE);
IndexWriter writer = new IndexWriter(dir, conf);
FieldType type = new FieldType(TextField.TYPE_STORED);
type.setStoreTermVectorOffsets(true);
type.setStoreTermVectorPayloads(false);
type.setStoreTermVectorPositions(true);
type.setStoreTermVectors(true);
type.freeze();
Document d = new Document();
d.add(new Field("id", "abc", StringField.TYPE_STORED));
writer.updateDocument(new Term("id", "abc"), d);
writer.commit();
writer.close();
DirectoryReader dr = DirectoryReader.open(dir);
IndexSearcher s = new IndexSearcher(dr);
TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
ScoreDoc[] scoreDocs = search.scoreDocs;
int doc = scoreDocs[0].doc;
Fields fields = dr.getTermVectors(doc);
EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
outResponse.setFields(fields, null, flags, fields);
outResponse.setExists(true);
dr.close();
dir.close();
}
use of org.apache.lucene.document.Document in project elasticsearch by elastic.
the class TermVectorsUnitTests method writeStandardTermVector.
private void writeStandardTermVector(TermVectorsResponse outResponse) throws IOException {
Directory dir = newDirectory();
IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
conf.setOpenMode(OpenMode.CREATE);
IndexWriter writer = new IndexWriter(dir, conf);
FieldType type = new FieldType(TextField.TYPE_STORED);
type.setStoreTermVectorOffsets(true);
type.setStoreTermVectorPayloads(false);
type.setStoreTermVectorPositions(true);
type.setStoreTermVectors(true);
type.freeze();
Document d = new Document();
d.add(new Field("id", "abc", StringField.TYPE_STORED));
d.add(new Field("title", "the1 quick brown fox jumps over the1 lazy dog", type));
d.add(new Field("desc", "the1 quick brown fox jumps over the1 lazy dog", type));
writer.updateDocument(new Term("id", "abc"), d);
writer.commit();
writer.close();
DirectoryReader dr = DirectoryReader.open(dir);
IndexSearcher s = new IndexSearcher(dr);
TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
ScoreDoc[] scoreDocs = search.scoreDocs;
int doc = scoreDocs[0].doc;
Fields termVectors = dr.getTermVectors(doc);
EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
outResponse.setFields(termVectors, null, flags, termVectors);
dr.close();
dir.close();
}
use of org.apache.lucene.document.Document in project elasticsearch by elastic.
the class RecoverySourceHandlerTests method testHandleExceptinoOnSendSendFiles.
public void testHandleExceptinoOnSendSendFiles() throws Throwable {
Settings settings = Settings.builder().put("indices.recovery.concurrent_streams", 1).put("indices.recovery.concurrent_small_file_streams", 1).build();
final RecoverySettings recoverySettings = new RecoverySettings(settings, service);
final StartRecoveryRequest request = new StartRecoveryRequest(shardId, new DiscoveryNode("b", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT), new DiscoveryNode("b", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT), null, randomBoolean(), randomNonNegativeLong(), randomBoolean() ? SequenceNumbersService.UNASSIGNED_SEQ_NO : 0L);
Path tempDir = createTempDir();
Store store = newStore(tempDir, false);
AtomicBoolean failedEngine = new AtomicBoolean(false);
RecoverySourceHandler handler = new RecoverySourceHandler(null, null, request, () -> 0L, e -> () -> {
}, recoverySettings.getChunkSize().bytesAsInt(), Settings.EMPTY) {
@Override
protected void failEngine(IOException cause) {
assertFalse(failedEngine.get());
failedEngine.set(true);
}
};
Directory dir = store.directory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
int numDocs = randomIntBetween(10, 100);
for (int i = 0; i < numDocs; i++) {
Document document = new Document();
document.add(new StringField("id", Integer.toString(i), Field.Store.YES));
document.add(newField("field", randomUnicodeOfCodepointLengthBetween(1, 10), TextField.TYPE_STORED));
writer.addDocument(document);
}
writer.commit();
writer.close();
Store.MetadataSnapshot metadata = store.getMetadata(null);
List<StoreFileMetaData> metas = new ArrayList<>();
for (StoreFileMetaData md : metadata) {
metas.add(md);
}
final boolean throwCorruptedIndexException = randomBoolean();
Store targetStore = newStore(createTempDir(), false);
try {
handler.sendFiles(store, metas.toArray(new StoreFileMetaData[0]), (md) -> {
if (throwCorruptedIndexException) {
throw new RuntimeException(new CorruptIndexException("foo", "bar"));
} else {
throw new RuntimeException("boom");
}
});
fail("exception index");
} catch (RuntimeException ex) {
assertNull(ExceptionsHelper.unwrapCorruption(ex));
if (throwCorruptedIndexException) {
assertEquals(ex.getMessage(), "[File corruption occurred on recovery but checksums are ok]");
} else {
assertEquals(ex.getMessage(), "boom");
}
} catch (CorruptIndexException ex) {
fail("not expected here");
}
assertFalse(failedEngine.get());
IOUtils.close(store, targetStore);
}
Aggregations