Use of org.apache.lucene.document.StoredField in project ddf by codice.
From class TestGeoNamesQueryLuceneIndex, method createDocumentFromGeoEntry.
private Document createDocumentFromGeoEntry(final GeoEntry geoEntry) {
    final Document document = new Document();
    document.add(new TextField(GeoNamesLuceneConstants.NAME_FIELD, geoEntry.getName(), Field.Store.YES));
    document.add(new StoredField(GeoNamesLuceneConstants.LATITUDE_FIELD, geoEntry.getLatitude()));
    document.add(new StoredField(GeoNamesLuceneConstants.LONGITUDE_FIELD, geoEntry.getLongitude()));
    document.add(new StringField(GeoNamesLuceneConstants.FEATURE_CODE_FIELD, geoEntry.getFeatureCode(), Field.Store.YES));
    document.add(new StoredField(GeoNamesLuceneConstants.POPULATION_FIELD, geoEntry.getPopulation()));
    document.add(new NumericDocValuesField(GeoNamesLuceneConstants.POPULATION_DOCVALUES_FIELD, geoEntry.getPopulation()));
    document.add(new StringField(GeoNamesLuceneConstants.COUNTRY_CODE_FIELD, geoEntry.getCountryCode(), Field.Store.YES));
    document.add(new TextField(GeoNamesLuceneConstants.ALTERNATE_NAMES_FIELD, geoEntry.getAlternateNames(), Field.Store.NO));
    final Shape point = SPATIAL_CONTEXT.getShapeFactory().pointXY(geoEntry.getLongitude(), geoEntry.getLatitude());
    for (IndexableField field : strategy.createIndexableFields(point)) {
        document.add(field);
    }
    return document;
}
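The StoredField entries above only carry values back out of the index; they are not searchable. A minimal retrieval sketch, assuming an open Directory and a Query built against the indexed fields (both hypothetical here, as is the helper method itself):

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;

// Sketch only: read the stored GeoNames values back from the top hits.
void printTopHits(final Directory directory, final Query query) throws IOException {
    try (DirectoryReader reader = DirectoryReader.open(directory)) {
        final IndexSearcher searcher = new IndexSearcher(reader);
        for (ScoreDoc hit : searcher.search(query, 10).scoreDocs) {
            final Document doc = searcher.doc(hit.doc);
            // Numeric StoredFields come back as numbers via getField(...).numericValue();
            // stored text fields come back as strings via get(...).
            final String name = doc.get(GeoNamesLuceneConstants.NAME_FIELD);
            final double latitude = doc.getField(GeoNamesLuceneConstants.LATITUDE_FIELD).numericValue().doubleValue();
            final double longitude = doc.getField(GeoNamesLuceneConstants.LONGITUDE_FIELD).numericValue().doubleValue();
            final long population = doc.getField(GeoNamesLuceneConstants.POPULATION_FIELD).numericValue().longValue();
            System.out.printf("%s (%f, %f), population %d%n", name, latitude, longitude, population);
        }
    }
}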
Use of org.apache.lucene.document.StoredField in project ddf by codice.
From class GeoNamesLuceneIndexer, method addDocument.
private void addDocument(final IndexWriter indexWriter, final GeoEntry geoEntry, final SpatialStrategy strategy) throws IOException {
    final Document document = new Document();
    document.add(new TextField(GeoNamesLuceneConstants.NAME_FIELD, geoEntry.getName(), Field.Store.YES));
    document.add(new StoredField(GeoNamesLuceneConstants.LATITUDE_FIELD, geoEntry.getLatitude()));
    document.add(new StoredField(GeoNamesLuceneConstants.LONGITUDE_FIELD, geoEntry.getLongitude()));
    document.add(new StringField(GeoNamesLuceneConstants.FEATURE_CODE_FIELD, geoEntry.getFeatureCode(), Field.Store.YES));
    document.add(new TextField(GeoNamesLuceneConstants.COUNTRY_CODE_FIELD, geoEntry.getCountryCode(), Field.Store.YES));
    document.add(new StoredField(GeoNamesLuceneConstants.POPULATION_FIELD, geoEntry.getPopulation()));
    // This DocValues field is used for sorting by population.
    document.add(new NumericDocValuesField(GeoNamesLuceneConstants.POPULATION_DOCVALUES_FIELD, geoEntry.getPopulation()));
    document.add(new TextField(GeoNamesLuceneConstants.ALTERNATE_NAMES_FIELD, geoEntry.getAlternateNames(), Field.Store.NO));
    // Add each entry's spatial information for fast spatial filtering.
    final Shape point = SPATIAL_CONTEXT.getShapeFactory().pointXY(geoEntry.getLongitude(), geoEntry.getLatitude());
    for (IndexableField field : strategy.createIndexableFields(point)) {
        document.add(field);
    }
    final float boost = calculateBoost(geoEntry);
    document.add(new FloatDocValuesField(GeoNamesLuceneConstants.BOOST_FIELD, boost));
    indexWriter.addDocument(document);
}
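The NumericDocValuesField is what makes sorting by population possible at query time without touching stored fields. A minimal sketch of such a sorted search, assuming an open DirectoryReader and a query built elsewhere:

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;

// Sketch only: return the top hits ordered by the population doc-values field.
TopDocs searchByPopulation(final DirectoryReader reader, final Query query) throws IOException {
    final IndexSearcher searcher = new IndexSearcher(reader);
    // 'true' reverses the sort so the most populous places come first.
    final Sort byPopulation = new Sort(
            new SortField(GeoNamesLuceneConstants.POPULATION_DOCVALUES_FIELD, SortField.Type.LONG, true));
    return searcher.search(query, 10, byPopulation);
}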
Use of org.apache.lucene.document.StoredField in project Anserini by castorini.
From class IndexW2V, method indexEmbeddings.
public void indexEmbeddings() throws IOException, InterruptedException {
    LOG.info("Starting indexer...");
    long startTime = System.currentTimeMillis();
    final WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
    final IndexWriterConfig config = new IndexWriterConfig(analyzer);
    final IndexWriter writer = new IndexWriter(directory, config);
    BufferedReader bRdr = new BufferedReader(new FileReader(args.input));
    String line = null;
    // Skip the first (header) line of the embeddings file.
    bRdr.readLine();
    Document document = new Document();
    ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
    int cnt = 0;
    while ((line = bRdr.readLine()) != null) {
        String[] termEmbedding = line.trim().split("\t");
        document.add(new StringField(LuceneDocumentGenerator.FIELD_ID, termEmbedding[0], Field.Store.NO));
        String[] parts = termEmbedding[1].split(" ");
        // Pack each float of the embedding as 4 big-endian bytes.
        for (int i = 0; i < parts.length; ++i) {
            byteStream.write(ByteBuffer.allocate(4).putFloat(Float.parseFloat(parts[i])).array());
        }
        // Store the packed vector; it is retrievable but not searchable.
        document.add(new StoredField(FIELD_BODY, byteStream.toByteArray()));
        byteStream.flush();
        byteStream.reset();
        writer.addDocument(document);
        // Reuse the Document instance for the next term.
        document.clear();
        cnt++;
        if (cnt % 100000 == 0) {
            LOG.info(cnt + " terms indexed");
        }
    }
    LOG.info(String.format("Total of %s terms added", cnt));
    try {
        writer.commit();
        writer.forceMerge(1);
    } finally {
        try {
            writer.close();
        } catch (IOException e) {
            LOG.error(e);
        }
    }
    LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
}
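Reading an embedding back simply reverses the packing above: look the term up via the indexed FIELD_ID, fetch the stored bytes, and decode four bytes per float. A minimal sketch, assuming an open IndexReader over the index written by this method:

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;

// Sketch only: decode the embedding stored for a single term.
float[] loadVector(IndexReader reader, String term) throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs hits = searcher.search(new TermQuery(new Term(LuceneDocumentGenerator.FIELD_ID, term)), 1);
    if (hits.scoreDocs.length == 0) {
        return null;
    }
    Document doc = searcher.doc(hits.scoreDocs[0].doc);
    BytesRef bytes = doc.getBinaryValue(FIELD_BODY);
    ByteBuffer buffer = ByteBuffer.wrap(bytes.bytes, bytes.offset, bytes.length);
    float[] vector = new float[bytes.length / Float.BYTES];
    for (int i = 0; i < vector.length; i++) {
        vector[i] = buffer.getFloat();
    }
    return vector;
}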
Use of org.apache.lucene.document.StoredField in project elasticsearch by elastic.
From class DateFieldMapper, method parseCreateField.
@Override
protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
    String dateAsString;
    if (context.externalValueSet()) {
        Object dateAsObject = context.externalValue();
        if (dateAsObject == null) {
            dateAsString = null;
        } else {
            dateAsString = dateAsObject.toString();
        }
    } else {
        dateAsString = context.parser().textOrNull();
    }
    if (dateAsString == null) {
        dateAsString = fieldType().nullValueAsString();
    }
    if (dateAsString == null) {
        return;
    }
    long timestamp;
    try {
        timestamp = fieldType().parse(dateAsString);
    } catch (IllegalArgumentException e) {
        if (ignoreMalformed.value()) {
            return;
        } else {
            throw e;
        }
    }
    if (context.includeInAll(includeInAll, this)) {
        context.allEntries().addText(fieldType().name(), dateAsString, fieldType().boost());
    }
    if (fieldType().indexOptions() != IndexOptions.NONE) {
        fields.add(new LongPoint(fieldType().name(), timestamp));
    }
    if (fieldType().hasDocValues()) {
        fields.add(new SortedNumericDocValuesField(fieldType().name(), timestamp));
    }
    if (fieldType().stored()) {
        fields.add(new StoredField(fieldType().name(), timestamp));
    }
}
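Outside the mapper, the same three-field pattern applies to any numeric value in plain Lucene: LongPoint for point and range queries, SortedNumericDocValuesField for sorting and aggregations, and StoredField so the original value can be returned with the hit. A minimal sketch (the field name and helper method are hypothetical):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.StoredField;

// Sketch only: index one parsed timestamp with a field per access pattern.
Document timestampDocument(String fieldName, long timestamp) {
    Document doc = new Document();
    doc.add(new LongPoint(fieldName, timestamp));                   // point and range queries
    doc.add(new SortedNumericDocValuesField(fieldName, timestamp)); // sorting and aggregations
    doc.add(new StoredField(fieldName, timestamp));                 // returned with the hit
    return doc;
}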
Use of org.apache.lucene.document.StoredField in project elasticsearch by elastic.
From class InternalEngineTests, method testHandleDocumentFailure.
public void testHandleDocumentFailure() throws Exception {
    try (Store store = createStore()) {
        final ParsedDocument doc1 = testParsedDocument("1", "test", null, testDocumentWithTextField(), B_1, null);
        final ParsedDocument doc2 = testParsedDocument("2", "test", null, testDocumentWithTextField(), B_1, null);
        final ParsedDocument doc3 = testParsedDocument("3", "test", null, testDocumentWithTextField(), B_1, null);
        AtomicReference<ThrowingIndexWriter> throwingIndexWriter = new AtomicReference<>();
        try (Engine engine = createEngine(defaultSettings, store, createTempDir(), NoMergePolicy.INSTANCE, (directory, iwc) -> {
            throwingIndexWriter.set(new ThrowingIndexWriter(directory, iwc));
            return throwingIndexWriter.get();
        })) {
            // test document failure while indexing
            if (randomBoolean()) {
                throwingIndexWriter.get().setThrowFailure(() -> new IOException("simulated"));
            } else {
                throwingIndexWriter.get().setThrowFailure(() -> new IllegalArgumentException("simulated max token length"));
            }
            Engine.IndexResult indexResult = engine.index(indexForDoc(doc1));
            assertNotNull(indexResult.getFailure());
            throwingIndexWriter.get().clearFailure();
            indexResult = engine.index(indexForDoc(doc1));
            assertNull(indexResult.getFailure());
            engine.index(indexForDoc(doc2));
            // all these simulated exceptions are not fatal to the IW so we treat them as document failures
            if (randomBoolean()) {
                throwingIndexWriter.get().setThrowFailure(() -> new IOException("simulated"));
                expectThrows(IOException.class, () -> engine.delete(new Engine.Delete("test", "1", newUid(doc1))));
            } else {
                throwingIndexWriter.get().setThrowFailure(() -> new IllegalArgumentException("simulated max token length"));
                expectThrows(IllegalArgumentException.class, () -> engine.delete(new Engine.Delete("test", "1", newUid(doc1))));
            }
            // test non document level failure is thrown
            if (randomBoolean()) {
                // simulate close by corruption
                throwingIndexWriter.get().setThrowFailure(null);
                UncheckedIOException uncheckedIOException = expectThrows(UncheckedIOException.class, () -> {
                    Engine.Index index = indexForDoc(doc3);
                    index.parsedDoc().rootDoc().add(new StoredField("foo", "bar") {
                        // this is a hack to add a failure during store document which triggers a tragic event
                        // and in turn fails the engine
                        @Override
                        public BytesRef binaryValue() {
                            throw new UncheckedIOException(new MockDirectoryWrapper.FakeIOException());
                        }
                    });
                    engine.index(index);
                });
                assertTrue(uncheckedIOException.getCause() instanceof MockDirectoryWrapper.FakeIOException);
            } else {
                // normal close
                engine.close();
            }
            // now the engine is closed, check we respond correctly
            try {
                if (randomBoolean()) {
                    engine.index(indexForDoc(doc1));
                } else {
                    engine.delete(new Engine.Delete("test", "", newUid(doc1)));
                }
                fail("engine should be closed");
            } catch (Exception e) {
                assertThat(e, instanceOf(AlreadyClosedException.class));
            }
        }
    }
}
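The anonymous StoredField subclass works because the stored-fields writer calls binaryValue() while serializing the document, so throwing from it makes the add fail mid-write. A stripped-down sketch of the same trick against a bare IndexWriter; whether the writer also records it as a tragic event (as the engine test above relies on) depends on the Lucene version:

import java.io.IOException;
import java.io.UncheckedIOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.BytesRef;

// Sketch only: 'writer' is assumed to be an open IndexWriter.
void addFailingDocument(IndexWriter writer) {
    Document doc = new Document();
    doc.add(new StoredField("foo", "bar") {
        @Override
        public BytesRef binaryValue() {
            // called by the stored-fields writer while the document is being written
            throw new UncheckedIOException(new IOException("simulated"));
        }
    });
    try {
        writer.addDocument(doc);
    } catch (Exception e) {
        // the simulated failure surfaces here, after stored-field writing has begun
    }
}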