Search in sources :

Example 56 with StoredField

use of org.apache.lucene.document.StoredField in project ddf by codice.

the class TestGeoNamesQueryLuceneIndex method createDocumentFromGeoEntry.

/**
 * Builds the Lucene document representing a single {@link GeoEntry}.
 *
 * <p>The name, latitude, longitude, feature code, population and country code are stored;
 * the alternate names are indexed without being stored. The population is additionally added
 * as a numeric doc-values field, and the entry's location is added through the fields produced
 * by {@code strategy} for the point (longitude, latitude).
 *
 * @param geoEntry the entry whose values populate the document
 * @return the newly created document
 */
private Document createDocumentFromGeoEntry(final GeoEntry geoEntry) {
    final Document doc = new Document();

    // Descriptive and numeric attributes, added in a fixed order.
    doc.add(new TextField(
            GeoNamesLuceneConstants.NAME_FIELD, geoEntry.getName(), Field.Store.YES));
    doc.add(new StoredField(
            GeoNamesLuceneConstants.LATITUDE_FIELD, geoEntry.getLatitude()));
    doc.add(new StoredField(
            GeoNamesLuceneConstants.LONGITUDE_FIELD, geoEntry.getLongitude()));
    doc.add(new StringField(
            GeoNamesLuceneConstants.FEATURE_CODE_FIELD, geoEntry.getFeatureCode(), Field.Store.YES));
    doc.add(new StoredField(
            GeoNamesLuceneConstants.POPULATION_FIELD, geoEntry.getPopulation()));
    doc.add(new NumericDocValuesField(
            GeoNamesLuceneConstants.POPULATION_DOCVALUES_FIELD, geoEntry.getPopulation()));
    doc.add(new StringField(
            GeoNamesLuceneConstants.COUNTRY_CODE_FIELD, geoEntry.getCountryCode(), Field.Store.YES));
    doc.add(new TextField(
            GeoNamesLuceneConstants.ALTERNATE_NAMES_FIELD, geoEntry.getAlternateNames(), Field.Store.NO));

    // Spatial fields: the point is built as (x = longitude, y = latitude).
    final Shape location = SPATIAL_CONTEXT.getShapeFactory()
            .pointXY(geoEntry.getLongitude(), geoEntry.getLatitude());
    for (IndexableField spatialField : strategy.createIndexableFields(location)) {
        doc.add(spatialField);
    }

    return doc;
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) StoredField(org.apache.lucene.document.StoredField) Shape(org.locationtech.spatial4j.shape.Shape) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document)

Example 57 with StoredField

use of org.apache.lucene.document.StoredField in project ddf by codice.

the class GeoNamesLuceneIndexer method addDocument.

/**
 * Converts a {@link GeoEntry} into a Lucene document and adds it to the given index writer.
 *
 * <p>Besides the entry's descriptive fields, the document receives the spatial fields produced
 * by {@code strategy} for the point (longitude, latitude) and a float doc-values field holding
 * the value returned by {@code calculateBoost}.
 *
 * @param indexWriter the writer the finished document is added to
 * @param geoEntry the entry whose values populate the document
 * @param strategy the spatial strategy used to create the indexable fields for the entry's point
 * @throws IOException if the writer fails to add the document
 */
private void addDocument(final IndexWriter indexWriter, final GeoEntry geoEntry, final SpatialStrategy strategy) throws IOException {
    final Document doc = new Document();

    doc.add(new TextField(
            GeoNamesLuceneConstants.NAME_FIELD, geoEntry.getName(), Field.Store.YES));
    doc.add(new StoredField(
            GeoNamesLuceneConstants.LATITUDE_FIELD, geoEntry.getLatitude()));
    doc.add(new StoredField(
            GeoNamesLuceneConstants.LONGITUDE_FIELD, geoEntry.getLongitude()));
    doc.add(new StringField(
            GeoNamesLuceneConstants.FEATURE_CODE_FIELD, geoEntry.getFeatureCode(), Field.Store.YES));
    doc.add(new TextField(
            GeoNamesLuceneConstants.COUNTRY_CODE_FIELD, geoEntry.getCountryCode(), Field.Store.YES));
    doc.add(new StoredField(
            GeoNamesLuceneConstants.POPULATION_FIELD, geoEntry.getPopulation()));
    // Population is duplicated as doc values so results can be sorted by it.
    doc.add(new NumericDocValuesField(
            GeoNamesLuceneConstants.POPULATION_DOCVALUES_FIELD, geoEntry.getPopulation()));
    doc.add(new TextField(
            GeoNamesLuceneConstants.ALTERNATE_NAMES_FIELD, geoEntry.getAlternateNames(), Field.Store.NO));

    // Spatial information enables fast spatial filtering; the point is (x = longitude, y = latitude).
    final Shape location = SPATIAL_CONTEXT.getShapeFactory()
            .pointXY(geoEntry.getLongitude(), geoEntry.getLatitude());
    for (IndexableField spatialField : strategy.createIndexableFields(location)) {
        doc.add(spatialField);
    }

    // Per-entry boost, kept as a float doc-values field.
    doc.add(new FloatDocValuesField(GeoNamesLuceneConstants.BOOST_FIELD, calculateBoost(geoEntry)));

    indexWriter.addDocument(doc);
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) StoredField(org.apache.lucene.document.StoredField) Shape(org.locationtech.spatial4j.shape.Shape) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) TextField(org.apache.lucene.document.TextField) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Document(org.apache.lucene.document.Document)

Example 58 with StoredField

use of org.apache.lucene.document.StoredField in project Anserini by castorini.

the class IndexW2V method indexEmbeddings.

/**
 * Indexes the embeddings found in the file {@code args.input} into {@code directory}.
 *
 * <p>The first line of the input is read and discarded (presumably a header line — confirm
 * against the input format). Every following line is expected to hold a term, a tab, and a
 * space-separated list of float values. For each line one document is written: the term goes
 * into an indexed, non-stored {@link StringField}, and the floats are packed as consecutive
 * 4-byte values into a {@link StoredField}.
 *
 * <p>The input reader is always closed. The index writer is always released: it is closed
 * after a successful commit and merge, and rolled back if anything fails beforehand so that
 * a partially built index is not committed.
 *
 * @throws IOException if reading the input or writing the index fails
 * @throws InterruptedException declared for callers; nothing visible in this method throws it
 */
public void indexEmbeddings() throws IOException, InterruptedException {
    LOG.info("Starting indexer...");
    long startTime = System.currentTimeMillis();
    final WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
    final IndexWriterConfig config = new IndexWriterConfig(analyzer);
    final IndexWriter writer = new IndexWriter(directory, config);
    boolean succeeded = false;
    try {
        int cnt = 0;
        // try-with-resources guarantees the input file handle is released on every path.
        try (BufferedReader bRdr = new BufferedReader(new FileReader(args.input))) {
            // Discard the first line of the input.
            bRdr.readLine();
            // The document and the byte buffer are reused across lines to avoid reallocations.
            Document document = new Document();
            ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
            String line;
            while ((line = bRdr.readLine()) != null) {
                String[] termEmbedding = line.trim().split("\t");
                document.add(new StringField(LuceneDocumentGenerator.FIELD_ID, termEmbedding[0], Field.Store.NO));
                String[] parts = termEmbedding[1].split(" ");
                for (int i = 0; i < parts.length; ++i) {
                    // Each component is serialized as a 4-byte float (ByteBuffer's default byte order).
                    byteStream.write(ByteBuffer.allocate(4).putFloat(Float.parseFloat(parts[i])).array());
                }
                document.add(new StoredField(FIELD_BODY, byteStream.toByteArray()));
                byteStream.reset();
                writer.addDocument(document);
                document.clear();
                cnt++;
                if (cnt % 100000 == 0) {
                    LOG.info(cnt + " terms indexed");
                }
            }
        }
        LOG.info(String.format("Total of %s terms added", cnt));
        writer.commit();
        writer.forceMerge(1);
        succeeded = true;
    } finally {
        try {
            if (succeeded) {
                writer.close();
            } else {
                // Release the writer (and its lock) without committing a half-built index.
                writer.rollback();
            }
        } catch (IOException e) {
            LOG.error(e);
        }
    }
    LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) ByteArrayOutputStream(org.apache.commons.io.output.ByteArrayOutputStream) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) StoredField(org.apache.lucene.document.StoredField) IndexWriter(org.apache.lucene.index.IndexWriter) StringField(org.apache.lucene.document.StringField) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 59 with StoredField

use of org.apache.lucene.document.StoredField in project elasticsearch by elastic.

the class DateFieldMapper method parseCreateField.

/**
 * Parses a date value from the parse context and appends the Lucene fields for it.
 *
 * <p>The textual value comes from the context's external value when one is set, otherwise from
 * the parser. A missing value falls back to the field type's null-value string; if that is
 * also {@code null} nothing is added. A value that fails to parse is silently skipped when
 * {@code ignoreMalformed} is enabled and rethrown otherwise.
 *
 * @param context the parse context supplying the value
 * @param fields the list the created fields are appended to
 * @throws IOException if reading from the parser fails
 */
@Override
protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
    // Step 1: obtain the raw text, preferring an externally supplied value.
    String rawDate;
    if (context.externalValueSet()) {
        final Object external = context.externalValue();
        rawDate = (external == null) ? null : external.toString();
    } else {
        rawDate = context.parser().textOrNull();
    }

    // Step 2: substitute the configured null value; give up when there is none.
    if (rawDate == null) {
        rawDate = fieldType().nullValueAsString();
        if (rawDate == null) {
            return;
        }
    }

    // Step 3: parse, honouring the ignore-malformed setting.
    long parsed;
    try {
        parsed = fieldType().parse(rawDate);
    } catch (IllegalArgumentException e) {
        if (!ignoreMalformed.value()) {
            throw e;
        }
        return;
    }

    // Step 4: feed the _all entries and emit whichever fields the field type asks for.
    if (context.includeInAll(includeInAll, this)) {
        context.allEntries().addText(fieldType().name(), rawDate, fieldType().boost());
    }
    if (fieldType().indexOptions() != IndexOptions.NONE) {
        fields.add(new LongPoint(fieldType().name(), parsed));
    }
    if (fieldType().hasDocValues()) {
        fields.add(new SortedNumericDocValuesField(fieldType().name(), parsed));
    }
    if (fieldType().stored()) {
        fields.add(new StoredField(fieldType().name(), parsed));
    }
}
Also used : SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StoredField(org.apache.lucene.document.StoredField) LongPoint(org.apache.lucene.document.LongPoint)

Example 60 with StoredField

use of org.apache.lucene.document.StoredField in project elasticsearch by elastic.

the class InternalEngineTests method testHandleDocumentFailure.

/**
 * Checks how the engine reacts to failures injected through a {@code ThrowingIndexWriter}.
 *
 * <p>The test walks through three phases:
 * <ol>
 *   <li>an injected failure during indexing is reported via {@code IndexResult.getFailure()},
 *       and indexing works again once the failure is cleared;</li>
 *   <li>an injected failure during a delete surfaces as a thrown exception;</li>
 *   <li>the engine is then either driven into a failure through a stored field whose
 *       {@code binaryValue()} throws, or closed normally, after which further operations
 *       must fail with {@link AlreadyClosedException}.</li>
 * </ol>
 * Which variant of each phase runs is decided by {@code randomBoolean()}.
 */
public void testHandleDocumentFailure() throws Exception {
    try (Store store = createStore()) {
        final ParsedDocument doc1 = testParsedDocument("1", "test", null, testDocumentWithTextField(), B_1, null);
        final ParsedDocument doc2 = testParsedDocument("2", "test", null, testDocumentWithTextField(), B_1, null);
        final ParsedDocument doc3 = testParsedDocument("3", "test", null, testDocumentWithTextField(), B_1, null);
        // Holds the writer created by the factory lambda below so the test can toggle failures on it.
        AtomicReference<ThrowingIndexWriter> throwingIndexWriter = new AtomicReference<>();
        try (Engine engine = createEngine(defaultSettings, store, createTempDir(), NoMergePolicy.INSTANCE, (directory, iwc) -> {
            throwingIndexWriter.set(new ThrowingIndexWriter(directory, iwc));
            return throwingIndexWriter.get();
        })) {
            // test document failure while indexing
            if (randomBoolean()) {
                throwingIndexWriter.get().setThrowFailure(() -> new IOException("simulated"));
            } else {
                throwingIndexWriter.get().setThrowFailure(() -> new IllegalArgumentException("simulated max token length"));
            }
            // The failure must be reported in the result rather than thrown by index().
            Engine.IndexResult indexResult = engine.index(indexForDoc(doc1));
            assertNotNull(indexResult.getFailure());
            // With the injected failure cleared, the same document indexes without a failure.
            throwingIndexWriter.get().clearFailure();
            indexResult = engine.index(indexForDoc(doc1));
            assertNull(indexResult.getFailure());
            engine.index(indexForDoc(doc2));
            // all these simulated exceptions are not fatal to the IW so we treat them as document failures
            if (randomBoolean()) {
                throwingIndexWriter.get().setThrowFailure(() -> new IOException("simulated"));
                expectThrows(IOException.class, () -> engine.delete(new Engine.Delete("test", "1", newUid(doc1))));
            } else {
                throwingIndexWriter.get().setThrowFailure(() -> new IllegalArgumentException("simulated max token length"));
                expectThrows(IllegalArgumentException.class, () -> engine.delete(new Engine.Delete("test", "1", newUid(doc1))));
            }
            // test non document level failure is thrown
            if (randomBoolean()) {
                // simulate close by corruption
                // NOTE(review): passing null presumably disables the injected writer failure so that
                // only the stored-field hack below fails — confirm against ThrowingIndexWriter.
                throwingIndexWriter.get().setThrowFailure(null);
                UncheckedIOException uncheckedIOException = expectThrows(UncheckedIOException.class, () -> {
                    Engine.Index index = indexForDoc(doc3);
                    index.parsedDoc().rootDoc().add(new StoredField("foo", "bar") {

                        // this is a hack to add a failure during store document which triggers a tragic event
                        // and in turn fails the engine
                        @Override
                        public BytesRef binaryValue() {
                            throw new UncheckedIOException(new MockDirectoryWrapper.FakeIOException());
                        }
                    });
                    engine.index(index);
                });
                // The exception thrown from binaryValue() must reach the caller with its cause intact.
                assertTrue(uncheckedIOException.getCause() instanceof MockDirectoryWrapper.FakeIOException);
            } else {
                // normal close
                engine.close();
            }
            // now the engine is closed check we respond correctly
            try {
                if (randomBoolean()) {
                    engine.index(indexForDoc(doc1));
                } else {
                    engine.delete(new Engine.Delete("test", "", newUid(doc1)));
                }
                // Reaching this line means the operation unexpectedly succeeded on a closed engine.
                fail("engine should be closed");
            } catch (Exception e) {
                assertThat(e, instanceOf(AlreadyClosedException.class));
            }
        }
    }
}
Also used : MockDirectoryWrapper(org.apache.lucene.store.MockDirectoryWrapper) Store(org.elasticsearch.index.store.Store) AtomicReference(java.util.concurrent.atomic.AtomicReference) UncheckedIOException(java.io.UncheckedIOException) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) ElasticsearchException(org.elasticsearch.ElasticsearchException) StoredField(org.apache.lucene.document.StoredField) ParsedDocument(org.elasticsearch.index.mapper.ParsedDocument) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

StoredField (org.apache.lucene.document.StoredField) 110, Document (org.apache.lucene.document.Document) 98, Directory (org.apache.lucene.store.Directory) 72, StringField (org.apache.lucene.document.StringField) 44, Field (org.apache.lucene.document.Field) 40, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer) 39, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter) 36, BytesRef (org.apache.lucene.util.BytesRef) 35, NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField) 34, TextField (org.apache.lucene.document.TextField) 31, IndexReader (org.apache.lucene.index.IndexReader) 29, IndexSearcher (org.apache.lucene.search.IndexSearcher) 26, IntPoint (org.apache.lucene.document.IntPoint) 24, SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField) 23, TopDocs (org.apache.lucene.search.TopDocs) 23, SortField (org.apache.lucene.search.SortField) 22, SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField) 21, Sort (org.apache.lucene.search.Sort) 21, BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField) 18, MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery) 18