Search in sources :

Example 1 with LongField

use of org.apache.lucene.document.LongField in project jackrabbit-oak by apache.

the class LuceneDocumentMaker method addTypedFields.

/**
 * Appends one Lucene field per value of {@code property} to {@code fields}, picking the
 * field class from the property's type tag.
 *
 * <p>LONG values become a {@link LongField}; DATE values are converted with
 * {@code FieldFactory.dateToLong} and also become a {@link LongField}; DOUBLE values become
 * a {@link DoubleField}; BOOLEAN values are indexed by their {@code toString()} form as a
 * {@link StringField}; every other type is read as STRING into a {@link StringField}.
 * None of the created fields is stored ({@code Field.Store.NO}).
 *
 * @param fields   list that receives the created fields
 * @param property property whose values are turned into fields
 * @param pname    name given to every created field
 * @return {@code true} if at least one field was appended, {@code false} if the property
 *         had no values
 */
private boolean addTypedFields(List<Field> fields, PropertyState property, String pname) {
    final int typeTag = property.getType().tag();
    int added = 0;
    int index = 0;
    while (index < property.count()) {
        final Field typedField;
        if (typeTag == Type.LONG.tag()) {
            typedField = new LongField(pname, property.getValue(Type.LONG, index), Field.Store.NO);
        } else if (typeTag == Type.DATE.tag()) {
            // Dates are indexed numerically, using the long form produced by FieldFactory.
            typedField = new LongField(pname, FieldFactory.dateToLong(property.getValue(Type.DATE, index)), Field.Store.NO);
        } else if (typeTag == Type.DOUBLE.tag()) {
            typedField = new DoubleField(pname, property.getValue(Type.DOUBLE, index), Field.Store.NO);
        } else if (typeTag == Type.BOOLEAN.tag()) {
            final Boolean flag = property.getValue(Type.BOOLEAN, index);
            typedField = new StringField(pname, flag.toString(), Field.Store.NO);
        } else {
            // Any remaining type is indexed through its STRING representation.
            typedField = new StringField(pname, property.getValue(Type.STRING, index), Field.Store.NO);
        }
        fields.add(typedField);
        added++;
        index++;
    }
    return added > 0;
}
Also used : FieldFactory.newFulltextField(org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newFulltextField) LongField(org.apache.lucene.document.LongField) StringField(org.apache.lucene.document.StringField) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) FieldFactory.newDepthField(org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newDepthField) FieldFactory.newPropertyField(org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPropertyField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) FieldFactory.newPathField(org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPathField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) DoubleField(org.apache.lucene.document.DoubleField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) FieldFactory.newAncestorsField(org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newAncestorsField) Field(org.apache.lucene.document.Field) LongField(org.apache.lucene.document.LongField) StringField(org.apache.lucene.document.StringField) DoubleField(org.apache.lucene.document.DoubleField)

Example 2 with LongField

use of org.apache.lucene.document.LongField in project nifi by apache.

the class TestEventIndexTask method testIndexWriterCommittedWhenAppropriate.

/**
 * Checks when an {@code EventIndexTask} commits its index writer. Four daemon threads run
 * the same task against a shared queue; the test feeds documents into the queue and asserts
 * that no commit happens while 200 events have been indexed, that exactly one commit is
 * observed after the 201st document is queued, and that two further documents do not raise
 * the commit count above 1.
 *
 * <p>NOTE(review): the {@code 201} passed to the task constructor is presumably the commit
 * threshold; the constructor is not visible here, so confirm.
 *
 * <p>The test relies on polling and short sleeps, guarded by the 5 second JUnit timeout.
 */
@Test(timeout = 5000)
public void testIndexWriterCommittedWhenAppropriate() throws IOException, InterruptedException {
    final BlockingQueue<StoredDocument> docQueue = new LinkedBlockingQueue<>();
    final RepositoryConfiguration repoConfig = new RepositoryConfiguration();
    final File storageDir = new File("target/storage/TestEventIndexTask/1");
    repoConfig.addStorageDirectory("1", storageDir);
    // Counts invocations of the overridden commit(...) method below.
    final AtomicInteger commitCount = new AtomicInteger(0);
    // Mock out the Lucene IndexWriter and wrap it in a real LuceneEventIndexWriter, whose
    // getEventsIndexed() counter is polled further down.
    final IndexWriter indexWriter = Mockito.mock(IndexWriter.class);
    final EventIndexWriter eventIndexWriter = new LuceneEventIndexWriter(indexWriter, storageDir);
    // The mocked IndexManager hands out the same event index writer for any directory.
    final IndexManager indexManager = Mockito.mock(IndexManager.class);
    Mockito.when(indexManager.borrowIndexWriter(Mockito.any(File.class))).thenReturn(eventIndexWriter);
    final IndexDirectoryManager directoryManager = new IndexDirectoryManager(repoConfig);
    // Create an EventIndexTask and override the commit(EventIndexWriter) method so that we can keep
    // track of how many times the index writer gets committed instead of performing a real commit.
    final EventIndexTask task = new EventIndexTask(docQueue, repoConfig, indexManager, directoryManager, 201, EventReporter.NO_OP) {

        @Override
        protected void commit(EventIndexWriter indexWriter) throws IOException {
            commitCount.incrementAndGet();
        }
    };
    // Create 4 threads, each one a daemon thread running the same EventIndexTask instance.
    for (int i = 0; i < 4; i++) {
        final Thread t = new Thread(task);
        t.setDaemon(true);
        t.start();
    }
    // Nothing has been queued yet, so no commit may have happened.
    assertEquals(0, commitCount.get());
    // Queue 100 documents with a storage filename of "0.0.prov"
    for (int i = 0; i < 100; i++) {
        final Document document = new Document();
        document.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), System.currentTimeMillis(), Store.NO));
        final StorageSummary location = new StorageSummary(1L, "0.0.prov", "1", 0, 1000L, 1000L);
        final StoredDocument storedDoc = new StoredDocument(document, location);
        docQueue.add(storedDoc);
    }
    assertEquals(0, commitCount.get());
    // Queue another 100 documents, built the same way, for a total of 200.
    for (int i = 0; i < 100; i++) {
        final Document document = new Document();
        document.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), System.currentTimeMillis(), Store.NO));
        final StorageSummary location = new StorageSummary(1L, "0.0.prov", "1", 0, 1000L, 1000L);
        final StoredDocument storedDoc = new StoredDocument(document, location);
        docQueue.add(storedDoc);
    }
    // Wait until we've indexed all 200 events
    while (eventIndexWriter.getEventsIndexed() < 200) {
        Thread.sleep(10L);
    }
    // Wait a bit and make sure that we still haven't committed the index writer.
    Thread.sleep(100L);
    assertEquals(0, commitCount.get());
    // Add another document (the 201st).
    final Document document = new Document();
    document.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), System.currentTimeMillis(), Store.NO));
    final StorageSummary location = new StorageSummary(1L, "0.0.prov", "1", 0, 1000L, 1000L);
    StoredDocument storedDoc = new StoredDocument(document, location);
    docQueue.add(storedDoc);
    // Wait until index writer is committed.
    while (commitCount.get() == 0) {
        Thread.sleep(10L);
    }
    assertEquals(1, commitCount.get());
    // Queue one more document (reusing the same Document and location) and give the task time
    // to process it; the commit count is expected to stay at 1.
    storedDoc = new StoredDocument(document, location);
    docQueue.add(storedDoc);
    Thread.sleep(100L);
    assertEquals(1, commitCount.get());
    // Queue yet another document. Index writer should not be committed again.
    storedDoc = new StoredDocument(document, location);
    docQueue.add(storedDoc);
    Thread.sleep(100L);
    assertEquals(1, commitCount.get());
}
Also used : LuceneEventIndexWriter(org.apache.nifi.provenance.lucene.LuceneEventIndexWriter) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Document(org.apache.lucene.document.Document) IndexManager(org.apache.nifi.provenance.lucene.IndexManager) LongField(org.apache.lucene.document.LongField) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) LuceneEventIndexWriter(org.apache.nifi.provenance.lucene.LuceneEventIndexWriter) IndexWriter(org.apache.lucene.index.IndexWriter) EventIndexWriter(org.apache.nifi.provenance.index.EventIndexWriter) LuceneEventIndexWriter(org.apache.nifi.provenance.lucene.LuceneEventIndexWriter) EventIndexWriter(org.apache.nifi.provenance.index.EventIndexWriter) RepositoryConfiguration(org.apache.nifi.provenance.RepositoryConfiguration) File(java.io.File) Test(org.junit.Test)

Example 3 with LongField

use of org.apache.lucene.document.LongField in project nifi by apache.

the class ConvertEventToLuceneDocument method convert.

/**
 * Builds the Lucene {@link Document} that represents a provenance event.
 *
 * <p>The searchable string fields of the record, plus every configured searchable attribute
 * (truncated via {@code LuceneUtil.truncateIndexField}), are added through {@code addField}.
 * If the document ends up with no fields at all, nothing is indexed and {@code null} is
 * returned. Otherwise the lineage start date, event time, file size and the event id are
 * added, followed by the related FlowFile UUIDs that depend on the event type.
 *
 * @param record         the provenance event to convert
 * @param persistedEvent storage summary supplying the event id recorded in the document
 * @return the populated document, or {@code null} when no field was added for the record
 */
public Document convert(final ProvenanceEventRecord record, final StorageSummary persistedEvent) {
    final Document luceneDoc = new Document();
    addField(luceneDoc, SearchableFields.FlowFileUUID, record.getFlowFileUuid());
    addField(luceneDoc, SearchableFields.Filename, record.getAttribute(CoreAttributes.FILENAME.key()));
    addField(luceneDoc, SearchableFields.ComponentID, record.getComponentId());
    addField(luceneDoc, SearchableFields.AlternateIdentifierURI, record.getAlternateIdentifierUri());
    addField(luceneDoc, SearchableFields.EventType, record.getEventType().name());
    addField(luceneDoc, SearchableFields.Relationship, record.getRelationship());
    addField(luceneDoc, SearchableFields.Details, record.getDetails());
    addField(luceneDoc, SearchableFields.ContentClaimSection, record.getContentClaimSection());
    addField(luceneDoc, SearchableFields.ContentClaimContainer, record.getContentClaimContainer());
    addField(luceneDoc, SearchableFields.ContentClaimIdentifier, record.getContentClaimIdentifier());
    addField(luceneDoc, SearchableFields.SourceQueueIdentifier, record.getSourceQueueIdentifier());
    addField(luceneDoc, SearchableFields.TransitURI, record.getTransitUri());
    for (final SearchableField attributeField : searchableAttributeFields) {
        final String attributeValue = record.getAttribute(attributeField.getSearchableFieldName());
        addField(luceneDoc, attributeField, LuceneUtil.truncateIndexField(attributeValue));
    }

    // The always-present fields are only worth indexing when something else was indexed too.
    if (luceneDoc.getFields().isEmpty()) {
        return null;
    }

    // Lineage Start Date is always included because it makes Lineage queries more efficient.
    luceneDoc.add(new LongField(SearchableFields.LineageStartDate.getSearchableFieldName(), record.getLineageStartDate(), Store.NO));
    // Event Time is always included because most queries are bound by a start and end time.
    luceneDoc.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), record.getEventTime(), Store.NO));
    // File Size is always included because the UI always renders the controls for it.
    luceneDoc.add(new LongField(SearchableFields.FileSize.getSearchableFieldName(), record.getFileSize(), Store.NO));
    // The Event ID is stored but not indexed: a specific event can be fetched straight from the
    // EventStore, but a Document found by a query must carry the id needed for that lookup.
    luceneDoc.add(new UnIndexedLongField(SearchableFields.Identifier.getSearchableFieldName(), persistedEvent.getEventId()));

    // Depending on the event type, also index the UUIDs of related FlowFiles.
    final ProvenanceEventType eventType = record.getEventType();
    final boolean indexesChildren = eventType == ProvenanceEventType.FORK
        || eventType == ProvenanceEventType.CLONE
        || eventType == ProvenanceEventType.REPLAY;
    if (indexesChildren) {
        for (final String childUuid : record.getChildUuids()) {
            if (childUuid.equals(record.getFlowFileUuid())) {
                continue;
            }
            addField(luceneDoc, SearchableFields.FlowFileUUID, childUuid);
        }
    } else if (eventType == ProvenanceEventType.JOIN) {
        for (final String parentUuid : record.getParentUuids()) {
            if (parentUuid.equals(record.getFlowFileUuid())) {
                continue;
            }
            addField(luceneDoc, SearchableFields.FlowFileUUID, parentUuid);
        }
    } else if (eventType == ProvenanceEventType.RECEIVE && record.getSourceSystemFlowFileIdentifier() != null) {
        // For a RECEIVE carrying a Source System FlowFile Identifier, also index the UUID that
        // the source system uses for the data: the text after the identifier's last colon.
        final String sourceIdentifier = record.getSourceSystemFlowFileIdentifier();
        final int lastColon = sourceIdentifier.lastIndexOf(":");
        final boolean hasUuidSuffix = lastColon > -1 && lastColon < sourceIdentifier.length() - 2;
        if (hasUuidSuffix) {
            addField(luceneDoc, SearchableFields.FlowFileUUID, sourceIdentifier.substring(lastColon + 1));
        }
    }
    return luceneDoc;
}
Also used : LongField(org.apache.lucene.document.LongField) SearchableField(org.apache.nifi.provenance.search.SearchableField) Document(org.apache.lucene.document.Document) ProvenanceEventType(org.apache.nifi.provenance.ProvenanceEventType)

Example 4 with LongField

use of org.apache.lucene.document.LongField in project nifi by apache.

the class IndexingAction method index.

/**
 * Builds a Lucene document for a provenance event and adds it to {@code indexWriter}.
 *
 * <p>The searchable string fields of the record and every configured searchable attribute
 * (truncated via {@code LuceneUtil.truncateIndexField}) are added unstored. If that leaves
 * the document empty, nothing is written. Otherwise the lineage start date, event time and
 * file size are added, together with the stored location of the event: the storage filename
 * (the part before the first {@code "."}, per {@code LuceneUtil.substringBefore}) and either
 * the byte offset (when {@code blockIndex} is {@code null}) or the block index plus event id.
 * Related FlowFile UUIDs are then added according to the event type.
 *
 * @param record      the event to index
 * @param indexWriter writer that receives the document
 * @param blockIndex  block index of the event, or {@code null} to record the byte offset instead
 * @throws IOException declared by this method; presumably raised by {@code indexWriter.addDocument}
 */
public void index(final StandardProvenanceEventRecord record, final IndexWriter indexWriter, final Integer blockIndex) throws IOException {
    final Document luceneDoc = new Document();
    addField(luceneDoc, SearchableFields.FlowFileUUID, record.getFlowFileUuid(), Store.NO);
    addField(luceneDoc, SearchableFields.Filename, record.getAttribute(CoreAttributes.FILENAME.key()), Store.NO);
    addField(luceneDoc, SearchableFields.ComponentID, record.getComponentId(), Store.NO);
    addField(luceneDoc, SearchableFields.AlternateIdentifierURI, record.getAlternateIdentifierUri(), Store.NO);
    addField(luceneDoc, SearchableFields.EventType, record.getEventType().name(), Store.NO);
    addField(luceneDoc, SearchableFields.Relationship, record.getRelationship(), Store.NO);
    addField(luceneDoc, SearchableFields.Details, record.getDetails(), Store.NO);
    addField(luceneDoc, SearchableFields.ContentClaimSection, record.getContentClaimSection(), Store.NO);
    addField(luceneDoc, SearchableFields.ContentClaimContainer, record.getContentClaimContainer(), Store.NO);
    addField(luceneDoc, SearchableFields.ContentClaimIdentifier, record.getContentClaimIdentifier(), Store.NO);
    addField(luceneDoc, SearchableFields.SourceQueueIdentifier, record.getSourceQueueIdentifier(), Store.NO);
    addField(luceneDoc, SearchableFields.TransitURI, record.getTransitUri(), Store.NO);
    for (final SearchableField attributeField : searchableAttributeFields) {
        final String attributeValue = record.getAttribute(attributeField.getSearchableFieldName());
        addField(luceneDoc, attributeField, LuceneUtil.truncateIndexField(attributeValue), Store.NO);
    }
    final String storageFilename = LuceneUtil.substringBefore(record.getStorageFilename(), ".");

    // The always-present fields are only worth indexing when something else was indexed too.
    if (luceneDoc.getFields().isEmpty()) {
        return;
    }

    luceneDoc.add(new LongField(SearchableFields.LineageStartDate.getSearchableFieldName(), record.getLineageStartDate(), Store.NO));
    luceneDoc.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), record.getEventTime(), Store.NO));
    luceneDoc.add(new LongField(SearchableFields.FileSize.getSearchableFieldName(), record.getFileSize(), Store.NO));
    luceneDoc.add(new StringField(FieldNames.STORAGE_FILENAME, storageFilename, Store.YES));
    if (blockIndex != null) {
        // With a block index, the event is located by block index and event id.
        luceneDoc.add(new IntField(FieldNames.BLOCK_INDEX, blockIndex, Store.YES));
        luceneDoc.add(new LongField(SearchableFields.Identifier.getSearchableFieldName(), record.getEventId(), Store.YES));
    } else {
        // Without a block index, the event is located by its byte offset in the storage file.
        luceneDoc.add(new LongField(FieldNames.STORAGE_FILE_OFFSET, record.getStorageByteOffset(), Store.YES));
    }

    // Depending on the event type, also index the UUIDs of related FlowFiles.
    final ProvenanceEventType eventType = record.getEventType();
    final boolean indexesChildren = eventType == ProvenanceEventType.FORK
        || eventType == ProvenanceEventType.CLONE
        || eventType == ProvenanceEventType.REPLAY;
    if (indexesChildren) {
        for (final String childUuid : record.getChildUuids()) {
            if (childUuid.equals(record.getFlowFileUuid())) {
                continue;
            }
            addField(luceneDoc, SearchableFields.FlowFileUUID, childUuid, Store.NO);
        }
    } else if (eventType == ProvenanceEventType.JOIN) {
        for (final String parentUuid : record.getParentUuids()) {
            if (parentUuid.equals(record.getFlowFileUuid())) {
                continue;
            }
            addField(luceneDoc, SearchableFields.FlowFileUUID, parentUuid, Store.NO);
        }
    } else if (eventType == ProvenanceEventType.RECEIVE && record.getSourceSystemFlowFileIdentifier() != null) {
        // For a RECEIVE carrying a Source System FlowFile Identifier, also index the UUID that
        // the source system uses for the data: the text after the identifier's last colon.
        final String sourceIdentifier = record.getSourceSystemFlowFileIdentifier();
        final int lastColon = sourceIdentifier.lastIndexOf(":");
        final boolean hasUuidSuffix = lastColon > -1 && lastColon < sourceIdentifier.length() - 2;
        if (hasUuidSuffix) {
            addField(luceneDoc, SearchableFields.FlowFileUUID, sourceIdentifier.substring(lastColon + 1), Store.NO);
        }
    }
    indexWriter.addDocument(luceneDoc);
}
Also used : LongField(org.apache.lucene.document.LongField) StringField(org.apache.lucene.document.StringField) IntField(org.apache.lucene.document.IntField) SearchableField(org.apache.nifi.provenance.search.SearchableField) Document(org.apache.lucene.document.Document) ProvenanceEventType(org.apache.nifi.provenance.ProvenanceEventType)

Example 5 with LongField

use of org.apache.lucene.document.LongField in project cxf by apache.

the class TikaLuceneContentExtractor method getField.

/**
 * Creates the stored Lucene field for a metadata entry, based on the type that
 * {@code documentMetadata} declares for the field name.
 *
 * <p>Double, Float and Long types map to {@link DoubleField}, {@link FloatField} and
 * {@link LongField}; Integer and Byte types both map to an {@link IntField} (the value is
 * converted as an {@code Integer}). For {@link Date} types the value is converted to a
 * {@code Date} and, when that yields a non-null result, written back as text through the
 * converter; otherwise the raw value is used. Every other case, including an undeclared
 * type and {@code Number} subtypes not listed above, yields a {@link StringField} holding
 * the raw value.
 *
 * @param documentMetadata source of the declared field type and of the converter provider
 * @param name             field name
 * @param value            raw textual value of the field
 * @return a field stored with {@code Store.YES}
 */
private static Field getField(final LuceneDocumentMetadata documentMetadata, final String name, final String value) {
    final Class<?> fieldType = documentMetadata.getFieldType(name);
    final ParamConverterProvider converters = documentMetadata.getFieldTypeConverter();
    if (fieldType == null) {
        return new StringField(name, value, Store.YES);
    }
    if (Number.class.isAssignableFrom(fieldType)) {
        if (Double.class.isAssignableFrom(fieldType)) {
            return new DoubleField(name, ParamConverterUtils.getValue(Double.class, converters, value), Store.YES);
        }
        if (Float.class.isAssignableFrom(fieldType)) {
            return new FloatField(name, ParamConverterUtils.getValue(Float.class, converters, value), Store.YES);
        }
        if (Long.class.isAssignableFrom(fieldType)) {
            return new LongField(name, ParamConverterUtils.getValue(Long.class, converters, value), Store.YES);
        }
        if (Integer.class.isAssignableFrom(fieldType) || Byte.class.isAssignableFrom(fieldType)) {
            return new IntField(name, ParamConverterUtils.getValue(Integer.class, converters, value), Store.YES);
        }
        // Other Number subtypes fall through to the plain string field below.
    } else if (Date.class.isAssignableFrom(fieldType)) {
        final Date parsed = ParamConverterUtils.getValue(Date.class, converters, value);
        // Use the converter's text form of the date when conversion produced one.
        final String text = parsed != null
            ? ParamConverterUtils.getString(Date.class, converters, parsed)
            : value;
        return new StringField(name, text, Store.YES);
    }
    return new StringField(name, value, Store.YES);
}
Also used : LongField(org.apache.lucene.document.LongField) LongField(org.apache.lucene.document.LongField) StringField(org.apache.lucene.document.StringField) DoubleField(org.apache.lucene.document.DoubleField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IntField(org.apache.lucene.document.IntField) FloatField(org.apache.lucene.document.FloatField) ParamConverterProvider(javax.ws.rs.ext.ParamConverterProvider) StringField(org.apache.lucene.document.StringField) IntField(org.apache.lucene.document.IntField) DoubleField(org.apache.lucene.document.DoubleField) FloatField(org.apache.lucene.document.FloatField) Date(java.util.Date)

Aggregations

LongField (org.apache.lucene.document.LongField)10 Document (org.apache.lucene.document.Document)5 Field (org.apache.lucene.document.Field)5 StringField (org.apache.lucene.document.StringField)5 DoubleField (org.apache.lucene.document.DoubleField)4 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)4 IntField (org.apache.lucene.document.IntField)3 Date (java.util.Date)2 FieldFactory.newAncestorsField (org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newAncestorsField)2 FieldFactory.newDepthField (org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newDepthField)2 FieldFactory.newFulltextField (org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newFulltextField)2 FieldFactory.newPathField (org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPathField)2 FieldFactory.newPropertyField (org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPropertyField)2 DoubleDocValuesField (org.apache.lucene.document.DoubleDocValuesField)2 FloatField (org.apache.lucene.document.FloatField)2 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)2 TextField (org.apache.lucene.document.TextField)2 SortedSetDocValuesFacetField (org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField)2 ProvenanceEventType (org.apache.nifi.provenance.ProvenanceEventType)2 SearchableField (org.apache.nifi.provenance.search.SearchableField)2