Use of org.apache.lucene.document.LongField in project jackrabbit-oak by Apache.
From the class LuceneDocumentMaker, the method addTypedFields:
// Maps each value of the property to a typed Lucene field: longs and dates are
// indexed as LongFields, doubles as DoubleFields, and booleans and all remaining
// types fall back to exact-match StringFields.
private boolean addTypedFields(List<Field> fields, PropertyState property, String pname) {
    int tag = property.getType().tag();
    boolean fieldAdded = false;
    for (int i = 0; i < property.count(); i++) {
        Field f;
        if (tag == Type.LONG.tag()) {
            f = new LongField(pname, property.getValue(Type.LONG, i), Field.Store.NO);
        } else if (tag == Type.DATE.tag()) {
            String date = property.getValue(Type.DATE, i);
            f = new LongField(pname, FieldFactory.dateToLong(date), Field.Store.NO);
        } else if (tag == Type.DOUBLE.tag()) {
            f = new DoubleField(pname, property.getValue(Type.DOUBLE, i), Field.Store.NO);
        } else if (tag == Type.BOOLEAN.tag()) {
            f = new StringField(pname, property.getValue(Type.BOOLEAN, i).toString(), Field.Store.NO);
        } else {
            f = new StringField(pname, property.getValue(Type.STRING, i), Field.Store.NO);
        }
        fields.add(f);
        fieldAdded = true;
    }
    return fieldAdded;
}
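Since LongField indexes its values as numeric trie terms, properties indexed this way can be matched with range queries rather than term queries. A minimal sketch, assuming the classic Lucene 4.x search API; the directory handling, class name, and method are illustrative assumptions, not part of the Oak sources:

import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;

public class LongFieldRangeSearch {

    // LongField values are indexed as numeric trie terms, so they are queried
    // with NumericRangeQuery rather than a plain term query.
    public static int countInRange(Directory dir, String pname, long min, long max) throws IOException {
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = NumericRangeQuery.newLongRange(pname, min, max, true, true);
            return searcher.search(query, 10).totalHits;
        }
    }
}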
Use of org.apache.lucene.document.LongField in project nifi by Apache.
From the class TestEventIndexTask, the method testIndexWriterCommittedWhenAppropriate:
@Test(timeout = 5000)
public void testIndexWriterCommittedWhenAppropriate() throws IOException, InterruptedException {
    final BlockingQueue<StoredDocument> docQueue = new LinkedBlockingQueue<>();
    final RepositoryConfiguration repoConfig = new RepositoryConfiguration();
    final File storageDir = new File("target/storage/TestEventIndexTask/1");
    repoConfig.addStorageDirectory("1", storageDir);
    final AtomicInteger commitCount = new AtomicInteger(0);
    // Mock out an IndexWriter and keep track of the number of events that are indexed.
    final IndexWriter indexWriter = Mockito.mock(IndexWriter.class);
    final EventIndexWriter eventIndexWriter = new LuceneEventIndexWriter(indexWriter, storageDir);
    final IndexManager indexManager = Mockito.mock(IndexManager.class);
    Mockito.when(indexManager.borrowIndexWriter(Mockito.any(File.class))).thenReturn(eventIndexWriter);
    final IndexDirectoryManager directoryManager = new IndexDirectoryManager(repoConfig);
    // Create an EventIndexTask with a commit threshold of 201 events and override the
    // commit(EventIndexWriter) method so that we can keep track of how many times the
    // index writer gets committed.
    final EventIndexTask task = new EventIndexTask(docQueue, repoConfig, indexManager, directoryManager, 201, EventReporter.NO_OP) {
        @Override
        protected void commit(EventIndexWriter indexWriter) throws IOException {
            commitCount.incrementAndGet();
        }
    };
    // Create 4 threads, each one a daemon thread running the EventIndexTask.
    for (int i = 0; i < 4; i++) {
        final Thread t = new Thread(task);
        t.setDaemon(true);
        t.start();
    }
    assertEquals(0, commitCount.get());
    // Index 100 documents with a storage filename of "0.0.prov".
    for (int i = 0; i < 100; i++) {
        final Document document = new Document();
        document.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), System.currentTimeMillis(), Store.NO));
        final StorageSummary location = new StorageSummary(1L, "0.0.prov", "1", 0, 1000L, 1000L);
        final StoredDocument storedDoc = new StoredDocument(document, location);
        docQueue.add(storedDoc);
    }
    assertEquals(0, commitCount.get());
    // Index 100 more documents.
    for (int i = 0; i < 100; i++) {
        final Document document = new Document();
        document.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), System.currentTimeMillis(), Store.NO));
        final StorageSummary location = new StorageSummary(1L, "0.0.prov", "1", 0, 1000L, 1000L);
        final StoredDocument storedDoc = new StoredDocument(document, location);
        docQueue.add(storedDoc);
    }
    // Wait until we've indexed all 200 events.
    while (eventIndexWriter.getEventsIndexed() < 200) {
        Thread.sleep(10L);
    }
    // Wait a bit and make sure that we still haven't committed the index writer:
    // only 200 of the 201 events needed to trigger a commit have been indexed.
    Thread.sleep(100L);
    assertEquals(0, commitCount.get());
    // Add one more document, bringing the total to 201 and crossing the commit threshold.
    final Document document = new Document();
    document.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), System.currentTimeMillis(), Store.NO));
    final StorageSummary location = new StorageSummary(1L, "0.0.prov", "1", 0, 1000L, 1000L);
    StoredDocument storedDoc = new StoredDocument(document, location);
    docQueue.add(storedDoc);
    // Wait until the index writer is committed.
    while (commitCount.get() == 0) {
        Thread.sleep(10L);
    }
    assertEquals(1, commitCount.get());
    // Add another document. The commit threshold has not been reached a second time,
    // so the writer should not be committed again.
    storedDoc = new StoredDocument(document, location);
    docQueue.add(storedDoc);
    Thread.sleep(100L);
    assertEquals(1, commitCount.get());
    // Add yet another document; the index writer still should not be committed again.
    storedDoc = new StoredDocument(document, location);
    docQueue.add(storedDoc);
    Thread.sleep(100L);
    assertEquals(1, commitCount.get());
}
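The test above drives the task across its commit threshold of 201 events. As a rough sketch of the kind of count-based commit loop being exercised (the class name and structure here are illustrative assumptions, not the NiFi implementation):

import java.io.IOException;
import java.util.concurrent.BlockingQueue;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;

// Hypothetical consumer: drains documents from a queue and commits the writer
// only once the configured number of uncommitted events has accumulated.
class CountingIndexLoop implements Runnable {

    private final BlockingQueue<Document> queue;
    private final IndexWriter writer;
    private final int commitThreshold;
    private int uncommitted;

    CountingIndexLoop(final BlockingQueue<Document> queue, final IndexWriter writer, final int commitThreshold) {
        this.queue = queue;
        this.writer = writer;
        this.commitThreshold = commitThreshold;
    }

    @Override
    public void run() {
        try {
            while (!Thread.currentThread().isInterrupted()) {
                final Document doc = queue.take();
                writer.addDocument(doc);
                // Commit in batches: a commit per document would be far too costly.
                if (++uncommitted >= commitThreshold) {
                    writer.commit();
                    uncommitted = 0;
                }
            }
        } catch (final InterruptedException e) {
            Thread.currentThread().interrupt();
        } catch (final IOException e) {
            throw new RuntimeException(e);
        }
    }
}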
Use of org.apache.lucene.document.LongField in project nifi by Apache.
From the class ConvertEventToLuceneDocument, the method convert:
public Document convert(final ProvenanceEventRecord record, final StorageSummary persistedEvent) {
    final Document doc = new Document();
    addField(doc, SearchableFields.FlowFileUUID, record.getFlowFileUuid());
    addField(doc, SearchableFields.Filename, record.getAttribute(CoreAttributes.FILENAME.key()));
    addField(doc, SearchableFields.ComponentID, record.getComponentId());
    addField(doc, SearchableFields.AlternateIdentifierURI, record.getAlternateIdentifierUri());
    addField(doc, SearchableFields.EventType, record.getEventType().name());
    addField(doc, SearchableFields.Relationship, record.getRelationship());
    addField(doc, SearchableFields.Details, record.getDetails());
    addField(doc, SearchableFields.ContentClaimSection, record.getContentClaimSection());
    addField(doc, SearchableFields.ContentClaimContainer, record.getContentClaimContainer());
    addField(doc, SearchableFields.ContentClaimIdentifier, record.getContentClaimIdentifier());
    addField(doc, SearchableFields.SourceQueueIdentifier, record.getSourceQueueIdentifier());
    addField(doc, SearchableFields.TransitURI, record.getTransitUri());
    for (final SearchableField searchableField : searchableAttributeFields) {
        addField(doc, searchableField, LuceneUtil.truncateIndexField(record.getAttribute(searchableField.getSearchableFieldName())));
    }
    // Index the fields that we always index (unless there's nothing else to index at all).
    if (!doc.getFields().isEmpty()) {
        // Always include Lineage Start Date because it allows us to make our Lineage queries more efficient.
        doc.add(new LongField(SearchableFields.LineageStartDate.getSearchableFieldName(), record.getLineageStartDate(), Store.NO));
        // Always include Event Time because most queries are bound by a start and end time.
        doc.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), record.getEventTime(), Store.NO));
        // We always include File Size because the UI wants to always render the controls for specifying this. This idea could be revisited.
        doc.add(new LongField(SearchableFields.FileSize.getSearchableFieldName(), record.getFileSize(), Store.NO));
        // We always store the Event ID in the Document but do not index it. It doesn't make sense to query based on Event ID because
        // if we want a particular Event ID, we can just obtain it directly from the EventStore. But when we obtain a Document, this
        // info must be stored so that we know how to look up the event in the store.
        doc.add(new UnIndexedLongField(SearchableFields.Identifier.getSearchableFieldName(), persistedEvent.getEventId()));
        // If the event is a FORK, CLONE, REPLAY, or JOIN, add the FlowFileUUID for all child/parent UUIDs.
        final ProvenanceEventType eventType = record.getEventType();
        if (eventType == ProvenanceEventType.FORK || eventType == ProvenanceEventType.CLONE || eventType == ProvenanceEventType.REPLAY) {
            for (final String uuid : record.getChildUuids()) {
                if (!uuid.equals(record.getFlowFileUuid())) {
                    addField(doc, SearchableFields.FlowFileUUID, uuid);
                }
            }
        } else if (eventType == ProvenanceEventType.JOIN) {
            for (final String uuid : record.getParentUuids()) {
                if (!uuid.equals(record.getFlowFileUuid())) {
                    addField(doc, SearchableFields.FlowFileUUID, uuid);
                }
            }
        } else if (eventType == ProvenanceEventType.RECEIVE && record.getSourceSystemFlowFileIdentifier() != null) {
            // If we get a RECEIVE event with a Source System FlowFile Identifier, we add another field that shows the UUID
            // that the Source System uses to refer to the data.
            final String sourceIdentifier = record.getSourceSystemFlowFileIdentifier();
            final String sourceFlowFileUUID;
            final int lastColon = sourceIdentifier.lastIndexOf(":");
            if (lastColon > -1 && lastColon < sourceIdentifier.length() - 2) {
                sourceFlowFileUUID = sourceIdentifier.substring(lastColon + 1);
            } else {
                sourceFlowFileUUID = null;
            }
            if (sourceFlowFileUUID != null) {
                addField(doc, SearchableFields.FlowFileUUID, sourceFlowFileUUID);
            }
        }
        return doc;
    }
    return null;
}
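UnIndexedLongField is a NiFi helper rather than a stock Lucene class. A plausible minimal sketch of such a stored-but-not-indexed long field on the Lucene 4.x API (the actual NiFi class may differ in detail):

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;

// Hypothetical stand-in for NiFi's UnIndexedLongField: the value is stored on
// the Document so it can be read back from search hits, but no index terms are
// written, so the field cannot be queried.
class StoredOnlyLongField extends Field {

    private static final FieldType TYPE = new FieldType();

    static {
        TYPE.setStored(true);
        TYPE.setIndexed(false);
        TYPE.setNumericType(FieldType.NumericType.LONG);
        TYPE.freeze();
    }

    StoredOnlyLongField(final String name, final long value) {
        super(name, TYPE);
        // Field exposes its value to stored-fields writers via the protected
        // fieldsData member.
        fieldsData = Long.valueOf(value);
    }
}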
Use of org.apache.lucene.document.LongField in project nifi by Apache.
From the class IndexingAction, the method index:
public void index(final StandardProvenanceEventRecord record, final IndexWriter indexWriter, final Integer blockIndex) throws IOException {
    final Document doc = new Document();
    addField(doc, SearchableFields.FlowFileUUID, record.getFlowFileUuid(), Store.NO);
    addField(doc, SearchableFields.Filename, record.getAttribute(CoreAttributes.FILENAME.key()), Store.NO);
    addField(doc, SearchableFields.ComponentID, record.getComponentId(), Store.NO);
    addField(doc, SearchableFields.AlternateIdentifierURI, record.getAlternateIdentifierUri(), Store.NO);
    addField(doc, SearchableFields.EventType, record.getEventType().name(), Store.NO);
    addField(doc, SearchableFields.Relationship, record.getRelationship(), Store.NO);
    addField(doc, SearchableFields.Details, record.getDetails(), Store.NO);
    addField(doc, SearchableFields.ContentClaimSection, record.getContentClaimSection(), Store.NO);
    addField(doc, SearchableFields.ContentClaimContainer, record.getContentClaimContainer(), Store.NO);
    addField(doc, SearchableFields.ContentClaimIdentifier, record.getContentClaimIdentifier(), Store.NO);
    addField(doc, SearchableFields.SourceQueueIdentifier, record.getSourceQueueIdentifier(), Store.NO);
    addField(doc, SearchableFields.TransitURI, record.getTransitUri(), Store.NO);
    for (final SearchableField searchableField : searchableAttributeFields) {
        addField(doc, searchableField, LuceneUtil.truncateIndexField(record.getAttribute(searchableField.getSearchableFieldName())), Store.NO);
    }
    final String storageFilename = LuceneUtil.substringBefore(record.getStorageFilename(), ".");
    // Index the fields that we always index (unless there's nothing else to index at all).
    if (!doc.getFields().isEmpty()) {
        doc.add(new LongField(SearchableFields.LineageStartDate.getSearchableFieldName(), record.getLineageStartDate(), Store.NO));
        doc.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), record.getEventTime(), Store.NO));
        doc.add(new LongField(SearchableFields.FileSize.getSearchableFieldName(), record.getFileSize(), Store.NO));
        doc.add(new StringField(FieldNames.STORAGE_FILENAME, storageFilename, Store.YES));
        if (blockIndex == null) {
            doc.add(new LongField(FieldNames.STORAGE_FILE_OFFSET, record.getStorageByteOffset(), Store.YES));
        } else {
            doc.add(new IntField(FieldNames.BLOCK_INDEX, blockIndex, Store.YES));
            doc.add(new LongField(SearchableFields.Identifier.getSearchableFieldName(), record.getEventId(), Store.YES));
        }
        // If the event is a FORK, CLONE, REPLAY, or JOIN, add the FlowFileUUID for all child/parent UUIDs.
        final ProvenanceEventType eventType = record.getEventType();
        if (eventType == ProvenanceEventType.FORK || eventType == ProvenanceEventType.CLONE || eventType == ProvenanceEventType.REPLAY) {
            for (final String uuid : record.getChildUuids()) {
                if (!uuid.equals(record.getFlowFileUuid())) {
                    addField(doc, SearchableFields.FlowFileUUID, uuid, Store.NO);
                }
            }
        } else if (eventType == ProvenanceEventType.JOIN) {
            for (final String uuid : record.getParentUuids()) {
                if (!uuid.equals(record.getFlowFileUuid())) {
                    addField(doc, SearchableFields.FlowFileUUID, uuid, Store.NO);
                }
            }
        } else if (eventType == ProvenanceEventType.RECEIVE && record.getSourceSystemFlowFileIdentifier() != null) {
            // If we get a RECEIVE event with a Source System FlowFile Identifier, we add another field that shows the UUID
            // that the Source System uses to refer to the data.
            final String sourceIdentifier = record.getSourceSystemFlowFileIdentifier();
            final String sourceFlowFileUUID;
            final int lastColon = sourceIdentifier.lastIndexOf(":");
            if (lastColon > -1 && lastColon < sourceIdentifier.length() - 2) {
                sourceFlowFileUUID = sourceIdentifier.substring(lastColon + 1);
            } else {
                sourceFlowFileUUID = null;
            }
            if (sourceFlowFileUUID != null) {
                addField(doc, SearchableFields.FlowFileUUID, sourceFlowFileUUID, Store.NO);
            }
        }
        indexWriter.addDocument(doc);
    }
}
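The addField helper used throughout index() belongs to NiFi, not Lucene. A simplified, null-guarded sketch of what such a helper can look like; the lowercasing and the omission of the real implementation's searchable-field filtering are assumptions here:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;

final class FieldHelper {

    // Hypothetical variant of the addField helper: skip null values so that
    // absent attributes never produce empty terms, then index the value as a
    // single exact-match token.
    static void addField(final Document doc, final String fieldName, final String value, final Store store) {
        if (value == null) {
            return;
        }
        doc.add(new StringField(fieldName, value.toLowerCase(), store));
    }

    private FieldHelper() {
    }
}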
Use of org.apache.lucene.document.LongField in project cxf by Apache.
From the class TikaLuceneContentExtractor, the method getField:
private static Field getField(final LuceneDocumentMetadata documentMetadata, final String name, final String value) {
    final Class<?> type = documentMetadata.getFieldType(name);
    final ParamConverterProvider provider = documentMetadata.getFieldTypeConverter();
    if (type != null) {
        if (Number.class.isAssignableFrom(type)) {
            if (Double.class.isAssignableFrom(type)) {
                return new DoubleField(name, ParamConverterUtils.getValue(Double.class, provider, value), Store.YES);
            } else if (Float.class.isAssignableFrom(type)) {
                return new FloatField(name, ParamConverterUtils.getValue(Float.class, provider, value), Store.YES);
            } else if (Long.class.isAssignableFrom(type)) {
                return new LongField(name, ParamConverterUtils.getValue(Long.class, provider, value), Store.YES);
            } else if (Integer.class.isAssignableFrom(type) || Byte.class.isAssignableFrom(type)) {
                return new IntField(name, ParamConverterUtils.getValue(Integer.class, provider, value), Store.YES);
            }
        } else if (Date.class.isAssignableFrom(type)) {
            final Date date = ParamConverterUtils.getValue(Date.class, provider, value);
            Field field = null;
            if (date != null) {
                field = new StringField(name, ParamConverterUtils.getString(Date.class, provider, date), Store.YES);
            } else {
                field = new StringField(name, value, Store.YES);
            }
            return field;
        }
    }
    return new StringField(name, value, Store.YES);
}
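For context, a small self-contained example of the same classic (pre-Lucene 6) typed field family that getField chooses from; the field names and values here are made up for illustration:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FloatField;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;

public class TypedFieldsExample {

    // Numeric fields index trie-encoded terms that support range queries;
    // StringField indexes the value as a single exact-match token.
    public static Document buildDocument() {
        Document doc = new Document();
        doc.add(new LongField("contentLength", 1024L, Store.YES));
        doc.add(new IntField("pageCount", 12, Store.YES));
        doc.add(new FloatField("ratio", 0.5f, Store.YES));
        doc.add(new DoubleField("score", 0.97d, Store.YES));
        doc.add(new StringField("author", "jane", Store.YES));
        return doc;
    }
}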