use of org.apache.lucene.document.IntField in project nifi by apache.
the class IndexingAction method index.
/**
 * Builds a Lucene {@link Document} for a provenance event and hands it to the index writer.
 *
 * <p>The searchable fields are added first through {@code addField}. If the document is
 * still empty afterwards, nothing is written to the index. Otherwise the always-indexed
 * fields (lineage start date, event time, file size, storage location) and any related
 * FlowFile UUIDs are appended before the document is added.
 *
 * @param record      the provenance event to index
 * @param indexWriter the writer that receives the document
 * @param blockIndex  when non-null, stored together with the event id; when {@code null},
 *                    the storage byte offset of the record is stored instead
 * @throws IOException if the writer fails to add the document
 */
public void index(final StandardProvenanceEventRecord record, final IndexWriter indexWriter, final Integer blockIndex) throws IOException {
    final Document doc = new Document();

    // Fixed set of searchable fields; none of them is stored.
    addField(doc, SearchableFields.FlowFileUUID, record.getFlowFileUuid(), Store.NO);
    addField(doc, SearchableFields.Filename, record.getAttribute(CoreAttributes.FILENAME.key()), Store.NO);
    addField(doc, SearchableFields.ComponentID, record.getComponentId(), Store.NO);
    addField(doc, SearchableFields.AlternateIdentifierURI, record.getAlternateIdentifierUri(), Store.NO);
    addField(doc, SearchableFields.EventType, record.getEventType().name(), Store.NO);
    addField(doc, SearchableFields.Relationship, record.getRelationship(), Store.NO);
    addField(doc, SearchableFields.Details, record.getDetails(), Store.NO);
    addField(doc, SearchableFields.ContentClaimSection, record.getContentClaimSection(), Store.NO);
    addField(doc, SearchableFields.ContentClaimContainer, record.getContentClaimContainer(), Store.NO);
    addField(doc, SearchableFields.ContentClaimIdentifier, record.getContentClaimIdentifier(), Store.NO);
    addField(doc, SearchableFields.SourceQueueIdentifier, record.getSourceQueueIdentifier(), Store.NO);
    addField(doc, SearchableFields.TransitURI, record.getTransitUri(), Store.NO);

    // Configured attribute fields; values are truncated before indexing.
    for (final SearchableField attributeField : searchableAttributeFields) {
        final String attributeValue = record.getAttribute(attributeField.getSearchableFieldName());
        addField(doc, attributeField, LuceneUtil.truncateIndexField(attributeValue), Store.NO);
    }

    final String storageFilename = LuceneUtil.substringBefore(record.getStorageFilename(), ".");

    // The always-indexed fields are only worth writing when something searchable was added.
    // NOTE(review): presumably addField skips some values (e.g. null) - confirm in addField.
    if (doc.getFields().isEmpty()) {
        return;
    }

    doc.add(new LongField(SearchableFields.LineageStartDate.getSearchableFieldName(), record.getLineageStartDate(), Store.NO));
    doc.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), record.getEventTime(), Store.NO));
    doc.add(new LongField(SearchableFields.FileSize.getSearchableFieldName(), record.getFileSize(), Store.NO));
    doc.add(new StringField(FieldNames.STORAGE_FILENAME, storageFilename, Store.YES));

    if (blockIndex != null) {
        doc.add(new IntField(FieldNames.BLOCK_INDEX, blockIndex, Store.YES));
        doc.add(new LongField(SearchableFields.Identifier.getSearchableFieldName(), record.getEventId(), Store.YES));
    } else {
        doc.add(new LongField(FieldNames.STORAGE_FILE_OFFSET, record.getStorageByteOffset(), Store.YES));
    }

    // For FORK, CLONE, REPLAY and JOIN events, also index the UUIDs of the related
    // child/parent FlowFiles so the event can be found through any of them.
    final ProvenanceEventType eventType = record.getEventType();
    switch (eventType) {
        case FORK:
        case CLONE:
        case REPLAY:
            for (final String childUuid : record.getChildUuids()) {
                if (!childUuid.equals(record.getFlowFileUuid())) {
                    addField(doc, SearchableFields.FlowFileUUID, childUuid, Store.NO);
                }
            }
            break;
        case JOIN:
            for (final String parentUuid : record.getParentUuids()) {
                if (!parentUuid.equals(record.getFlowFileUuid())) {
                    addField(doc, SearchableFields.FlowFileUUID, parentUuid, Store.NO);
                }
            }
            break;
        case RECEIVE:
            // A RECEIVE with a Source System FlowFile Identifier is additionally indexed under
            // the UUID that the source system uses to refer to the data: the text after the
            // last ':' of the identifier.
            final String sourceIdentifier = record.getSourceSystemFlowFileIdentifier();
            if (sourceIdentifier != null) {
                final int lastColon = sourceIdentifier.lastIndexOf(":");
                // Only accepted when at least two characters follow the colon.
                if (lastColon > -1 && lastColon < sourceIdentifier.length() - 2) {
                    addField(doc, SearchableFields.FlowFileUUID, sourceIdentifier.substring(lastColon + 1), Store.NO);
                }
            }
            break;
        default:
            break;
    }

    indexWriter.addDocument(doc);
}
use of org.apache.lucene.document.IntField in project cxf by apache.
the class TikaLuceneContentExtractor method getField.
/**
 * Creates the stored Lucene field for a name/value pair, choosing the field class from
 * the type that the document metadata declares for {@code name}.
 *
 * <ul>
 *   <li>{@code Double}, {@code Float}, {@code Long}: the matching numeric field.</li>
 *   <li>{@code Integer} or {@code Byte}: an {@link IntField}.</li>
 *   <li>{@code Date}: a {@link StringField} holding the converted date rendered back to a
 *       string, or the raw value when the conversion yields {@code null}.</li>
 *   <li>Anything else, including an unknown type: a {@link StringField} with the raw value.</li>
 * </ul>
 *
 * @param documentMetadata supplies the declared field type and the converter provider
 * @param name             the field name
 * @param value            the raw string value
 * @return the field to add to the document; always stored
 */
private static Field getField(final LuceneDocumentMetadata documentMetadata, final String name, final String value) {
    final Class<?> fieldType = documentMetadata.getFieldType(name);
    final ParamConverterProvider converters = documentMetadata.getFieldTypeConverter();

    if (fieldType == null) {
        return new StringField(name, value, Store.YES);
    }

    // The wrapper classes below are final subclasses of Number, so a separate
    // Number check is implied by each of these tests.
    if (Double.class.isAssignableFrom(fieldType)) {
        return new DoubleField(name, ParamConverterUtils.getValue(Double.class, converters, value), Store.YES);
    }
    if (Float.class.isAssignableFrom(fieldType)) {
        return new FloatField(name, ParamConverterUtils.getValue(Float.class, converters, value), Store.YES);
    }
    if (Long.class.isAssignableFrom(fieldType)) {
        return new LongField(name, ParamConverterUtils.getValue(Long.class, converters, value), Store.YES);
    }
    if (Integer.class.isAssignableFrom(fieldType) || Byte.class.isAssignableFrom(fieldType)) {
        return new IntField(name, ParamConverterUtils.getValue(Integer.class, converters, value), Store.YES);
    }

    if (Date.class.isAssignableFrom(fieldType)) {
        final Date parsed = ParamConverterUtils.getValue(Date.class, converters, value);
        // Fall back to the raw text when the value could not be converted to a date.
        final String text = parsed != null
                ? ParamConverterUtils.getString(Date.class, converters, parsed)
                : value;
        return new StringField(name, text, Store.YES);
    }

    return new StringField(name, value, Store.YES);
}
use of org.apache.lucene.document.IntField in project querydsl by querydsl.
the class LuceneQueryTest method createDocument.
/**
 * Builds a document with title, author, text, year and gross, each added twice: once as
 * a stored field and once as a doc-values field under the same name.
 *
 * <p>The field instances are kept in members of the enclosing class and reused between
 * calls for performance: they are created on first use and merely updated with the new
 * values afterwards.
 *
 * @param docTitle  value of the "title" field
 * @param docAuthor value of the "author" field
 * @param docText   value of the "text" field
 * @param docYear   value of the "year" field
 * @param docGross  value of the "gross" field
 * @return a new document containing the (shared) field instances
 */
private Document createDocument(final String docTitle, final String docAuthor, final String docText, final int docYear, final double docGross) {
    final Document document = new Document();

    // title
    if (titleField != null) {
        titleField.setStringValue(docTitle);
        titleSortedField.setBytesValue(new BytesRef(docTitle));
    } else {
        titleField = new TextField("title", docTitle, Store.YES);
        titleSortedField = new SortedDocValuesField("title", new BytesRef(docTitle));
    }
    document.add(titleField);
    document.add(titleSortedField);

    // author
    if (authorField != null) {
        authorField.setStringValue(docAuthor);
        authorSortedField.setBytesValue(new BytesRef(docAuthor));
    } else {
        authorField = new TextField("author", docAuthor, Store.YES);
        authorSortedField = new SortedDocValuesField("author", new BytesRef(docAuthor));
    }
    document.add(authorField);
    document.add(authorSortedField);

    // text
    if (textField != null) {
        textField.setStringValue(docText);
        textSortedField.setBytesValue(new BytesRef(docText));
    } else {
        textField = new TextField("text", docText, Store.YES);
        textSortedField = new SortedDocValuesField("text", new BytesRef(docText));
    }
    document.add(textField);
    document.add(textSortedField);

    // year
    if (yearField != null) {
        yearField.setIntValue(docYear);
        yearSortedField.setLongValue(docYear);
    } else {
        yearField = new IntField("year", docYear, Store.YES);
        yearSortedField = new NumericDocValuesField("year", docYear);
    }
    document.add(yearField);
    document.add(yearSortedField);

    // gross
    if (grossField != null) {
        grossField.setDoubleValue(docGross);
        grossSortedField.setDoubleValue(docGross);
    } else {
        grossField = new DoubleField("gross", docGross, Store.YES);
        grossSortedField = new DoubleDocValuesField("gross", docGross);
    }
    document.add(grossField);
    document.add(grossSortedField);

    return document;
}
use of org.apache.lucene.document.IntField in project querydsl by querydsl.
the class LuceneSerializerTest method createDocument.
/**
 * Builds the fixed sample document used by the tests: four reader-backed text fields,
 * an empty analyzed "publisher" field, and one stored numeric field per numeric kind.
 *
 * @return the populated document
 */
private Document createDocument() {
    final Document sample = new Document();

    // Fields whose content is supplied through a Reader, in the order they are added.
    final String[][] readerBackedFields = {
        { "title", "Jurassic Park" },
        { "author", "Michael Crichton" },
        { "text", "It's a UNIX system! I know this!" },
        { "rating", "Good" },
    };
    for (final String[] nameAndContent : readerBackedFields) {
        sample.add(new Field(nameAndContent[0], new StringReader(nameAndContent[1])));
    }

    sample.add(new Field("publisher", "", Store.YES, Index.ANALYZED));

    // Numeric fields; short and byte values are represented with IntField.
    sample.add(new IntField("year", 1990, Store.YES));
    sample.add(new DoubleField("gross", 900.0, Store.YES));
    sample.add(new LongField("longField", 1L, Store.YES));
    sample.add(new IntField("shortField", 1, Store.YES));
    sample.add(new IntField("byteField", 1, Store.YES));
    sample.add(new FloatField("floatField", 1f, Store.YES));

    return sample;
}
use of org.apache.lucene.document.IntField in project Krill by KorAP.
the class Test method addDoc.
/**
 * Builds a document from the given key/value map and adds it to the writer.
 *
 * <p>Keys read from the map: "title", "subtitle", "textClass" (text fields), "id",
 * "corpus", "pubPlace" (string fields), "pubDate" (parsed as an integer), "textStr"
 * (string value of the "text" field) and "text" (input to {@code getTermVector}, whose
 * token stream is attached to the "text" field).
 *
 * @param w the index writer receiving the document
 * @param m the field values, keyed as described above
 * @throws IOException if the writer fails to add the document
 */
public static void addDoc(IndexWriter w, Map<String, String> m) throws IOException {
    Document document = new Document();

    // Text fields
    for (String name : new String[] { "title", "subtitle", "textClass" }) {
        document.add(new TextField(name, m.get(name), Field.Store.YES));
    }

    // String fields
    for (String name : new String[] { "id", "corpus", "pubPlace" }) {
        document.add(new StringField(name, m.get(name), Field.Store.YES));
    }

    // Integer fields
    for (String name : new String[] { "pubDate" }) {
        document.add(new IntField(name, Integer.parseInt(m.get(name)), Field.Store.YES));
    }

    // Stored text field type with term vectors, including offsets, positions and payloads
    FieldType termVectorType = new FieldType(TextField.TYPE_STORED);
    termVectorType.setStoreTermVectors(true);
    termVectorType.setStoreTermVectorOffsets(true);
    termVectorType.setStoreTermVectorPositions(true);
    termVectorType.setStoreTermVectorPayloads(true);

    // The "text" field takes its string value from "textStr" and its token stream from "text".
    Field annotatedText = new Field("text", m.get("textStr"), termVectorType);
    MultiTermTokenStream tokens = getTermVector(m.get("text"));
    annotatedText.setTokenStream(tokens);
    document.add(annotatedText);

    // Add document to writer
    w.addDocument(document);
}
Aggregations