Search in sources :

Example 1 with IntField

use of org.apache.lucene.document.IntField in project nifi by apache.

the class IndexingAction method index.

/**
 * Builds a Lucene document describing one provenance event and hands it to the index writer.
 *
 * <p>The searchable string fields and the configured searchable attributes are added first via
 * {@code addField}. If the document has no fields after that step, nothing is written to the
 * index. Otherwise the always-present numeric fields, the storage location and any related
 * FlowFile UUIDs are appended before the document is added.
 *
 * @param record      the provenance event to index
 * @param indexWriter the writer that receives the finished document
 * @param blockIndex  when {@code null}, the record's storage byte offset is stored; otherwise the
 *                    block index and the event id are stored instead
 * @throws IOException if adding the document to the index fails
 */
public void index(final StandardProvenanceEventRecord record, final IndexWriter indexWriter, final Integer blockIndex) throws IOException {
    final Document doc = new Document();

    // Fixed set of searchable string fields; none of them are stored.
    addField(doc, SearchableFields.FlowFileUUID, record.getFlowFileUuid(), Store.NO);
    addField(doc, SearchableFields.Filename, record.getAttribute(CoreAttributes.FILENAME.key()), Store.NO);
    addField(doc, SearchableFields.ComponentID, record.getComponentId(), Store.NO);
    addField(doc, SearchableFields.AlternateIdentifierURI, record.getAlternateIdentifierUri(), Store.NO);
    addField(doc, SearchableFields.EventType, record.getEventType().name(), Store.NO);
    addField(doc, SearchableFields.Relationship, record.getRelationship(), Store.NO);
    addField(doc, SearchableFields.Details, record.getDetails(), Store.NO);
    addField(doc, SearchableFields.ContentClaimSection, record.getContentClaimSection(), Store.NO);
    addField(doc, SearchableFields.ContentClaimContainer, record.getContentClaimContainer(), Store.NO);
    addField(doc, SearchableFields.ContentClaimIdentifier, record.getContentClaimIdentifier(), Store.NO);
    addField(doc, SearchableFields.SourceQueueIdentifier, record.getSourceQueueIdentifier(), Store.NO);
    addField(doc, SearchableFields.TransitURI, record.getTransitUri(), Store.NO);

    // Configured searchable attributes; each value goes through LuceneUtil.truncateIndexField first.
    for (final SearchableField attributeField : searchableAttributeFields) {
        final String attributeValue = record.getAttribute(attributeField.getSearchableFieldName());
        addField(doc, attributeField, LuceneUtil.truncateIndexField(attributeValue), Store.NO);
    }

    // Storage filename up to (not including) the first "." as returned by LuceneUtil.substringBefore.
    final String storageFilename = LuceneUtil.substringBefore(record.getStorageFilename(), ".");

    // Nothing searchable ended up on the document, so there is nothing worth indexing.
    if (doc.getFields().isEmpty()) {
        return;
    }

    // Fields that accompany every indexed event.
    doc.add(new LongField(SearchableFields.LineageStartDate.getSearchableFieldName(), record.getLineageStartDate(), Store.NO));
    doc.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), record.getEventTime(), Store.NO));
    doc.add(new LongField(SearchableFields.FileSize.getSearchableFieldName(), record.getFileSize(), Store.NO));
    doc.add(new StringField(FieldNames.STORAGE_FILENAME, storageFilename, Store.YES));

    // Location of the event inside the storage file: block index plus event id, or a raw byte offset.
    if (blockIndex != null) {
        doc.add(new IntField(FieldNames.BLOCK_INDEX, blockIndex, Store.YES));
        doc.add(new LongField(SearchableFields.Identifier.getSearchableFieldName(), record.getEventId(), Store.YES));
    } else {
        doc.add(new LongField(FieldNames.STORAGE_FILE_OFFSET, record.getStorageByteOffset(), Store.YES));
    }

    final ProvenanceEventType eventType = record.getEventType();
    final boolean indexesChildren = eventType == ProvenanceEventType.FORK
            || eventType == ProvenanceEventType.CLONE
            || eventType == ProvenanceEventType.REPLAY;

    if (indexesChildren) {
        // FORK, CLONE and REPLAY: also index every child UUID that differs from the event's own UUID.
        for (final String childUuid : record.getChildUuids()) {
            if (childUuid.equals(record.getFlowFileUuid())) {
                continue;
            }
            addField(doc, SearchableFields.FlowFileUUID, childUuid, Store.NO);
        }
    } else if (eventType == ProvenanceEventType.JOIN) {
        // JOIN: also index every parent UUID that differs from the event's own UUID.
        for (final String parentUuid : record.getParentUuids()) {
            if (parentUuid.equals(record.getFlowFileUuid())) {
                continue;
            }
            addField(doc, SearchableFields.FlowFileUUID, parentUuid, Store.NO);
        }
    } else if (eventType == ProvenanceEventType.RECEIVE) {
        // RECEIVE with a source-system identifier: index the text after the last ':' as an
        // additional FlowFile UUID so the event can be found by the UUID the source system used.
        final String sourceIdentifier = record.getSourceSystemFlowFileIdentifier();
        if (sourceIdentifier != null) {
            final int lastColon = sourceIdentifier.lastIndexOf(":");
            // The bound requires at least two characters after the colon; a shorter suffix is ignored.
            final boolean hasUsableSuffix = lastColon >= 0 && lastColon < sourceIdentifier.length() - 2;
            if (hasUsableSuffix) {
                addField(doc, SearchableFields.FlowFileUUID, sourceIdentifier.substring(lastColon + 1), Store.NO);
            }
        }
    }

    indexWriter.addDocument(doc);
}
Also used : LongField(org.apache.lucene.document.LongField) StringField(org.apache.lucene.document.StringField) IntField(org.apache.lucene.document.IntField) SearchableField(org.apache.nifi.provenance.search.SearchableField) Document(org.apache.lucene.document.Document) ProvenanceEventType(org.apache.nifi.provenance.ProvenanceEventType)

Example 2 with IntField

use of org.apache.lucene.document.IntField in project cxf by apache.

the class TikaLuceneContentExtractor method getField.

/**
 * Picks the Lucene field implementation for a metadata entry from the type registered for it.
 *
 * <ul>
 *   <li>{@code Double}, {@code Float} and {@code Long} types produce the matching numeric field.</li>
 *   <li>{@code Integer} and {@code Byte} types produce an {@code IntField}.</li>
 *   <li>{@code Date} types produce a {@code StringField}: the value is converted to a date and
 *       back to a string; if the conversion yields {@code null} the raw value is used.</li>
 *   <li>Anything else, including an unregistered name or any other {@code Number} subtype,
 *       produces a {@code StringField} holding the raw value.</li>
 * </ul>
 *
 * Every field is created with {@code Store.YES}.
 *
 * @param documentMetadata source of the field's declared type and of the converter provider
 * @param name             field name
 * @param value            raw string value to convert
 * @return the field to add to the document
 */
private static Field getField(final LuceneDocumentMetadata documentMetadata, final String name, final String value) {
    final Class<?> type = documentMetadata.getFieldType(name);
    final ParamConverterProvider provider = documentMetadata.getFieldTypeConverter();

    // No declared type: keep the value as an untokenized string.
    if (type == null) {
        return new StringField(name, value, Store.YES);
    }

    if (Number.class.isAssignableFrom(type)) {
        if (Double.class.isAssignableFrom(type)) {
            return new DoubleField(name, ParamConverterUtils.getValue(Double.class, provider, value), Store.YES);
        }
        if (Float.class.isAssignableFrom(type)) {
            return new FloatField(name, ParamConverterUtils.getValue(Float.class, provider, value), Store.YES);
        }
        if (Long.class.isAssignableFrom(type)) {
            return new LongField(name, ParamConverterUtils.getValue(Long.class, provider, value), Store.YES);
        }
        final boolean fitsIntField = Integer.class.isAssignableFrom(type) || Byte.class.isAssignableFrom(type);
        if (fitsIntField) {
            return new IntField(name, ParamConverterUtils.getValue(Integer.class, provider, value), Store.YES);
        }
        // Remaining Number subtypes fall through to the string field at the bottom.
    } else if (Date.class.isAssignableFrom(type)) {
        final Date date = ParamConverterUtils.getValue(Date.class, provider, value);
        final String text = date != null ? ParamConverterUtils.getString(Date.class, provider, date) : value;
        return new StringField(name, text, Store.YES);
    }

    return new StringField(name, value, Store.YES);
}
Also used : LongField(org.apache.lucene.document.LongField) LongField(org.apache.lucene.document.LongField) StringField(org.apache.lucene.document.StringField) DoubleField(org.apache.lucene.document.DoubleField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IntField(org.apache.lucene.document.IntField) FloatField(org.apache.lucene.document.FloatField) ParamConverterProvider(javax.ws.rs.ext.ParamConverterProvider) StringField(org.apache.lucene.document.StringField) IntField(org.apache.lucene.document.IntField) DoubleField(org.apache.lucene.document.DoubleField) FloatField(org.apache.lucene.document.FloatField) Date(java.util.Date)

Example 3 with IntField

use of org.apache.lucene.document.IntField in project querydsl by querydsl.

the class LuceneQueryTest method createDocument.

/**
 * Assembles a test document with title, author, text, year and gross entries.
 *
 * <p>Each entry is represented by two fields with the same name: a stored field and a doc-values
 * field. The field instances are kept in members of the test class: they are created on the
 * first call and, on later calls, only updated with the new values before being added to the
 * fresh document.
 *
 * @param docTitle  value for the "title" fields
 * @param docAuthor value for the "author" fields
 * @param docText   value for the "text" fields
 * @param docYear   value for the "year" fields
 * @param docGross  value for the "gross" fields
 * @return a new document containing the ten fields in title, author, text, year, gross order
 */
private Document createDocument(final String docTitle, final String docAuthor, final String docText, final int docYear, final double docGross) {
    final Document document = new Document();

    // title
    if (titleField != null) {
        titleField.setStringValue(docTitle);
        titleSortedField.setBytesValue(new BytesRef(docTitle));
    } else {
        titleField = new TextField("title", docTitle, Store.YES);
        titleSortedField = new SortedDocValuesField("title", new BytesRef(docTitle));
    }
    document.add(titleField);
    document.add(titleSortedField);

    // author
    if (authorField != null) {
        authorField.setStringValue(docAuthor);
        authorSortedField.setBytesValue(new BytesRef(docAuthor));
    } else {
        authorField = new TextField("author", docAuthor, Store.YES);
        authorSortedField = new SortedDocValuesField("author", new BytesRef(docAuthor));
    }
    document.add(authorField);
    document.add(authorSortedField);

    // text
    if (textField != null) {
        textField.setStringValue(docText);
        textSortedField.setBytesValue(new BytesRef(docText));
    } else {
        textField = new TextField("text", docText, Store.YES);
        textSortedField = new SortedDocValuesField("text", new BytesRef(docText));
    }
    document.add(textField);
    document.add(textSortedField);

    // year
    if (yearField != null) {
        yearField.setIntValue(docYear);
        yearSortedField.setLongValue(docYear);
    } else {
        yearField = new IntField("year", docYear, Store.YES);
        yearSortedField = new NumericDocValuesField("year", docYear);
    }
    document.add(yearField);
    document.add(yearSortedField);

    // gross
    if (grossField != null) {
        grossField.setDoubleValue(docGross);
        grossSortedField.setDoubleValue(docGross);
    } else {
        grossField = new DoubleField("gross", docGross, Store.YES);
        grossSortedField = new DoubleDocValuesField("gross", docGross);
    }
    document.add(grossField);
    document.add(grossSortedField);

    return document;
}
Also used : NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) TextField(org.apache.lucene.document.TextField) IntField(org.apache.lucene.document.IntField) Document(org.apache.lucene.document.Document) BytesRef(org.apache.lucene.util.BytesRef) DoubleField(org.apache.lucene.document.DoubleField)

Example 4 with IntField

use of org.apache.lucene.document.IntField in project querydsl by querydsl.

the class LuceneSerializerTest method createDocument.

/**
 * Creates the fixed sample document used by the serializer tests.
 *
 * <p>The document holds four reader-backed fields (title, author, text, rating), an empty
 * stored and analyzed "publisher" field, and one stored numeric field per numeric kind
 * exercised by the tests.
 *
 * @return a new document populated with the sample fields
 */
private Document createDocument() {
    // Name/value pairs for the fields whose content is supplied through a Reader.
    final String[][] readerBackedFields = {
        { "title", "Jurassic Park" },
        { "author", "Michael Crichton" },
        { "text", "It's a UNIX system! I know this!" },
        { "rating", "Good" }
    };

    final Document document = new Document();
    for (final String[] entry : readerBackedFields) {
        document.add(new Field(entry[0], new StringReader(entry[1])));
    }

    document.add(new Field("publisher", "", Store.YES, Index.ANALYZED));

    // Numeric fields; the short and byte samples are represented with IntField.
    document.add(new IntField("year", 1990, Store.YES));
    document.add(new DoubleField("gross", 900.0, Store.YES));
    document.add(new LongField("longField", 1, Store.YES));
    document.add(new IntField("shortField", 1, Store.YES));
    document.add(new IntField("byteField", 1, Store.YES));
    document.add(new FloatField("floatField", 1, Store.YES));

    return document;
}
Also used : LongField(org.apache.lucene.document.LongField) FloatField(org.apache.lucene.document.FloatField) DoubleField(org.apache.lucene.document.DoubleField) Field(org.apache.lucene.document.Field) IntField(org.apache.lucene.document.IntField) LongField(org.apache.lucene.document.LongField) StringReader(java.io.StringReader) IntField(org.apache.lucene.document.IntField) Document(org.apache.lucene.document.Document) DoubleField(org.apache.lucene.document.DoubleField) FloatField(org.apache.lucene.document.FloatField)

Example 5 with IntField

use of org.apache.lucene.document.IntField in project Krill by KorAP.

the class Test method addDoc.

/**
 * Builds a document from the given key/value map and adds it to the writer.
 *
 * <p>Values are looked up in {@code m} by field name. "title", "subtitle" and "textClass" become
 * stored text fields; "id", "corpus" and "pubPlace" become stored string fields; "pubDate" is
 * parsed with {@code Integer.parseInt} and stored as an int field. The "text" field takes its
 * stored value from the "textStr" entry, while its token stream is produced by
 * {@code getTermVector} from the "text" entry; term vectors with offsets, positions and
 * payloads are enabled for it.
 *
 * @param w the writer that receives the document
 * @param m field values keyed by name
 * @throws IOException if the writer fails to add the document
 */
public static void addDoc(IndexWriter w, Map<String, String> m) throws IOException {
    final String[] textFieldNames = { "title", "subtitle", "textClass" };
    final String[] stringFieldNames = { "id", "corpus", "pubPlace" };
    final String[] intFieldNames = { "pubDate" };

    final Document document = new Document();

    // Text fields
    for (final String fieldName : textFieldNames) {
        document.add(new TextField(fieldName, m.get(fieldName), Field.Store.YES));
    }

    // String fields
    for (final String fieldName : stringFieldNames) {
        document.add(new StringField(fieldName, m.get(fieldName), Field.Store.YES));
    }

    // Integer fields
    for (final String fieldName : intFieldNames) {
        final int parsed = Integer.parseInt(m.get(fieldName));
        document.add(new IntField(fieldName, parsed, Field.Store.YES));
    }

    // Stored text type extended with full term vector information.
    final FieldType termVectorType = new FieldType(TextField.TYPE_STORED);
    termVectorType.setStoreTermVectors(true);
    termVectorType.setStoreTermVectorOffsets(true);
    termVectorType.setStoreTermVectorPositions(true);
    termVectorType.setStoreTermVectorPayloads(true);

    // The "text" field is stored from "textStr" but indexed from a prepared token stream.
    final Field analyzedText = new Field("text", m.get("textStr"), termVectorType);
    analyzedText.setTokenStream(getTermVector(m.get("text")));
    document.add(analyzedText);

    // Add document to writer
    w.addDocument(document);
}
Also used : StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IntField(org.apache.lucene.document.IntField) StringField(org.apache.lucene.document.StringField) TextField(org.apache.lucene.document.TextField) IntField(org.apache.lucene.document.IntField) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) MultiTermTokenStream(de.ids_mannheim.korap.index.MultiTermTokenStream)

Aggregations

IntField (org.apache.lucene.document.IntField): 7; Document (org.apache.lucene.document.Document): 4; Field (org.apache.lucene.document.Field): 4; StringField (org.apache.lucene.document.StringField): 4; TextField (org.apache.lucene.document.TextField): 4; DoubleField (org.apache.lucene.document.DoubleField): 3; LongField (org.apache.lucene.document.LongField): 3; FloatField (org.apache.lucene.document.FloatField): 2; NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 2; AttributeSearchInfo (com.agiletec.aps.system.common.entity.model.AttributeSearchInfo): 1; AttributeTracer (com.agiletec.aps.system.common.entity.model.AttributeTracer): 1; IndexableAttributeInterface (com.agiletec.aps.system.common.searchengine.IndexableAttributeInterface): 1; Lang (com.agiletec.aps.system.services.lang.Lang): 1; ResourceAttributeInterface (com.agiletec.plugins.jacms.aps.system.services.content.model.extraAttribute.ResourceAttributeInterface): 1; MultiTermTokenStream (de.ids_mannheim.korap.index.MultiTermTokenStream): 1; StringReader (java.io.StringReader): 1; ArrayList (java.util.ArrayList): 1; Date (java.util.Date): 1; ParamConverterProvider (javax.ws.rs.ext.ParamConverterProvider): 1; DoubleDocValuesField (org.apache.lucene.document.DoubleDocValuesField): 1