
Example 26 with RecordReader

Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.

The class FlowFileTable, method getRowType:

@Override
public RelDataType getRowType(final RelDataTypeFactory typeFactory) {
    if (relDataType != null) {
        return relDataType;
    }
    // Parse just enough of the FlowFile content to discover the record schema.
    RecordSchema schema;
    try (final InputStream in = session.read(flowFile)) {
        final RecordReader recordParser = recordParserFactory.createRecordReader(flowFile, in, logger);
        schema = recordParser.getSchema();
    } catch (final Exception e) {
        throw new ProcessException("Failed to determine schema of data records for " + flowFile, e);
    }
    final List<String> names = new ArrayList<>();
    final List<RelDataType> types = new ArrayList<>();
    final JavaTypeFactory javaTypeFactory = (JavaTypeFactory) typeFactory;
    // Convert each NiFi record field into a Calcite column name/type pair.
    for (final RecordField field : schema.getFields()) {
        names.add(field.getFieldName());
        types.add(getRelDataType(field.getDataType(), javaTypeFactory));
    }
    logger.debug("Found Schema: {}", new Object[] { schema });
    if (recordSchema == null) {
        recordSchema = schema;
    }
    relDataType = typeFactory.createStructType(Pair.zip(names, types));
    return relDataType;
}
Also used: ProcessException (org.apache.nifi.processor.exception.ProcessException), RecordField (org.apache.nifi.serialization.record.RecordField), InputStream (java.io.InputStream), RecordReader (org.apache.nifi.serialization.RecordReader), ArrayList (java.util.ArrayList), JavaTypeFactory (org.apache.calcite.adapter.java.JavaTypeFactory), RelDataType (org.apache.calcite.rel.type.RelDataType), RecordSchema (org.apache.nifi.serialization.record.RecordSchema)
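
The getRelDataType helper called in the loop above is not part of this snippet. A minimal, hypothetical sketch of what such a mapping could look like, assuming NiFi's DataType/RecordFieldType and Calcite's createJavaType (the real FlowFileTable covers more types, such as dates and nested records):

private RelDataType getRelDataType(final DataType fieldType, final JavaTypeFactory typeFactory) {
    // Map a NiFi field type onto a Java type that Calcite understands;
    // anything unrecognized falls back to Object.
    switch (fieldType.getFieldType()) {
        case BOOLEAN:
            return typeFactory.createJavaType(boolean.class);
        case INT:
            return typeFactory.createJavaType(int.class);
        case LONG:
            return typeFactory.createJavaType(long.class);
        case FLOAT:
            return typeFactory.createJavaType(float.class);
        case DOUBLE:
            return typeFactory.createJavaType(double.class);
        case STRING:
            return typeFactory.createJavaType(String.class);
        default:
            return typeFactory.createJavaType(Object.class);
    }
}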

Example 27 with RecordReader

Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.

The class TestConsumeAzureEventHub, method setupRecordReader:

private void setupRecordReader(List<EventData> eventDataList, int throwExceptionAt, String writeFailureWith) throws MalformedRecordException, IOException, SchemaNotFoundException {
    final RecordReaderFactory readerFactory = mock(RecordReaderFactory.class);
    processor.setReaderFactory(readerFactory);
    final RecordReader reader = mock(RecordReader.class);
    when(readerFactory.createRecordReader(anyMap(), any(), any())).thenReturn(reader);
    final List<Record> recordList = eventDataList.stream().map(eventData -> toRecord(new String(eventData.getBytes()))).collect(Collectors.toList());
    // Append a null after each record to mark the end of its record set; the record whose
    // value matches writeFailureWith is not followed by a trailing null.
    final Function<List<Record>, List<Record>> addEndRecord = rs -> rs.stream().flatMap(r -> r.getAsString("value").equals(writeFailureWith) ? Stream.of(r) : Stream.of(r, null)).collect(Collectors.toList());
    final List<Record> recordSetList = addEndRecord.apply(recordList);
    final Record[] records = recordSetList.toArray(new Record[recordSetList.size()]);
    switch (throwExceptionAt) {
        // -1: every read succeeds; records are returned in order.
        case -1:
            when(reader.nextRecord()).thenReturn(records[0], Arrays.copyOfRange(records, 1, records.length));
            break;
        // 0: the very first read fails, after which all records are returned.
        case 0:
            when(reader.nextRecord()).thenThrow(new MalformedRecordException("Simulating Record parse failure.")).thenReturn(records[0], Arrays.copyOfRange(records, 1, records.length));
            break;
        // Otherwise: reads succeed up to the given index, fail once there, then succeed again.
        default:
            final List<Record> recordList1 = addEndRecord.apply(recordList.subList(0, throwExceptionAt));
            final List<Record> recordList2 = addEndRecord.apply(recordList.subList(throwExceptionAt + 1, recordList.size()));
            final Record[] records1 = recordList1.toArray(new Record[recordList1.size()]);
            final Record[] records2 = recordList2.toArray(new Record[recordList2.size()]);
            when(reader.nextRecord()).thenReturn(records1[0], Arrays.copyOfRange(records1, 1, records1.length)).thenThrow(new MalformedRecordException("Simulating Record parse failure.")).thenReturn(records2[0], Arrays.copyOfRange(records2, 1, records2.length));
    }
}
Also used: Arrays (java.util.Arrays), HashMap (java.util.HashMap), SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException), AtomicReference (java.util.concurrent.atomic.AtomicReference), Function (java.util.function.Function), Mockito.doThrow (org.mockito.Mockito.doThrow), RecordReader (org.apache.nifi.serialization.RecordReader), ProvenanceEventRecord (org.apache.nifi.provenance.ProvenanceEventRecord), Map (java.util.Map), Record (org.apache.nifi.serialization.record.Record), MapRecord (org.apache.nifi.serialization.record.MapRecord), Before (org.junit.Before), OutputStream (java.io.OutputStream), MockProcessSession (org.apache.nifi.util.MockProcessSession), MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException), ProvenanceEventType (org.apache.nifi.provenance.ProvenanceEventType), RecordField (org.apache.nifi.serialization.record.RecordField), WriteResult (org.apache.nifi.serialization.WriteResult), RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory), SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema), Test (org.junit.Test), IOException (java.io.IOException), Mockito.when (org.mockito.Mockito.when), EventData (com.microsoft.azure.eventhubs.EventData), ProcessSessionFactory (org.apache.nifi.processor.ProcessSessionFactory), Collectors (java.util.stream.Collectors), StandardCharsets (java.nio.charset.StandardCharsets), PartitionContext (com.microsoft.azure.eventprocessorhost.PartitionContext), Matchers.any (org.mockito.Matchers.any), Mockito (org.mockito.Mockito), AtomicLong (java.util.concurrent.atomic.AtomicLong), List (java.util.List), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), Stream (java.util.stream.Stream), Matchers.anyMap (org.mockito.Matchers.anyMap), SharedSessionState (org.apache.nifi.util.SharedSessionState), MockComponentLog (org.apache.nifi.util.MockComponentLog), ProcessorInitializationContext (org.apache.nifi.processor.ProcessorInitializationContext), RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter), RecordFieldType (org.apache.nifi.serialization.record.RecordFieldType), Collections (java.util.Collections), Assert.assertEquals (org.junit.Assert.assertEquals), MockFlowFile (org.apache.nifi.util.MockFlowFile), Mockito.mock (org.mockito.Mockito.mock)
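
The stubbing above leans on Mockito's consecutive-answer chaining: thenReturn(first, rest...) queues one return value per successive call, and thenThrow can be spliced into the chain to simulate a parse failure mid-stream. A minimal, self-contained illustration of the same pattern (recordA and recordB are hypothetical Record instances):

final RecordReader reader = mock(RecordReader.class);
when(reader.nextRecord())
    // 1st call returns a record, the 2nd throws, the 3rd returns another record,
    // and the trailing null marks the end of the record stream on the 4th call.
    .thenReturn(recordA)
    .thenThrow(new MalformedRecordException("Simulating Record parse failure."))
    .thenReturn(recordB, (Record) null);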

Example 28 with RecordReader

Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.

The class PutHBaseRecord, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordReaderFactory recordParserFactory = context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);
    List<PutFlowFile> flowFiles = new ArrayList<>();
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String rowFieldName = context.getProperty(ROW_FIELD_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String columnFamily = context.getProperty(COLUMN_FAMILY).evaluateAttributeExpressions(flowFile).getValue();
    final String timestampFieldName = context.getProperty(TIMESTAMP_FIELD_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String fieldEncodingStrategy = context.getProperty(FIELD_ENCODING_STRATEGY).getValue();
    final String complexFieldStrategy = context.getProperty(COMPLEX_FIELD_STRATEGY).getValue();
    final String rowEncodingStrategy = context.getProperty(ROW_ID_ENCODING_STRATEGY).getValue();
    final long start = System.nanoTime();
    int index = 0;
    int columns = 0;
    boolean failed = false;
    // If a previous attempt failed part-way through, resume from the stored restart.index attribute.
    String startIndexStr = flowFile.getAttribute("restart.index");
    int startIndex = -1;
    if (startIndexStr != null) {
        startIndex = Integer.parseInt(startIndexStr);
    }
    PutFlowFile last = null;
    try (final InputStream in = session.read(flowFile);
        final RecordReader reader = recordParserFactory.createRecordReader(flowFile, in, getLogger())) {
        Record record;
        if (startIndex >= 0) {
            // Skip records that were already written by a previous, partially successful attempt.
            while (index++ < startIndex && reader.nextRecord() != null) {
            }
        }
        while ((record = reader.nextRecord()) != null) {
            PutFlowFile putFlowFile = createPut(context, record, reader.getSchema(), flowFile, rowFieldName, columnFamily, timestampFieldName, fieldEncodingStrategy, rowEncodingStrategy, complexFieldStrategy);
            if (putFlowFile.getColumns().isEmpty()) {
                // Nothing to write for this record; skip it.
                continue;
            }
            flowFiles.add(putFlowFile);
            index++;
            if (flowFiles.size() == batchSize) {
                columns += addBatch(tableName, flowFiles);
                last = flowFiles.get(flowFiles.size() - 1);
                flowFiles = new ArrayList<>();
            }
        }
        if (!flowFiles.isEmpty()) {
            columns += addBatch(tableName, flowFiles);
            last = flowFiles.get(flowFiles.size() - 1);
        }
    } catch (Exception ex) {
        getLogger().error("Failed to put records to HBase.", ex);
        failed = true;
    }
    if (!failed) {
        if (columns > 0) {
            sendProvenance(session, flowFile, columns, System.nanoTime() - start, last);
        }
        flowFile = session.removeAttribute(flowFile, "restart.index");
        session.transfer(flowFile, REL_SUCCESS);
    } else {
        // Record how far we got so that a retry can resume from the first unwritten record.
        String restartIndex = Integer.toString(index - flowFiles.size());
        flowFile = session.putAttribute(flowFile, "restart.index", restartIndex);
        if (columns > 0) {
            sendProvenance(session, flowFile, columns, System.nanoTime() - start, last);
        }
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
    }
    session.commit();
}
Also used: PutFlowFile (org.apache.nifi.hbase.put.PutFlowFile), FlowFile (org.apache.nifi.flowfile.FlowFile), InputStream (java.io.InputStream), RecordReader (org.apache.nifi.serialization.RecordReader), ArrayList (java.util.ArrayList), ProcessException (org.apache.nifi.processor.exception.ProcessException), IOException (java.io.IOException), IllegalTypeConversionException (org.apache.nifi.serialization.record.util.IllegalTypeConversionException), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), Record (org.apache.nifi.serialization.record.Record)
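
The addBatch helper called above is not shown in this snippet. A hypothetical sketch of what it could look like, assuming a clientService field of type org.apache.nifi.hbase.HBaseClientService (the actual PutHBaseRecord implementation may differ):

private int addBatch(final String tableName, final List<PutFlowFile> flowFiles) throws IOException {
    int columns = 0;
    for (final PutFlowFile putFlowFile : flowFiles) {
        // Count the columns written so the caller can report them in provenance.
        columns += putFlowFile.getColumns().size();
    }
    // Send the accumulated mutations to HBase as a single batched call.
    clientService.put(tableName, flowFiles);
    return columns;
}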

Aggregations

RecordReader (org.apache.nifi.serialization.RecordReader): 28 uses
InputStream (java.io.InputStream): 22 uses
FlowFile (org.apache.nifi.flowfile.FlowFile): 22 uses
IOException (java.io.IOException): 21 uses
RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory): 21 uses
ProcessException (org.apache.nifi.processor.exception.ProcessException): 18 uses
RecordSchema (org.apache.nifi.serialization.record.RecordSchema): 17 uses
HashMap (java.util.HashMap): 16 uses
MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException): 16 uses
Record (org.apache.nifi.serialization.record.Record): 15 uses
RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory): 14 uses
SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException): 13 uses
RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter): 12 uses
OutputStream (java.io.OutputStream): 10 uses
Map (java.util.Map): 10 uses
WriteResult (org.apache.nifi.serialization.WriteResult): 8 uses
RecordSet (org.apache.nifi.serialization.record.RecordSet): 8 uses
ArrayList (java.util.ArrayList): 7 uses
BufferedInputStream (java.io.BufferedInputStream): 5 uses
ComponentLog (org.apache.nifi.logging.ComponentLog): 5 uses