Search in sources :

Example 6 with Record

use of org.apache.pulsar.functions.api.Record in project pulsar-io-cloud-storage by streamnative.

the class BlobStoreAbstractSink method unsafeFlush.

/**
 * Drains up to {@code maxBatchSize} records from {@code pendingFlushQueue}, serializes them with
 * {@code format} and uploads the result as a single blob.
 *
 * <p>The schema and the target path are both derived from the first drained record. On a
 * successful upload every drained record is acked, {@code currentBatchSize} is decreased by the
 * number of drained records and success metrics are recorded. If serialization or upload throws,
 * every drained record is failed and the failure metric is recorded instead.
 *
 * <p>Returns without doing anything when no record could be drained.
 */
private void unsafeFlush() {
    final List<Record<GenericRecord>> recordsToInsert = Lists.newArrayList();
    while (!pendingFlushQueue.isEmpty() && recordsToInsert.size() < maxBatchSize) {
        Record<GenericRecord> r = pendingFlushQueue.poll();
        if (r != null) {
            recordsToInsert.add(r);
        }
    }
    if (recordsToInsert.isEmpty()) {
        // Nothing was drained (queue empty, or maxBatchSize <= 0); get(0) below would throw.
        return;
    }
    // Log the count, not the list itself: the placeholder is meant to hold a number.
    log.info("Flushing {} buffered records to blob store", recordsToInsert.size());
    Record<GenericRecord> firstRecord = recordsToInsert.get(0);
    Schema<GenericRecord> schema = getPulsarSchema(firstRecord);
    format.initSchema(schema);
    final Iterator<Record<GenericRecord>> iter = recordsToInsert.iterator();
    String filepath = buildPartitionPath(firstRecord, partitioner, format);
    try {
        ByteBuffer payload = bindValue(iter, format);
        log.info("Uploading blob {} currentBatchSize {}", filepath, currentBatchSize.get());
        // elapsedMs first holds the start timestamp, then the measured duration.
        long elapsedMs = System.currentTimeMillis();
        uploadPayload(payload, filepath);
        elapsedMs = System.currentTimeMillis() - elapsedMs;
        log.debug("Uploading blob elapsed time in ms: {}", elapsedMs);
        recordsToInsert.forEach(Record::ack);
        currentBatchSize.addAndGet(-1 * recordsToInsert.size());
        if (sinkContext != null) {
            sinkContext.recordMetric(METRICS_TOTAL_SUCCESS, recordsToInsert.size());
            sinkContext.recordMetric(METRICS_LATEST_UPLOAD_ELAPSED_TIME, elapsedMs);
        }
        log.info("Successfully uploaded blob {} currentBatchSize {}", filepath, currentBatchSize.get());
    } catch (Exception e) {
        // The exception type only selects the log message; handling is the same for all of them.
        if (e instanceof ContainerNotFoundException) {
            log.error("Blob {} is not found", filepath, e);
        } else if (e instanceof IOException) {
            log.error("Failed to write to blob {}", filepath, e);
        } else {
            log.error("Encountered unknown error writing to blob {}", filepath, e);
        }
        recordsToInsert.forEach(Record::fail);
        if (sinkContext != null) {
            sinkContext.recordMetric(METRICS_TOTAL_FAILURE, recordsToInsert.size());
        }
    }
}
Also used : Record(org.apache.pulsar.functions.api.Record) GenericRecord(org.apache.pulsar.client.api.schema.GenericRecord) IOException(java.io.IOException) GenericRecord(org.apache.pulsar.client.api.schema.GenericRecord) ByteBuffer(java.nio.ByteBuffer) ContainerNotFoundException(org.jclouds.blobstore.ContainerNotFoundException) ContainerNotFoundException(org.jclouds.blobstore.ContainerNotFoundException) IOException(java.io.IOException)

Example 7 with Record

use of org.apache.pulsar.functions.api.Record in project pulsar-io-cloud-storage by streamnative.

the class AvroFormatTest method getFormatGeneratedRecord.

/**
 * Runs a single mocked record through the format under test and returns the first Avro record
 * read back from the bytes the format produced.
 *
 * @param topicName topic reported by the mocked record; also used to build its partition id
 * @param msg message whose value and reader schema back the mocked record
 * @return the first record decoded from the format's output
 * @throws Exception if writing or reading the Avro container fails
 */
public org.apache.avro.generic.GenericRecord getFormatGeneratedRecord(TopicName topicName, Message<GenericRecord> msg) throws Exception {
    Record<GenericRecord> mockRecord = mock(Record.class);
    Schema<GenericRecord> mockSchema = mock(Schema.class);
    when(mockRecord.getTopicName()).thenReturn(Optional.of(topicName.toString()));
    when(mockRecord.getPartitionIndex()).thenReturn(Optional.of(0));
    when(mockRecord.getMessage()).thenReturn(Optional.of(msg));
    when(mockRecord.getValue()).thenReturn(msg.getValue());
    when(mockRecord.getPartitionId()).thenReturn(Optional.of(String.format("%s-%s", topicName, 0)));
    when(mockRecord.getRecordSequence()).thenReturn(Optional.of(3221225506L));
    when(mockRecord.getSchema()).thenReturn(mockSchema);
    List<Record<GenericRecord>> records = new ArrayList<>();
    records.add(mockRecord);
    final Schema<GenericRecord> schema = (Schema<GenericRecord>) msg.getReaderSchema().get();
    // The reader schema is the message schema converted to Avro, then passed through
    // MetadataUtil.setMetadataSchema before being handed to the datum reader.
    org.apache.avro.Schema avroSchema = AvroRecordUtil.convertToAvroSchema(schema);
    avroSchema = MetadataUtil.setMetadataSchema(avroSchema);
    final GenericDatumReader<Object> datumReader = new GenericDatumReader<>(avroSchema);
    ByteSource byteSource = getFormat().recordWriter(records.listIterator());
    final SeekableByteArrayInput input = new SeekableByteArrayInput(byteSource.read());
    // Close the reader once the first record has been decoded instead of leaking it.
    try (DataFileReader<Object> objects = new DataFileReader<>(input, datumReader)) {
        return (org.apache.avro.generic.GenericRecord) objects.next();
    }
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.pulsar.client.api.Schema) ArrayList(java.util.ArrayList) DataFileReader(org.apache.avro.file.DataFileReader) ByteSource(org.apache.pulsar.jcloud.shade.com.google.common.io.ByteSource) GenericRecord(org.apache.pulsar.client.api.schema.GenericRecord) Record(org.apache.pulsar.functions.api.Record) GenericRecord(org.apache.pulsar.client.api.schema.GenericRecord) SeekableByteArrayInput(org.apache.avro.file.SeekableByteArrayInput)

Example 8 with Record

use of org.apache.pulsar.functions.api.Record in project pulsar by yahoo.

the class InfluxDBSink method buildPoint.

/**
 * Converts a record into an InfluxDB {@link Point}.
 *
 * <p>The record value is read as follows:
 * <ul>
 *   <li>{@code measurement} - required; its string form becomes the measurement name.</li>
 *   <li>{@code timestamp} - optional; a {@link Number} or a numeric {@link String}. When absent,
 *       the current time in milliseconds is used.</li>
 *   <li>{@code tags} - optional; either a nested {@link GenericRecord} or a {@link Map}.</li>
 *   <li>{@code fields} - required; either a nested {@link GenericRecord} or a {@link Map}.</li>
 * </ul>
 *
 * @param record the incoming record
 * @return the point built from the record value
 * @throws SchemaSerializationException if a required field is missing or a field has an
 *         unsupported type
 */
@Override
protected final Point buildPoint(Record<GenericRecord> record) {
    val genericRecord = record.getValue();
    // looking for measurement
    val measurementField = genericRecord.getField("measurement");
    if (null == measurementField) {
        throw new SchemaSerializationException("measurement is a required field.");
    }
    // toString() rather than a hard cast, so non-String character sequences do not fail
    // with a ClassCastException.
    val measurement = measurementField.toString();
    // looking for timestamp
    long timestamp;
    val timestampField = getFiled(genericRecord, "timestamp");
    if (null == timestampField) {
        timestamp = System.currentTimeMillis();
    } else if (timestampField instanceof Number) {
        timestamp = ((Number) timestampField).longValue();
    } else if (timestampField instanceof String) {
        timestamp = Long.parseLong((String) timestampField);
    } else {
        throw new SchemaSerializationException("Invalid timestamp field");
    }
    val point = Point.measurement(measurement).time(timestamp, writePrecision);
    // Looking for tag fields
    val tagsField = getFiled(genericRecord, "tags");
    if (null != tagsField) {
        if (tagsField instanceof GenericRecord) {
            // JSONSchema
            GenericRecord tagsRecord = (GenericRecord) tagsField;
            for (Field field : tagsRecord.getFields()) {
                val fieldName = field.getName();
                val value = tagsRecord.getField(field);
                // Stringify like the Map branch below instead of casting; a null value is
                // still passed through as null.
                point.addTag(fieldName, null == value ? null : value.toString());
            }
        } else if (tagsField instanceof Map) {
            // AvroSchema
            Map<Object, Object> tagsMap = (Map<Object, Object>) tagsField;
            tagsMap.forEach((key, value) -> point.addTag(key.toString(), value.toString()));
        } else {
            throw new SchemaSerializationException("Unknown type for 'tags'");
        }
    }
    // Looking for sensor fields
    val columnsField = genericRecord.getField("fields");
    if (null == columnsField) {
        // Without this check the getClass()/instanceof dispatch below would hit a bare NPE.
        throw new SchemaSerializationException("fields is a required field.");
    }
    if (columnsField instanceof GenericRecord) {
        // JSONSchema
        val columnsRecord = (GenericRecord) columnsField;
        for (Field field : columnsRecord.getFields()) {
            val fieldName = field.getName();
            val value = columnsRecord.getField(field);
            addPointField(point, fieldName, value);
        }
    } else if (columnsField instanceof Map) {
        // AvroSchema
        val columnsMap = (Map<Object, Object>) columnsField;
        columnsMap.forEach((key, value) -> addPointField(point, key.toString(), value));
    } else {
        throw new SchemaSerializationException("Unknown type for 'fields'");
    }
    return point;
}
Also used : lombok.val(lombok.val) Utf8(org.apache.avro.util.Utf8) InfluxDBClient(com.influxdb.client.InfluxDBClient) lombok.val(lombok.val) SchemaSerializationException(org.apache.pulsar.client.api.SchemaSerializationException) WriteApiBlocking(com.influxdb.client.WriteApiBlocking) Field(org.apache.pulsar.client.api.schema.Field) WritePrecision(com.influxdb.client.domain.WritePrecision) BatchSink(org.apache.pulsar.io.influxdb.BatchSink) GenericRecord(org.apache.pulsar.client.api.schema.GenericRecord) List(java.util.List) Slf4j(lombok.extern.slf4j.Slf4j) SinkContext(org.apache.pulsar.io.core.SinkContext) Map(java.util.Map) Point(com.influxdb.client.write.Point) Record(org.apache.pulsar.functions.api.Record) Field(org.apache.pulsar.client.api.schema.Field) SchemaSerializationException(org.apache.pulsar.client.api.SchemaSerializationException) GenericRecord(org.apache.pulsar.client.api.schema.GenericRecord) Map(java.util.Map)

Example 9 with Record

use of org.apache.pulsar.functions.api.Record in project pulsar by yahoo.

the class BatchSink method flush.

/**
 * Converts all buffered records into points and writes them in one batch.
 *
 * <p>The buffer is swapped for a fresh list while holding this object's monitor; conversion and
 * writing then happen outside the lock. A record whose conversion throws is failed immediately
 * and left out of the batch. The remaining records are acked after a successful write, or
 * failed if the write (or an ack) throws.
 */
private void flush() {
    List<Record<R>> toFlushList;
    synchronized (this) {
        if (incomingList.isEmpty()) {
            return;
        }
        toFlushList = incomingList;
        incomingList = Lists.newArrayList();
    }
    val points = Lists.<T>newArrayListWithExpectedSize(toFlushList.size());
    // Records that converted successfully are collected separately. Removing failed records
    // from toFlushList while iterating it would throw ConcurrentModificationException.
    List<Record<R>> convertedRecords = Lists.newArrayListWithExpectedSize(toFlushList.size());
    for (Record<R> record : toFlushList) {
        try {
            points.add(buildPoint(record));
            convertedRecords.add(record);
        } catch (Exception e) {
            record.fail();
            log.warn("Record flush thread was exception ", e);
        }
    }
    try {
        if (CollectionUtils.isNotEmpty(points)) {
            writePoints(points);
        }
        convertedRecords.forEach(Record::ack);
        points.clear();
        toFlushList.clear();
    } catch (Exception e) {
        // Only records that were part of the batch are failed here; records that could not
        // be converted were already failed above.
        convertedRecords.forEach(Record::fail);
        log.error("InfluxDB write batch data exception ", e);
    }
}
Also used : lombok.val(lombok.val) GenericRecord(org.apache.pulsar.client.api.schema.GenericRecord) Record(org.apache.pulsar.functions.api.Record)

Example 10 with Record

use of org.apache.pulsar.functions.api.Record in project pulsar by yahoo.

the class InfluxDBGenericRecordSink method buildPoint.

/**
 * Builds an InfluxDB {@link Point} from a generic record.
 *
 * <p>The {@code measurement} field is mandatory and its string form names the measurement. A
 * {@code tags} field is used only when it is a {@link Map}; any other value (including none)
 * yields no tags. Every remaining non-null field whose name is not in {@code FIELDS_TO_SKIP}
 * is written as a point field. The point is stamped with the current time in milliseconds.
 *
 * @param message the incoming record
 * @return the point built from the record value
 * @throws SchemaSerializationException if the {@code measurement} field is missing
 */
@Override
protected Point buildPoint(Record<GenericRecord> message) throws Exception {
    final GenericRecord payload = message.getValue();

    final Object rawMeasurement = getFiled(payload, "measurement");
    if (rawMeasurement == null) {
        throw new SchemaSerializationException("measurement is a required field.");
    }
    final String measurementName = rawMeasurement.toString();

    // Tags: only a Map is honoured; null and every other type collapse to "no tags".
    final Object rawTags = getFiled(payload, "tags");
    final Map<String, String> tagMap;
    if (rawTags instanceof Map) {
        tagMap = ((Map<Object, Object>) rawTags).entrySet().stream()
                .collect(Collectors.toMap(
                        tagEntry -> tagEntry.getKey().toString(),
                        tagEntry -> tagEntry.getValue().toString()));
    } else {
        tagMap = ImmutableMap.of();
    }

    // The record's own timestamp is not consulted; the point gets the current wall-clock time.
    final long nowMillis = System.currentTimeMillis();

    final Map<String, Object> columns = Maps.newHashMap();
    for (Field column : payload.getFields()) {
        final String columnName = column.getName();
        if (FIELDS_TO_SKIP.contains(columnName)) {
            continue;
        }
        final Object columnValue = payload.getField(column);
        if (columnValue == null) {
            continue;
        }
        columns.put(columnName, columnValue);
    }

    return Point.measurement(measurementName)
            .time(nowMillis, TimeUnit.MILLISECONDS)
            .tag(tagMap)
            .fields(columns)
            .build();
}
Also used : ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) SchemaSerializationException(org.apache.pulsar.client.api.SchemaSerializationException) Set(java.util.Set) Field(org.apache.pulsar.client.api.schema.Field) Maps(com.google.common.collect.Maps) Collectors(java.util.stream.Collectors) TimeUnit(java.util.concurrent.TimeUnit) GenericRecord(org.apache.pulsar.client.api.schema.GenericRecord) Slf4j(lombok.extern.slf4j.Slf4j) Map(java.util.Map) Point(org.influxdb.dto.Point) Record(org.apache.pulsar.functions.api.Record) Field(org.apache.pulsar.client.api.schema.Field) SchemaSerializationException(org.apache.pulsar.client.api.SchemaSerializationException) Point(org.influxdb.dto.Point) GenericRecord(org.apache.pulsar.client.api.schema.GenericRecord) ImmutableMap(com.google.common.collect.ImmutableMap) Map(java.util.Map)

Aggregations

Record (org.apache.pulsar.functions.api.Record)64 GenericRecord (org.apache.pulsar.client.api.schema.GenericRecord)43 Test (org.testng.annotations.Test)35 Slf4j (lombok.extern.slf4j.Slf4j)24 GenericObject (org.apache.pulsar.client.api.schema.GenericObject)21 RecordSchemaBuilder (org.apache.pulsar.client.api.schema.RecordSchemaBuilder)20 Schema (org.apache.pulsar.client.api.Schema)18 CompletableFuture (java.util.concurrent.CompletableFuture)17 Mockito.mock (org.mockito.Mockito.mock)16 Assert (org.testng.Assert)15 Assert.assertEquals (org.testng.Assert.assertEquals)15 Assert.assertNull (org.testng.Assert.assertNull)15 HashMap (java.util.HashMap)14 Optional (java.util.Optional)13 GenericSchema (org.apache.pulsar.client.api.schema.GenericSchema)12 SchemaType (org.apache.pulsar.common.schema.SchemaType)12 Executors (java.util.concurrent.Executors)11 Function (org.apache.pulsar.functions.api.Function)11 ExecutorService (java.util.concurrent.ExecutorService)10 SinkContext (org.apache.pulsar.io.core.SinkContext)10