Use of org.apache.pulsar.functions.api.Record in project pulsar-io-cloud-storage by streamnative.
The class BlobStoreAbstractSink, method unsafeFlush:
private void unsafeFlush() {
    final List<Record<GenericRecord>> recordsToInsert = Lists.newArrayList();
    while (!pendingFlushQueue.isEmpty() && recordsToInsert.size() < maxBatchSize) {
        Record<GenericRecord> r = pendingFlushQueue.poll();
        if (r != null) {
            recordsToInsert.add(r);
        }
    }
    // Log the batch size, not the list itself, to avoid dumping every record.
    log.info("Flushing {} buffered records to blob store", recordsToInsert.size());

    Record<GenericRecord> firstRecord = recordsToInsert.get(0);
    Schema<GenericRecord> schema = getPulsarSchema(firstRecord);
    format.initSchema(schema);

    final Iterator<Record<GenericRecord>> iter = recordsToInsert.iterator();
    String filepath = buildPartitionPath(firstRecord, partitioner, format);
    try {
        ByteBuffer payload = bindValue(iter, format);
        log.info("Uploading blob {} currentBatchSize {}", filepath, currentBatchSize.get());
        long elapsedMs = System.currentTimeMillis();
        uploadPayload(payload, filepath);
        elapsedMs = System.currentTimeMillis() - elapsedMs;
        log.debug("Uploading blob elapsed time in ms: {}", elapsedMs);
        recordsToInsert.forEach(Record::ack);
        currentBatchSize.addAndGet(-1 * recordsToInsert.size());
        if (sinkContext != null) {
            sinkContext.recordMetric(METRICS_TOTAL_SUCCESS, recordsToInsert.size());
            sinkContext.recordMetric(METRICS_LATEST_UPLOAD_ELAPSED_TIME, elapsedMs);
        }
        log.info("Successfully uploaded blob {} currentBatchSize {}", filepath, currentBatchSize.get());
    } catch (Exception e) {
        if (e instanceof ContainerNotFoundException) {
            log.error("Blob {} is not found", filepath, e);
        } else if (e instanceof IOException) {
            log.error("Failed to write to blob {}", filepath, e);
        } else {
            log.error("Encountered unknown error writing to blob {}", filepath, e);
        }
        recordsToInsert.forEach(Record::fail);
        if (sinkContext != null) {
            sinkContext.recordMetric(METRICS_TOTAL_FAILURE, recordsToInsert.size());
        }
    }
}
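Note that unsafeFlush, as the name suggests, propagates any failure to its caller and would throw IndexOutOfBoundsException if invoked while pendingFlushQueue is empty, since it unconditionally reads the first collected record. A minimal sketch of the kind of guarded wrapper a sink typically calls it from on a scheduled flush thread; the wrapper shown here is illustrative, not the project's actual code:

private void flush() {
    try {
        if (currentBatchSize.get() > 0) {
            unsafeFlush();
        }
    } catch (Throwable t) {
        // Keep the flush task alive: a ScheduledExecutorService stops
        // rescheduling a periodic task whose run() throws.
        log.error("Caught exception while flushing buffered records", t);
    }
}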
Use of org.apache.pulsar.functions.api.Record in project pulsar-io-cloud-storage by streamnative.
The class AvroFormatTest, method getFormatGeneratedRecord:
public org.apache.avro.generic.GenericRecord getFormatGeneratedRecord(TopicName topicName,
        Message<GenericRecord> msg) throws Exception {
    Record<GenericRecord> mockRecord = mock(Record.class);
    Schema<GenericRecord> mockSchema = mock(Schema.class);
    when(mockRecord.getTopicName()).thenReturn(Optional.of(topicName.toString()));
    when(mockRecord.getPartitionIndex()).thenReturn(Optional.of(0));
    when(mockRecord.getMessage()).thenReturn(Optional.of(msg));
    when(mockRecord.getValue()).thenReturn(msg.getValue());
    when(mockRecord.getPartitionId()).thenReturn(Optional.of(String.format("%s-%s", topicName, 0)));
    when(mockRecord.getRecordSequence()).thenReturn(Optional.of(3221225506L));
    when(mockRecord.getSchema()).thenReturn(mockSchema);

    List<Record<GenericRecord>> records = new ArrayList<>();
    records.add(mockRecord);

    final Schema<GenericRecord> schema = (Schema<GenericRecord>) msg.getReaderSchema().get();
    org.apache.avro.Schema avroSchema = AvroRecordUtil.convertToAvroSchema(schema);
    avroSchema = MetadataUtil.setMetadataSchema(avroSchema);
    final GenericDatumReader<Object> datumReader = new GenericDatumReader<>(avroSchema);

    ByteSource byteSource = getFormat().recordWriter(records.listIterator());
    final SeekableByteArrayInput input = new SeekableByteArrayInput(byteSource.read());
    final DataFileReader<Object> objects = new DataFileReader<>(input, datumReader);
    return (org.apache.avro.generic.GenericRecord) objects.next();
}
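The helper returns only the first record in the Avro container and never closes the DataFileReader. When a test writes several records through the format, the tail of the method can be adapted along these lines (a sketch reusing the input and datumReader built above; DataFileReader is both iterable and closeable):

final List<org.apache.avro.generic.GenericRecord> allRecords = new ArrayList<>();
try (DataFileReader<Object> reader = new DataFileReader<>(input, datumReader)) {
    // Drain every record from the Avro object container file, not just the first.
    while (reader.hasNext()) {
        allRecords.add((org.apache.avro.generic.GenericRecord) reader.next());
    }
}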
Use of org.apache.pulsar.functions.api.Record in project pulsar by yahoo.
The class InfluxDBSink, method buildPoint:
@Override
protected final Point buildPoint(Record<GenericRecord> record) {
    val genericRecord = record.getValue();

    // Looking for measurement
    val measurementField = genericRecord.getField("measurement");
    if (null == measurementField) {
        throw new SchemaSerializationException("measurement is a required field.");
    }
    val measurement = (String) measurementField;

    // Looking for timestamp
    long timestamp;
    val timestampField = getFiled(genericRecord, "timestamp");
    if (null == timestampField) {
        timestamp = System.currentTimeMillis();
    } else if (timestampField instanceof Number) {
        timestamp = ((Number) timestampField).longValue();
    } else if (timestampField instanceof String) {
        timestamp = Long.parseLong((String) timestampField);
    } else {
        throw new SchemaSerializationException("Invalid timestamp field");
    }

    val point = Point.measurement(measurement).time(timestamp, writePrecision);

    // Looking for tag fields
    val tagsField = getFiled(genericRecord, "tags");
    if (null != tagsField) {
        if (tagsField instanceof GenericRecord) {
            // JSONSchema
            GenericRecord tagsRecord = (GenericRecord) tagsField;
            for (Field field : tagsRecord.getFields()) {
                val fieldName = field.getName();
                val value = tagsRecord.getField(field);
                point.addTag(fieldName, (String) value);
            }
        } else if (Map.class.isAssignableFrom(tagsField.getClass())) {
            // AvroSchema
            Map<Object, Object> tagsMap = (Map<Object, Object>) tagsField;
            tagsMap.forEach((key, value) -> point.addTag(key.toString(), value.toString()));
        } else {
            throw new SchemaSerializationException("Unknown type for 'tags'");
        }
    }

    // Looking for sensor fields
    val columnsField = genericRecord.getField("fields");
    if (columnsField instanceof GenericRecord) {
        // JSONSchema
        val columnsRecord = (GenericRecord) columnsField;
        for (Field field : columnsRecord.getFields()) {
            val fieldName = field.getName();
            val value = columnsRecord.getField(field);
            addPointField(point, fieldName, value);
        }
    } else if (Map.class.isAssignableFrom(columnsField.getClass())) {
        // AvroSchema
        val columnsMap = (Map<Object, Object>) columnsField;
        columnsMap.forEach((key, value) -> addPointField(point, key.toString(), value));
    } else {
        throw new SchemaSerializationException("Unknown type for 'fields'");
    }
    return point;
}
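The method therefore expects each record's value to expose a measurement string, an optional timestamp, optional tags, and a fields map. As an illustrative (not project-supplied) example, a message produced with a JSON schema shaped like this would yield a point with one tag and two fields:

{
    "measurement": "cpu",
    "timestamp": 1640000000000,
    "tags": { "host": "node-1" },
    "fields": { "usage_idle": 92.5, "usage_user": 4.1 }
}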
Use of org.apache.pulsar.functions.api.Record in project pulsar by yahoo.
The class BatchSink, method flush:
private void flush() {
    List<Record<R>> toFlushList;
    synchronized (this) {
        if (incomingList.isEmpty()) {
            return;
        }
        toFlushList = incomingList;
        incomingList = Lists.newArrayList();
    }

    val points = Lists.<T>newArrayListWithExpectedSize(toFlushList.size());
    // Build points, dropping records that fail to convert. An explicit iterator
    // is required here: removing from toFlushList inside a for-each loop would
    // throw ConcurrentModificationException.
    val iterator = toFlushList.iterator();
    while (iterator.hasNext()) {
        Record<R> record = iterator.next();
        try {
            points.add(buildPoint(record));
        } catch (Exception e) {
            record.fail();
            iterator.remove();
            log.warn("Failed to build point from record, skipping it", e);
        }
    }

    try {
        if (CollectionUtils.isNotEmpty(points)) {
            writePoints(points);
        }
        toFlushList.forEach(Record::ack);
        points.clear();
        toFlushList.clear();
    } catch (Exception e) {
        toFlushList.forEach(Record::fail);
        log.error("Failed to write batch to InfluxDB", e);
    }
}
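Swapping incomingList for a fresh list inside the synchronized block keeps the lock window to a pointer swap, so buildPoint and the network write never block producers. Batch sinks like this are usually flushed both when the batch fills and on a timer; a sketch of the timer half, with hypothetical flushExecutor and batchTimeMs names not taken from the class above:

// Schedule periodic flushes; flush() is already safe to run with no pending records.
ScheduledExecutorService flushExecutor = Executors.newSingleThreadScheduledExecutor();
flushExecutor.scheduleAtFixedRate(this::flush, batchTimeMs, batchTimeMs, TimeUnit.MILLISECONDS);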
Use of org.apache.pulsar.functions.api.Record in project pulsar by yahoo.
The class InfluxDBGenericRecordSink, method buildPoint:
@Override
protected Point buildPoint(Record<GenericRecord> message) throws Exception {
    Map<String, String> tags;
    Map<String, Object> fields = Maps.newHashMap();
    GenericRecord record = message.getValue();

    Object measurementField = getFiled(record, "measurement");
    if (null == measurementField) {
        throw new SchemaSerializationException("measurement is a required field.");
    }
    String measurement = measurementField.toString();

    // Looking for tags
    Object tagsField = getFiled(record, "tags");
    if (null == tagsField) {
        tags = ImmutableMap.of();
    } else if (Map.class.isAssignableFrom(tagsField.getClass())) {
        tags = ((Map<Object, Object>) tagsField).entrySet().stream()
                .collect(Collectors.toMap(
                        entry -> entry.getKey().toString(),
                        entry -> entry.getValue().toString()));
    } else {
        // A 'tags' field that is not of Map type is ignored
        tags = ImmutableMap.of();
    }

    // Just insert the current time millis
    long timestamp = System.currentTimeMillis();

    for (Field field : record.getFields()) {
        String fieldName = field.getName();
        if (FIELDS_TO_SKIP.contains(fieldName)) {
            continue;
        }
        Object fieldValue = record.getField(field);
        if (null != fieldValue) {
            fields.put(fieldName, fieldValue);
        }
    }

    Point.Builder builder = Point.measurement(measurement)
            .time(timestamp, TimeUnit.MILLISECONDS)
            .tag(tags)
            .fields(fields);
    return builder.build();
}
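For a record whose value carries measurement = "cpu", a tags map of {host=node-1}, and a top-level numeric field usage_idle = 92.5, the builder above would emit a point equivalent to this InfluxDB line protocol entry (values illustrative, timestamp in milliseconds):

cpu,host=node-1 usage_idle=92.5 1640000000000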