Search in sources :

Example 6 with Context

use of org.apache.pulsar.functions.api.Context in project incubator-pulsar by apache.

the class WindowFunctionExecutorTest method testPrepareLateTupleStreamWithoutTs.

/**
 * Verifies that a window configuration which names a late data topic but sets no
 * timestamp extractor class is rejected with an {@link IllegalArgumentException}
 * carrying the expected message when an input is processed.
 */
@Test
public void testPrepareLateTupleStreamWithoutTs() throws Exception {
    // Window settings: a late data topic is set, no timestamp extractor is set.
    WindowConfig lateTopicConfig = new WindowConfig();
    lateTopicConfig.setActualWindowFunctionClassName(TestFunction.class.getName());
    lateTopicConfig.setWindowLengthDurationMs(20L);
    lateTopicConfig.setSlidingIntervalDurationMs(10L);
    lateTopicConfig.setWatermarkEmitIntervalMs(10L);
    lateTopicConfig.setMaxLagMs(5L);
    lateTopicConfig.setLateDataTopic("$late");

    // The config is handed to the function as a plain map, obtained via a JSON round trip.
    Gson gson = new Gson();
    Map<?, ?> userConfig = gson.fromJson(gson.toJson(lateTopicConfig), Map.class);

    // Fresh mocked context exposing the function identity, topics and the user config.
    context = mock(Context.class);
    doReturn("test-tenant").when(context).getTenant();
    doReturn("test-namespace").when(context).getNamespace();
    doReturn("test-function").when(context).getFunctionName();
    doReturn(Collections.singleton("test-source-topic")).when(context).getInputTopics();
    doReturn("test-sink-topic").when(context).getOutputTopic();
    doReturn(Optional.of(userConfig)).when(context).getUserConfigValue(WindowConfig.WINDOW_CONFIG_KEY);

    try {
        testWindowedPulsarFunction.process(10L, context);
        // Reaching this line means the invalid configuration was accepted.
        fail();
    } catch (IllegalArgumentException rejected) {
        String expectedMessage = "Late data topic can be defined only when specifying a " + "timestamp extractor class";
        assertEquals(rejected.getMessage(), expectedMessage);
    }
}
Also used : WindowContext(org.apache.pulsar.functions.api.WindowContext) Context(org.apache.pulsar.functions.api.Context) WindowConfig(org.apache.pulsar.common.functions.WindowConfig) Gson(com.google.gson.Gson) Test(org.testng.annotations.Test)

Example 7 with Context

use of org.apache.pulsar.functions.api.Context in project pulsar by yahoo.

the class RemoveAvroFieldFunction method process.

/**
 * Forwards the incoming record to the context's output topic, dropping the
 * {@code FIELD_TO_REMOVE} field when the payload is AVRO.
 *
 * <p>Three cases are handled:
 * <ul>
 *   <li>KeyValue schema whose value schema is AVRO and contains the field: the value is
 *       re-encoded without the field and sent as a new {@code KeyValue} with an adjusted
 *       KeyValue schema;</li>
 *   <li>plain AVRO schema containing the field: the record is re-encoded without the field
 *       and sent as bytes with a native AVRO schema;</li>
 *   <li>anything else: the record is passed through with its original schema.</li>
 * </ul>
 *
 * @param genericObject the incoming value wrapper
 * @param context the function context used to read the current record and publish the output
 * @return always {@code null}; the result is published through {@code context}
 * @throws Exception if re-encoding or sending the output message fails
 */
@Override
public Void process(GenericObject genericObject, Context context) throws Exception {
    Record<?> currentRecord = context.getCurrentRecord();
    Object nativeObject = genericObject.getNativeObject();
    log.info("apply to {} {}", genericObject, nativeObject);
    // The backing message is optional; a purely diagnostic log line must not abort
    // processing when it is absent, so fall back to a null schema version.
    byte[] schemaVersion = currentRecord.getMessage().map(m -> m.getSchemaVersion()).orElse(null);
    log.info("record with schema {} version {} {}", currentRecord.getSchema(), schemaVersion, currentRecord);
    Schema<?> schema = currentRecord.getSchema();
    Schema outputSchema = schema;
    Object outputObject = nativeObject;
    boolean someThingDone = false;
    if (schema instanceof KeyValueSchema && nativeObject instanceof KeyValue) {
        KeyValueSchema kvSchema = (KeyValueSchema) schema;
        Schema keySchema = kvSchema.getKeySchema();
        Schema valueSchema = kvSchema.getValueSchema();
        // remove the field from the "valueSchema"; the key is forwarded untouched
        if (valueSchema.getSchemaInfo().getType() == SchemaType.AVRO) {
            org.apache.avro.Schema avroSchema = (org.apache.avro.Schema) valueSchema.getNativeSchema().get();
            if (avroSchema.getField(FIELD_TO_REMOVE) != null) {
                org.apache.avro.Schema modified = copySchemaWithoutRemovedField(avroSchema);
                KeyValue originalObject = (KeyValue) nativeObject;
                GenericRecord value = (GenericRecord) originalObject.getValue();
                org.apache.avro.generic.GenericRecord genericRecord = (org.apache.avro.generic.GenericRecord) value.getNativeObject();
                Object newValue = reencodeRecord(genericRecord, modified);
                Schema newValueSchema = Schema.NATIVE_AVRO(modified);
                outputSchema = Schema.KeyValue(keySchema, newValueSchema, kvSchema.getKeyValueEncodingType());
                outputObject = new KeyValue(originalObject.getKey(), newValue);
                someThingDone = true;
            }
        }
    } else if (schema.getSchemaInfo().getType() == SchemaType.AVRO) {
        org.apache.avro.Schema avroSchema = (org.apache.avro.Schema) schema.getNativeSchema().get();
        if (avroSchema.getField(FIELD_TO_REMOVE) != null) {
            org.apache.avro.Schema modified = copySchemaWithoutRemovedField(avroSchema);
            org.apache.avro.generic.GenericRecord genericRecord = (org.apache.avro.generic.GenericRecord) nativeObject;
            Schema newValueSchema = Schema.NATIVE_AVRO(modified);
            outputSchema = newValueSchema;
            outputObject = reencodeRecord(genericRecord, modified);
            someThingDone = true;
        }
    }
    if (!someThingDone) {
        // Pass-through: decide whether the value is forwarded wrapped or unwrapped.
        final boolean isStruct;
        switch(currentRecord.getSchema().getSchemaInfo().getType()) {
            case AVRO:
            case JSON:
            case PROTOBUF_NATIVE:
                isStruct = true;
                break;
            default:
                isStruct = false;
                break;
        }
        if (isStruct) {
            // GenericRecord must stay wrapped
            outputObject = currentRecord.getValue();
        } else {
            // primitives and KeyValue must be unwrapped
            outputObject = nativeObject;
        }
    }
    log.info("output {} schema {}", outputObject, outputSchema);
    context.newOutputMessage(context.getOutputTopic(), outputSchema).value(outputObject).send();
    return null;
}

/**
 * Builds a record schema equal to {@code avroSchema} minus the {@code FIELD_TO_REMOVE} field.
 *
 * @param avroSchema the source AVRO record schema
 * @return a new record schema with the same name, doc, namespace and error flag, and with
 *         fresh {@code Field} instances for every retained field
 */
private org.apache.avro.Schema copySchemaWithoutRemovedField(org.apache.avro.Schema avroSchema) {
    // Work on a copy obtained by re-parsing the schema's JSON form.
    org.apache.avro.Schema originalAvroSchema = new org.apache.avro.Schema.Parser().parse(avroSchema.toString(false));
    return org.apache.avro.Schema.createRecord(originalAvroSchema.getName(), originalAvroSchema.getDoc(), originalAvroSchema.getNamespace(), originalAvroSchema.isError(), originalAvroSchema.getFields().stream().filter(f -> !f.name().equals(FIELD_TO_REMOVE)).map(f -> new org.apache.avro.Schema.Field(f.name(), f.schema(), f.doc(), f.defaultVal(), f.order())).collect(Collectors.toList()));
}

/**
 * Copies the fields named by {@code target} out of {@code source} into a new record and
 * serializes that record with AVRO binary encoding.
 *
 * @param source the record to read field values from
 * @param target the schema of the record to write
 * @return the binary-encoded bytes of the new record
 * @throws java.io.IOException if the writer fails to encode the record
 */
private byte[] reencodeRecord(org.apache.avro.generic.GenericRecord source, org.apache.avro.Schema target) throws java.io.IOException {
    org.apache.avro.generic.GenericRecord newRecord = new GenericData.Record(target);
    for (org.apache.avro.Schema.Field field : target.getFields()) {
        newRecord.put(field.name(), source.get(field.name()));
    }
    GenericDatumWriter<org.apache.avro.generic.GenericRecord> writer = new GenericDatumWriter<>(target);
    ByteArrayOutputStream oo = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(oo, null);
    writer.write(newRecord, encoder);
    return oo.toByteArray();
}
Also used : GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericObject(org.apache.pulsar.client.api.schema.GenericObject) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Collectors(java.util.stream.Collectors) SchemaType(org.apache.pulsar.common.schema.SchemaType) GenericData(org.apache.avro.generic.GenericData) Schema(org.apache.pulsar.client.api.Schema) BinaryEncoder(org.apache.avro.io.BinaryEncoder) GenericRecord(org.apache.pulsar.client.api.schema.GenericRecord) KeyValue(org.apache.pulsar.common.schema.KeyValue) Slf4j(lombok.extern.slf4j.Slf4j) Function(org.apache.pulsar.functions.api.Function) KeyValueSchema(org.apache.pulsar.client.api.schema.KeyValueSchema) Context(org.apache.pulsar.functions.api.Context) Record(org.apache.pulsar.functions.api.Record) EncoderFactory(org.apache.avro.io.EncoderFactory) KeyValue(org.apache.pulsar.common.schema.KeyValue) Schema(org.apache.pulsar.client.api.Schema) KeyValueSchema(org.apache.pulsar.client.api.schema.KeyValueSchema) GenericRecord(org.apache.pulsar.client.api.schema.GenericRecord) Record(org.apache.pulsar.functions.api.Record) GenericRecord(org.apache.pulsar.client.api.schema.GenericRecord) KeyValueSchema(org.apache.pulsar.client.api.schema.KeyValueSchema) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericData(org.apache.avro.generic.GenericData) BinaryEncoder(org.apache.avro.io.BinaryEncoder) GenericObject(org.apache.pulsar.client.api.schema.GenericObject)

Example 8 with Context

use of org.apache.pulsar.functions.api.Context in project pulsar by apache.

the class WindowFunctionExecutorTest method setUp.

/**
 * Rebuilds the shared fixtures before every test method: a new executor under test, a
 * window configuration with a timestamp extractor, and a mocked {@link Context} that
 * exposes the function identity, topics, current record and that configuration.
 */
@BeforeMethod
public void setUp() {
    testWindowedPulsarFunction = new TestWindowFunctionExecutor();

    // Window settings shared by the tests.
    windowConfig = new WindowConfig();
    windowConfig.setActualWindowFunctionClassName(TestFunction.class.getName());
    windowConfig.setTimestampExtractorClassName(TestTimestampExtractor.class.getName());
    windowConfig.setWindowLengthDurationMs(20L);
    windowConfig.setSlidingIntervalDurationMs(10L);
    windowConfig.setMaxLagMs(5L);
    // very large emit interval: watermarks are triggered manually to avoid timing issues
    windowConfig.setWatermarkEmitIntervalMs(100000L);

    // The config reaches the function as a plain map, obtained via a JSON round trip.
    Gson gson = new Gson();
    Map<?, ?> userConfig = gson.fromJson(gson.toJson(windowConfig), Map.class);

    // Record returned as the context's current record; only its topic name is stubbed.
    Record<?> currentRecord = mock(Record.class);
    doReturn(Optional.of("test-topic")).when(currentRecord).getTopicName();

    context = mock(Context.class);
    doReturn("test-tenant").when(context).getTenant();
    doReturn("test-namespace").when(context).getNamespace();
    doReturn("test-function").when(context).getFunctionName();
    doReturn(currentRecord).when(context).getCurrentRecord();
    doReturn(Collections.singleton("test-source-topic")).when(context).getInputTopics();
    doReturn("test-sink-topic").when(context).getOutputTopic();
    doReturn(Optional.of(userConfig)).when(context).getUserConfigValue(WindowConfig.WINDOW_CONFIG_KEY);
}
Also used : WindowContext(org.apache.pulsar.functions.api.WindowContext) Context(org.apache.pulsar.functions.api.Context) WindowConfig(org.apache.pulsar.common.functions.WindowConfig) Gson(com.google.gson.Gson) BeforeMethod(org.testng.annotations.BeforeMethod)

Aggregations

Context (org.apache.pulsar.functions.api.Context)8 Gson (com.google.gson.Gson)6 WindowConfig (org.apache.pulsar.common.functions.WindowConfig)6 WindowContext (org.apache.pulsar.functions.api.WindowContext)6 BeforeMethod (org.testng.annotations.BeforeMethod)3 Test (org.testng.annotations.Test)3 ByteArrayOutputStream (java.io.ByteArrayOutputStream)2 Collectors (java.util.stream.Collectors)2 Slf4j (lombok.extern.slf4j.Slf4j)2 GenericData (org.apache.avro.generic.GenericData)2 GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter)2 BinaryEncoder (org.apache.avro.io.BinaryEncoder)2 EncoderFactory (org.apache.avro.io.EncoderFactory)2 Schema (org.apache.pulsar.client.api.Schema)2 GenericObject (org.apache.pulsar.client.api.schema.GenericObject)2 GenericRecord (org.apache.pulsar.client.api.schema.GenericRecord)2 KeyValueSchema (org.apache.pulsar.client.api.schema.KeyValueSchema)2 KeyValue (org.apache.pulsar.common.schema.KeyValue)2 SchemaType (org.apache.pulsar.common.schema.SchemaType)2 Function (org.apache.pulsar.functions.api.Function)2