Search in sources :

Example 41 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class MockJoiner method joinOn.

/**
 * Builds the join-key record for an input record coming from the given stage.
 *
 * <p>The join keys configured for {@code stageName} are looked up in the plugin config. The
 * returned record uses a schema named {@code "join.key"} with one field per join key; fields are
 * named by their 1-based position in the key list ("1", "2", ...) rather than by the input field
 * name, and each takes the schema of the corresponding input field.
 * NOTE(review): positional names presumably keep key records from different stages comparable
 * even when their key fields are named differently - confirm against the joiner contract.
 *
 * @param stageName name of the stage the record came from, used to look up its join keys
 * @param record the input record to extract the join key from
 * @return a record holding the values of the join-key fields, in configured order
 * @throws IllegalArgumentException if no join keys are configured for {@code stageName}, or a
 *     configured join key is not a field of the record's schema
 */
@Override
public StructuredRecord joinOn(String stageName, StructuredRecord record) throws Exception {
    Schema schema = record.getSchema();
    // TODO create output record based on fields properties
    Map<String, List<String>> stageToJoinKey = config.getJoinKeys();
    List<String> joinKeys = stageToJoinKey.get(stageName);
    if (joinKeys == null) {
        // Fail with a clear message instead of a NullPointerException from the loop below.
        throw new IllegalArgumentException("No join keys configured for stage '" + stageName + "'");
    }

    // First pass: validate every key and derive the key schema before touching the record values.
    List<Schema.Field> fields = new ArrayList<>(joinKeys.size());
    int position = 1;
    for (String joinKey : joinKeys) {
        Schema.Field inputField = schema.getField(joinKey);
        if (inputField == null) {
            throw new IllegalArgumentException(
                "Join key '" + joinKey + "' for stage '" + stageName + "' is not a field of the input schema");
        }
        fields.add(Schema.Field.of(String.valueOf(position++), inputField.getSchema()));
    }
    Schema keySchema = Schema.recordOf("join.key", fields);

    // Second pass: copy the key values under the same positional field names.
    StructuredRecord.Builder keyRecordBuilder = StructuredRecord.builder(keySchema);
    position = 1;
    for (String joinKey : joinKeys) {
        keyRecordBuilder.set(String.valueOf(position++), record.get(joinKey));
    }
    return keyRecordBuilder.build();
}
Also used : PluginPropertyField(co.cask.cdap.api.plugin.PluginPropertyField) Schema(co.cask.cdap.api.data.schema.Schema) ArrayList(java.util.ArrayList) List(java.util.List) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord)

Example 42 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class MockJoiner method merge.

/**
 * Combines the input records of one join row into a single output record.
 *
 * <p>Every field of every input record in {@code joinRow} is copied, by name, into a builder
 * created from {@code outputSchema}. Fields are set in iteration order.
 *
 * @param joinKey the key the row was joined on; not read by this implementation
 * @param joinRow the joined elements whose input records are merged
 * @return the record built from all copied fields
 */
@Override
public StructuredRecord merge(StructuredRecord joinKey, Iterable<JoinElement<StructuredRecord>> joinRow) {
    StructuredRecord.Builder merged = StructuredRecord.builder(outputSchema);
    for (JoinElement<StructuredRecord> element : joinRow) {
        StructuredRecord input = element.getInputRecord();
        for (Schema.Field inputField : input.getSchema().getFields()) {
            String name = inputField.getName();
            merged.set(name, input.get(name));
        }
    }
    return merged.build();
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord)

Example 43 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class MockSink method write.

/**
 * Writes one batch of records as JSON to a numbered file under {@code dir}.
 *
 * <p>The batch is written to a file named after the current {@code count}; afterwards an empty
 * marker file named {@code <count>.done} is created and {@code count} is incremented. When
 * {@code dir} is {@code null} nothing is written and the records are not iterated.
 *
 * @param records the records of this batch
 * @param dataWriter not used by this implementation
 * @return the number of records written, or 0 when {@code dir} is {@code null}
 * @throws Exception if writing the output file or creating the marker file fails
 */
@Override
public int write(Iterable<StructuredRecord> records, DataWriter dataWriter) throws Exception {
    if (dir == null) {
        return 0;
    }
    File outputFile = new File(dir, String.valueOf(count));
    File doneFile = new File(dir, count + ".done");

    // Materialize the batch exactly once: the Iterable may be single-pass, and the same snapshot
    // must drive both the file content and the returned count.
    List<StructuredRecord> outputRecords = new ArrayList<>();
    for (StructuredRecord record : records) {
        outputRecords.add(record);
    }

    // Serialize the materialized list, not the original Iterable, so the JSON is always an array
    // of the records that were counted.
    Files.write(outputFile.toPath(), GSON.toJson(outputRecords).getBytes(StandardCharsets.UTF_8));
    // Marker is created only after the data file has been fully written.
    doneFile.createNewFile();
    count++;
    return outputRecords.size();
}
Also used : ArrayList(java.util.ArrayList) File(java.io.File) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord)

Example 44 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class FlattenErrorTransform method transform.

/**
 * Flattens an error record into a single output record and emits it.
 *
 * <p>The output record is built with the schema returned by {@code getOutputSchema} for the
 * invalid record's schema. All fields of the invalid record are copied over by name, then the
 * error details are added under {@code errMsg}, {@code errCode} and {@code errStage}.
 *
 * @param input the error record wrapping the invalid record and its error details
 * @param emitter receives the single flattened record
 */
@Override
public void transform(ErrorRecord<StructuredRecord> input, Emitter<StructuredRecord> emitter) throws Exception {
    StructuredRecord failed = input.getRecord();
    Schema failedSchema = failed.getSchema();
    StructuredRecord.Builder flattened = StructuredRecord.builder(getOutputSchema(failedSchema));

    // Carry over every field of the invalid record unchanged.
    for (Schema.Field sourceField : failedSchema.getFields()) {
        String name = sourceField.getName();
        flattened.set(name, failed.get(name));
    }

    // Append the error details alongside the original fields.
    flattened.set("errMsg", input.getErrorMessage());
    flattened.set("errCode", input.getErrorCode());
    flattened.set("errStage", input.getStageName());

    emitter.emit(flattened.build());
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord)

Example 45 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class JsonStructuredRecordDatumReader method decodeRecord.

/**
 * Decodes one JSON object into a {@link StructuredRecord} of the given schema.
 *
 * <p>Each property name in the JSON object is looked up in {@code schema}. Properties that match
 * a schema field are decoded with that field's schema and set on the record; properties with no
 * matching field are skipped.
 *
 * @param decoder the decoder from which the underlying JSON reader is obtained
 * @param schema the record schema to decode against
 * @return the record built from the recognized properties
 * @throws IOException if reading from the JSON reader fails
 */
@Override
protected StructuredRecord decodeRecord(Decoder decoder, Schema schema) throws IOException {
    StructuredRecord.Builder recordBuilder = StructuredRecord.builder(schema);
    JsonReader reader = getJsonReader(decoder);

    reader.beginObject();
    while (reader.peek() != JsonToken.END_OBJECT) {
        String propertyName = reader.nextName();
        Schema.Field knownField = schema.getField(propertyName);
        if (knownField != null) {
            recordBuilder.set(knownField.getName(), decode(decoder, knownField.getSchema()));
        } else {
            // Property is not part of the schema: consume its value and move on.
            reader.skipValue();
        }
    }
    reader.endObject();

    return recordBuilder.build();
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) JsonReader(com.google.gson.stream.JsonReader) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord)

Aggregations

StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)97 Schema (co.cask.cdap.api.data.schema.Schema)71 Test (org.junit.Test)51 Table (co.cask.cdap.api.dataset.table.Table)36 ETLStage (co.cask.cdap.etl.proto.v2.ETLStage)36 ApplicationId (co.cask.cdap.proto.id.ApplicationId)36 ApplicationManager (co.cask.cdap.test.ApplicationManager)33 AppRequest (co.cask.cdap.proto.artifact.AppRequest)31 KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable)25 ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig)25 WorkflowManager (co.cask.cdap.test.WorkflowManager)23 ArrayList (java.util.ArrayList)20 StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)19 FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification)18 HashSet (java.util.HashSet)10 DataStreamsConfig (co.cask.cdap.etl.proto.v2.DataStreamsConfig)8 File (java.io.File)8 TimeoutException (java.util.concurrent.TimeoutException)8 Put (co.cask.cdap.api.dataset.table.Put)7 ETLPlugin (co.cask.cdap.etl.proto.v2.ETLPlugin)7