Use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.
The class CombinedLogRecordFormatTest, method testCLFLogWithNull.
@Test
public void testCLFLogWithNull() throws UnsupportedTypeException, UnexpectedFormatException {
  CombinedLogRecordFormat format = new CombinedLogRecordFormat();
  FormatSpecification spec = new FormatSpecification(CombinedLogRecordFormat.class.getCanonicalName(),
                                                     null, ImmutableMap.<String, String>of());
  format.initialize(spec);
  String data = "10.10.10.10 - - [01/Feb/2015:09:58:24 +0000] \"-\" 408 - \"-\" \"-\"";
  StructuredRecord output = format.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(data))));
  Assert.assertEquals("10.10.10.10", output.get("remote_host"));
  Assert.assertNull(output.get("remote_login"));
  Assert.assertNull(output.get("auth_user"));
  Assert.assertEquals("01/Feb/2015:09:58:24 +0000", output.get("request_time"));
  Assert.assertNull(output.get("request"));
  // cast needed: the generic return type of get() makes the assertEquals overload ambiguous
  Assert.assertEquals(408, (int) output.get("status"));
  Assert.assertNull(output.get("content_length"));
  Assert.assertNull(output.get("referrer"));
  Assert.assertNull(output.get("user_agent"));
}
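For contrast, a hedged sketch of the happy path through the same API; the sample log line and the expectation that the parser strips the surrounding quotes are assumptions, not taken from the CDAP sources.
// Hypothetical companion test: same format object, fully populated combined-log line.
String full = "10.10.10.10 - frank [01/Feb/2015:09:58:24 +0000] "
  + "\"GET /index.html HTTP/1.1\" 200 2326 \"http://example.com/\" \"Mozilla/5.0\"";
StructuredRecord parsed = format.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(full))));
Assert.assertEquals("frank", parsed.get("auth_user"));
Assert.assertEquals("GET /index.html HTTP/1.1", parsed.get("request")); // assumes quotes are stripped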
Use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.
The class StreamBatchSource, method transform.
@Override
public void transform(KeyValue<LongWritable, Object> input, Emitter<StructuredRecord> emitter) throws Exception {
  // if no format spec was given, the value is a StreamEvent
  if (Strings.isNullOrEmpty(streamBatchConfig.format)) {
    StreamEvent event = (StreamEvent) input.getValue();
    Map<String, String> headers = Objects.firstNonNull(event.getHeaders(), ImmutableMap.<String, String>of());
    StructuredRecord output = StructuredRecord.builder(DEFAULT_SCHEMA)
      .set("ts", input.getKey().get())
      .set("headers", headers)
      .set("body", event.getBody())
      .build();
    emitter.emit(output);
  } else {
    // otherwise, it will be a GenericStreamEventData
    @SuppressWarnings("unchecked")
    GenericStreamEventData<StructuredRecord> event = (GenericStreamEventData<StructuredRecord>) input.getValue();
    StructuredRecord record = event.getBody();
    Schema inputSchema = record.getSchema();
    Schema outputSchema = schemaCache.get(inputSchema);
    // if we haven't seen this schema before, generate the output schema (add ts and header fields)
    if (outputSchema == null) {
      List<Schema.Field> fields = Lists.newArrayList();
      fields.add(DEFAULT_SCHEMA.getField("ts"));
      fields.add(DEFAULT_SCHEMA.getField("headers"));
      fields.addAll(inputSchema.getFields());
      outputSchema = Schema.recordOf(inputSchema.getRecordName(), fields);
      schemaCache.put(inputSchema, outputSchema);
    }
    // easier to just deal with an empty map than deal with nullables, so the headers field is non-nullable
    Map<String, String> headers = Objects.firstNonNull(event.getHeaders(), ImmutableMap.<String, String>of());
    StructuredRecord.Builder builder = StructuredRecord.builder(outputSchema);
    builder.set("ts", input.getKey().get());
    builder.set("headers", headers);
    for (Schema.Field field : inputSchema.getFields()) {
      String fieldName = field.getName();
      builder.set(fieldName, record.get(fieldName));
    }
    emitter.emit(builder.build());
  }
}
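The transform above relies on a DEFAULT_SCHEMA constant carrying the ts, headers, and body fields. A minimal sketch of what it plausibly looks like, inferred from the field names used above; the record name and the body type are assumptions.
// Assumed shape of DEFAULT_SCHEMA (record name and body type are guesses).
private static final Schema DEFAULT_SCHEMA = Schema.recordOf(
  "streamEvent",
  Schema.Field.of("ts", Schema.of(Schema.Type.LONG)),
  Schema.Field.of("headers", Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.STRING))),
  Schema.Field.of("body", Schema.of(Schema.Type.BYTES)));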
Use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.
The class LookupTransform, method transform.
@Override
public void transform(StructuredRecord input, Emitter<StructuredRecord> emitter) throws Exception {
  T lookedUpValue = lookup.lookup((String) input.get(config.lookupKey));
  // for the output schema, copy all the input fields, and add the 'destinationField'
  List<Schema.Field> outFields = new ArrayList<>();
  for (Schema.Field field : input.getSchema().getFields()) {
    outFields.add(field);
  }
  if (lookedUpValue instanceof String) {
    outFields.add(Schema.Field.of(config.destinationField, Schema.of(Schema.Type.STRING)));
  } else if (lookedUpValue instanceof Row) {
    Row lookedupRow = (Row) lookedUpValue;
    for (byte[] column : lookedupRow.getColumns().keySet()) {
      outFields.add(Schema.Field.of(Bytes.toString(column), Schema.of(Schema.Type.STRING)));
    }
  } else {
    throw new IllegalArgumentException("Unexpected value type: " + lookedUpValue.getClass());
  }
  Schema outSchema = Schema.recordOf(input.getSchema().getRecordName(), outFields);
  // copy all the values
  StructuredRecord.Builder outputBuilder = StructuredRecord.builder(outSchema);
  for (Schema.Field inField : input.getSchema().getFields()) {
    if (inField.getName().equals(config.lookupKey)) {
      if (lookedUpValue instanceof String) {
        outputBuilder.set(config.destinationField, lookedUpValue);
      } else {
        // due to the check above, we know it's a Row
        Row lookedupRow = (Row) lookedUpValue;
        for (Map.Entry<byte[], byte[]> entry : lookedupRow.getColumns().entrySet()) {
          outputBuilder.set(Bytes.toString(entry.getKey()), Bytes.toString(entry.getValue()));
        }
      }
    }
    // what if the destinationField already exists?
    outputBuilder.set(inField.getName(), input.get(inField.getName()));
  }
  emitter.emit(outputBuilder.build());
}
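The method reads two settings off its plugin config. A minimal sketch of that config, assuming a standard CDAP PluginConfig; the class name is illustrative only.
// Hypothetical config shape; only the two fields referenced above are shown.
public static class LookupConfig extends PluginConfig {
  private String lookupKey;        // name of the input field whose value is looked up
  private String destinationField; // output field that receives a String lookup result
}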
Use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.
The class DupeFlagger, method merge.
@Override
public StructuredRecord merge(StructuredRecord joinKey, Iterable<JoinElement<StructuredRecord>> joinRow) {
  StructuredRecord record = null;
  boolean containsDupe = false;
  for (JoinElement<StructuredRecord> element : joinRow) {
    if (element.getStageName().equals(config.keep)) {
      record = element.getInputRecord();
    } else {
      containsDupe = true;
    }
  }
  if (record == null) {
    // can only happen if 'keep' was a macro and did not evaluate to one of the inputs
    throw new IllegalArgumentException("No record for " + config.keep + " was found.");
  }
  Schema outputSchema = getOutputSchema(record.getSchema());
  StructuredRecord.Builder outputBuilder = StructuredRecord.builder(outputSchema)
    .set(config.flagField, containsDupe);
  for (Schema.Field field : record.getSchema().getFields()) {
    outputBuilder.set(field.getName(), record.get(field.getName()));
  }
  return outputBuilder.build();
}
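getOutputSchema is not shown in this snippet. A minimal sketch, under the assumption that it simply appends the boolean flag field to the input schema; the ".flagged" record-name suffix is an invention for illustration.
// Assumed helper: copy the input fields and append the flag field.
private Schema getOutputSchema(Schema inputSchema) {
  List<Schema.Field> fields = new ArrayList<>(inputSchema.getFields());
  fields.add(Schema.Field.of(config.flagField, Schema.of(Schema.Type.BOOLEAN)));
  return Schema.recordOf(inputSchema.getRecordName() + ".flagged", fields);
}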
Use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.
The class MockRuntimeDatasetSink, method readOutput.
/**
 * Used to read the records written by this sink.
 *
 * @param tableManager dataset manager used to get the sink dataset to read from
 */
public static List<StructuredRecord> readOutput(DataSetManager<Table> tableManager) throws Exception {
  Table table = tableManager.get();
  try (Scanner scanner = table.scan(null, null)) {
    List<StructuredRecord> records = new ArrayList<>();
    Row row;
    while ((row = scanner.next()) != null) {
      Schema schema = Schema.parseJson(row.getString(SCHEMA_COL));
      String recordStr = row.getString(RECORD_COL);
      records.add(StructuredRecordStringConverter.fromJsonString(recordStr, schema));
    }
    return records;
  }
}
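Test-side usage might look like the following sketch; the dataset name and the getDataset(...) helper from CDAP's TestBase are assumptions.
// Hypothetical caller in a pipeline test.
DataSetManager<Table> tableManager = getDataset("sinkDataset"); // dataset name is assumed
List<StructuredRecord> written = MockRuntimeDatasetSink.readOutput(tableManager);
Assert.assertEquals(1, written.size());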