Use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
From the class RecordFormats, method createInitializedFormat.
/**
 * Creates and initializes a record format for the given format specification.
 *
 * <p>The lower-cased name in the specification is first checked against standard names like "CSV" or "TSV".
 * If it is a standard name, the corresponding format is created, with the specification settings applied on
 * top of the default settings for that name. For example, "CSV" will map to the
 * {@link DelimitedStringsRecordFormat} with a comma as the delimiter, whereas "TSV" will map to the
 * {@link DelimitedStringsRecordFormat} with a tab as the delimiter.
 * If the name is not a standard name, it is interpreted as a class name and that class is instantiated.
 *
 * <p>A default schema registered for the name is used only when the specification carries no schema of its own.
 *
 * @param spec the specification for the format to create and initialize
 * @param <FROM> Type of underlying object the format reads
 * @param <TO> Type of object the format reads the underlying object into
 * @return Initialized {@link RecordFormat} based on the given name
 * @throws IllegalAccessException if there was an illegal access when instantiating the record format
 * @throws InstantiationException if there was an exception instantiating the record format
 * @throws ClassNotFoundException if the record format class could not be found
 * @throws UnsupportedTypeException if the specification is not supported by the format
 */
public static <FROM, TO> RecordFormat<FROM, TO> createInitializedFormat(FormatSpecification spec)
  throws IllegalAccessException, InstantiationException, ClassNotFoundException, UnsupportedTypeException {
  String name = spec.getName();
  // Normalize the name once for all lookups. Locale.ROOT keeps the key independent of the JVM's
  // default locale (e.g. under a Turkish locale an upper-case 'I' would not lower-case to 'i').
  String lookupKey = name.toLowerCase(java.util.Locale.ROOT);

  // check if it's a standard class; if not, the original (case-preserved) name is used as a class name
  Class<? extends RecordFormat> formatClass = NAME_CLASS_MAP.get(lookupKey);
  @SuppressWarnings("unchecked")
  RecordFormat<FROM, TO> format = (RecordFormat<FROM, TO>)
    (formatClass == null ? Class.forName(name).newInstance() : formatClass.newInstance());

  // compute actual settings: start from a copy of the defaults (if any) and let the spec override them
  Map<String, String> settings;
  Map<String, String> defaultSettings = NAME_SETTINGS_MAP.get(lookupKey);
  if (defaultSettings != null) {
    settings = Maps.newHashMap(defaultSettings);
    if (spec.getSettings() != null) {
      settings.putAll(spec.getSettings());
    }
  } else {
    // no defaults for this name: pass the spec's settings through unchanged
    settings = spec.getSettings();
  }

  // compute actual schema: the default schema only fills in when the spec has none
  Schema schema;
  Schema defaultSchema = DEFAULT_SCHEMA_MAP.get(lookupKey);
  if (defaultSchema != null && spec.getSchema() == null) {
    schema = defaultSchema;
  } else {
    schema = spec.getSchema();
  }

  // the format is initialized with the original name plus the resolved schema and settings
  FormatSpecification actualSpec = new FormatSpecification(name, schema, settings);
  format.initialize(actualSpec);
  return format;
}
Use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
From the class AvroRecordFormatTest, method testMultipleReads.
/**
 * Checks that a single initialized Avro format instance can be used for two consecutive reads,
 * each returning the value of field "x" that was written into the corresponding event.
 */
@Test
public void testMultipleReads() throws Exception {
  Schema recordSchema = Schema.recordOf("record", Schema.Field.of("x", Schema.of(Schema.Type.INT)));
  FormatSpecification avroSpec =
    new FormatSpecification(Formats.AVRO, recordSchema, Collections.<String, String>emptyMap());
  org.apache.avro.Schema avroSchema = convertSchema(recordSchema);
  RecordFormat<StreamEvent, StructuredRecord> avroFormat = RecordFormats.createInitializedFormat(avroSpec);

  // read twice through the same format instance, with a different value each time
  for (int expected : new int[] {5, 10}) {
    GenericRecord written = new GenericRecordBuilder(avroSchema).set("x", expected).build();
    StructuredRecord decoded = avroFormat.read(toStreamEvent(written));
    Assert.assertEquals(expected, decoded.get("x"));
  }
}
Use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
From the class AvroRecordFormatTest, method testSchemaProjection.
/**
 * Checks reading with a schema that contains only the "name" field: first from an event built via
 * {@code toStreamEvent(record, true)} out of a record with both "id" and "name", then from an event
 * built via {@code toStreamEvent(record)} out of a record written with the read schema itself.
 */
@Test
public void testSchemaProjection() throws Exception {
  Schema writerSchema = Schema.recordOf(
    "source",
    Schema.Field.of("id", Schema.of(Schema.Type.INT)),
    Schema.Field.of("name", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
  Schema projectionSchema = Schema.recordOf("read", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));

  GenericRecord written = new GenericRecordBuilder(convertSchema(writerSchema))
    .set("id", 1)
    .set("name", "value")
    .build();

  FormatSpecification avroSpec =
    new FormatSpecification(Formats.AVRO, projectionSchema, ImmutableMap.<String, String>of());
  RecordFormat<StreamEvent, StructuredRecord> avroFormat = RecordFormats.createInitializedFormat(avroSpec);

  // Convert an event that has schema associated
  StructuredRecord projected = avroFormat.read(toStreamEvent(written, true));
  String expectedName = written.get("name").toString();
  String actualName = projected.get("name").toString();
  Assert.assertEquals(expectedName, actualName);

  // Convert an event that has no schema associated. The record must be written with the read schema.
  written = new GenericRecordBuilder(convertSchema(projectionSchema)).set("name", "value2").build();
  projected = avroFormat.read(toStreamEvent(written));
  expectedName = written.get("name").toString();
  actualName = projected.get("name").toString();
  Assert.assertEquals(expectedName, actualName);
}
Use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
From the class CombinedLogRecordFormatTest, method testCLFLog.
/**
 * Reads one combined-log-format line through {@link CombinedLogRecordFormat} and checks each field of
 * the resulting record. The fields given as "-" in the input line are expected to come back as null.
 */
@Test
public void testCLFLog() throws UnsupportedTypeException, UnexpectedFormatException {
  CombinedLogRecordFormat clfFormat = new CombinedLogRecordFormat();
  // the format is initialized with its own class name, no schema and empty settings
  FormatSpecification clfSpec = new FormatSpecification(
    CombinedLogRecordFormat.class.getCanonicalName(), null, ImmutableMap.<String, String>of());
  clfFormat.initialize(clfSpec);

  String logLine = "10.10.10.10 - - [01/Feb/2015:06:47:10 +0000] \"GET /browse/COOP-DBT-JOB1-238/artifact HTTP/1.1\"" + " 301 256 \"-\" \"Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/)\"";
  ByteBuffer body = ByteBuffer.wrap(Bytes.toBytes(logLine));
  StreamEvent event = new StreamEvent(body);
  StructuredRecord parsed = clfFormat.read(event);

  Assert.assertEquals("10.10.10.10", parsed.get("remote_host"));
  // "-" placeholders in the log line
  Assert.assertNull(parsed.get("remote_login"));
  Assert.assertNull(parsed.get("auth_user"));
  Assert.assertEquals("01/Feb/2015:06:47:10 +0000", parsed.get("request_time"));
  Assert.assertEquals("GET /browse/COOP-DBT-JOB1-238/artifact HTTP/1.1", parsed.get("request"));
  Assert.assertEquals(301, parsed.get("status"));
  Assert.assertEquals(256, parsed.get("content_length"));
  // the referrer is "-" in the log line as well
  Assert.assertNull(parsed.get("referrer"));
  Assert.assertEquals("Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/)", parsed.get("user_agent"));
}
Use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
From the class CombinedLogRecordFormatTest, method testInvalid.
/**
 * Feeds a truncated log line to {@link CombinedLogRecordFormat}; the test passes only if an
 * {@link UnexpectedFormatException} is thrown.
 */
@Test(expected = UnexpectedFormatException.class)
public void testInvalid() throws UnsupportedTypeException, UnexpectedFormatException {
  CombinedLogRecordFormat clfFormat = new CombinedLogRecordFormat();
  FormatSpecification clfSpec = new FormatSpecification(
    CombinedLogRecordFormat.class.getCanonicalName(), null, ImmutableMap.<String, String>of());
  clfFormat.initialize(clfSpec);

  String malformedLine = "10.10.10.10[01/Feb/2015:06:47:10 +0000";
  StreamEvent event = new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(malformedLine)));
  // the returned record is irrelevant; only the thrown exception matters for this test
  clfFormat.read(event);
}
Aggregations