use of io.cdap.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
the class CombinedLogRecordFormatTest method testCLFLog.
/**
 * Reads one complete combined-log line and checks each field of the resulting record.
 * The two "-" identity fields and the "-" referrer are expected to come back as null.
 */
@Test
public void testCLFLog() throws UnsupportedTypeException, UnexpectedFormatException {
  FormatSpecification clfSpec =
    new FormatSpecification(CombinedLogRecordFormat.class.getCanonicalName(), null, ImmutableMap.of());
  CombinedLogRecordFormat clf = new CombinedLogRecordFormat();
  clf.initialize(clfSpec);

  String logLine = "10.10.10.10 - - [01/Feb/2015:06:47:10 +0000] "
    + "\"GET /browse/COOP-DBT-JOB1-238/artifact HTTP/1.1\" 301 256 \"-\" "
    + "\"Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/)\"";
  ByteBuffer payload = ByteBuffer.wrap(Bytes.toBytes(logLine));
  StructuredRecord parsed = clf.read(payload);

  // Client identity: host is present, login and user were logged as "-".
  Assert.assertEquals("10.10.10.10", parsed.get("remote_host"));
  Assert.assertNull(parsed.get("remote_login"));
  Assert.assertNull(parsed.get("auth_user"));

  // Request details.
  Assert.assertEquals("01/Feb/2015:06:47:10 +0000", parsed.get("request_time"));
  Assert.assertEquals("GET /browse/COOP-DBT-JOB1-238/artifact HTTP/1.1", parsed.get("request"));
  Assert.assertEquals(301, (int) parsed.get("status"));
  Assert.assertEquals(256, (int) parsed.get("content_length"));

  // Trailing quoted fields: referrer was "-", user agent is kept verbatim.
  Assert.assertNull(parsed.get("referrer"));
  Assert.assertEquals("Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/)",
                      parsed.get("user_agent"));
}
use of io.cdap.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
the class RecordFormats method createInitializedFormat.
/**
 * Create an initialized record format for the given format specification.
 *
 * <p>The name in the specification is lower-cased and first checked against standard names like "CSV" or
 * "TSV". If it is a standard name, the corresponding format class is instantiated, with specification
 * settings applied on top of the default settings registered for that name. For example, "CSV" will map to
 * the {@link DelimitedStringsRecordFormat} with a comma as the delimiter, whereas "TSV" will map to the
 * {@link DelimitedStringsRecordFormat} with a tab as the delimiter. If the name is not a standard name,
 * it is interpreted as a class name and loaded through {@link Class#forName(String)}.
 *
 * <p>Settings and schema are resolved as follows:
 * <ul>
 *   <li>If default settings exist for the name, they are copied and the specification's settings (if any)
 *   are put on top of them; otherwise the specification's settings are used as they are.</li>
 *   <li>The specification's schema is used when present; the default schema for the name is used only
 *   when the specification has no schema.</li>
 * </ul>
 *
 * <p>The lookup key is derived with {@link java.util.Locale#ROOT} so that matching a standard name does not
 * depend on the default locale of the JVM.
 *
 * @param spec the specification for the format to create and initialize
 * @param <FROM> Type of underlying object the format reads
 * @param <TO> Type of object the format reads the underlying object into
 * @return Initialized {@link RecordFormat} based on the given name
 * @throws IllegalAccessException if there was an illegal access when instantiating the record format
 * @throws InstantiationException if there was an exception instantiating the record format
 * @throws ClassNotFoundException if the record format class could not be found
 * @throws UnsupportedTypeException if the specification is not supported by the format
 */
public static <FROM, TO> RecordFormat<FROM, TO> createInitializedFormat(FormatSpecification spec)
  throws IllegalAccessException, InstantiationException, ClassNotFoundException, UnsupportedTypeException {
  String name = spec.getName();
  // Single, locale-independent key shared by all three registry lookups below.
  String lookupKey = name.toLowerCase(java.util.Locale.ROOT);

  // check if it's a standard class; anything else is treated as a class name
  Class<? extends RecordFormat> formatClass = NAME_CLASS_MAP.get(lookupKey);
  @SuppressWarnings("unchecked")
  RecordFormat<FROM, TO> format = (RecordFormat<FROM, TO>)
    (formatClass == null ? Class.forName(name).newInstance() : formatClass.newInstance());

  // compute actual settings: start from a copy of the defaults (if present) and overlay the spec's settings
  Map<String, String> settings;
  Map<String, String> defaultSettings = NAME_SETTINGS_MAP.get(lookupKey);
  if (defaultSettings != null) {
    settings = Maps.newHashMap(defaultSettings);
    if (spec.getSettings() != null) {
      settings.putAll(spec.getSettings());
    }
  } else {
    settings = spec.getSettings();
  }

  // compute actual schema: the default only applies when the spec does not carry its own
  Schema schema;
  Schema defaultSchema = DEFAULT_SCHEMA_MAP.get(lookupKey);
  if (defaultSchema != null && spec.getSchema() == null) {
    schema = defaultSchema;
  } else {
    schema = spec.getSchema();
  }

  // The format is initialized with the original (not lower-cased) name plus the resolved schema/settings.
  FormatSpecification actualSpec = new FormatSpecification(name, schema, settings);
  format.initialize(actualSpec);
  return format;
}
use of io.cdap.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
the class CombinedLogRecordFormatTest method testCLFLogWithNull.
/**
 * Reads a combined-log line in which the request, content length, referrer and user agent are all
 * logged as "-", and checks that each of those fields is null in the resulting record while the
 * host, timestamp and status are still populated.
 */
@Test
public void testCLFLogWithNull() throws UnsupportedTypeException, UnexpectedFormatException {
  FormatSpecification clfSpec =
    new FormatSpecification(CombinedLogRecordFormat.class.getCanonicalName(), null, ImmutableMap.of());
  CombinedLogRecordFormat clf = new CombinedLogRecordFormat();
  clf.initialize(clfSpec);

  String logLine = "10.10.10.10 - - [01/Feb/2015:09:58:24 +0000] \"-\" 408 - \"-\" \"-\"";
  ByteBuffer payload = ByteBuffer.wrap(Bytes.toBytes(logLine));
  StructuredRecord parsed = clf.read(payload);

  // Fields that carry real values in the line.
  Assert.assertEquals("10.10.10.10", parsed.get("remote_host"));
  Assert.assertEquals("01/Feb/2015:09:58:24 +0000", parsed.get("request_time"));
  Assert.assertEquals(408, (int) parsed.get("status"));

  // Every "-" placeholder must surface as null.
  Assert.assertNull(parsed.get("remote_login"));
  Assert.assertNull(parsed.get("auth_user"));
  Assert.assertNull(parsed.get("request"));
  Assert.assertNull(parsed.get("content_length"));
  Assert.assertNull(parsed.get("referrer"));
  Assert.assertNull(parsed.get("user_agent"));
}
use of io.cdap.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
the class CombinedLogRecordFormatTest method testInvalid.
/**
 * Feeds the format a line that is cut off after the opening of the timestamp and expects
 * {@code read} to reject it with an {@link UnexpectedFormatException}.
 */
@Test(expected = UnexpectedFormatException.class)
public void testInvalid() throws UnsupportedTypeException, UnexpectedFormatException {
  CombinedLogRecordFormat format = new CombinedLogRecordFormat();
  FormatSpecification spec =
    new FormatSpecification(CombinedLogRecordFormat.class.getCanonicalName(), null, ImmutableMap.of());
  format.initialize(spec);

  String data = "10.10.10.10[01/Feb/2015:06:47:10 +0000";
  // The result is deliberately not captured: the test passes only if this call throws.
  format.read(ByteBuffer.wrap(Bytes.toBytes(data)));
}
use of io.cdap.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
the class CombinedLogRecordFormatTest method testCLFLogWithEscapedDoubleQuotes.
/**
 * Reads a combined-log line whose user-agent field contains backslash-escaped double quotes and
 * checks that the escaped quotes stay inside the user-agent value instead of ending the field,
 * and that all other fields are extracted as usual.
 */
@Test
public void testCLFLogWithEscapedDoubleQuotes() throws UnsupportedTypeException, UnexpectedFormatException {
  FormatSpecification clfSpec =
    new FormatSpecification(CombinedLogRecordFormat.class.getCanonicalName(), null, ImmutableMap.of());
  CombinedLogRecordFormat clf = new CombinedLogRecordFormat();
  clf.initialize(clfSpec);

  String logLine = "10.10.10.10 - - [01/Feb/2015:06:38:58 +0000] "
    + "\"GET /plugins/servlet/buildStatusImage/CDAP-DUT HTTP/1.1\" 301 257 \"http://cdap.io/\" "
    + "\"\\\"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
    + "Chrome/31.0.1650.57 Safari/537.36 OPR/18.0.1284.49\\\"\"";
  ByteBuffer payload = ByteBuffer.wrap(Bytes.toBytes(logLine));
  StructuredRecord parsed = clf.read(payload);

  // Client identity: host is present, login and user were logged as "-".
  Assert.assertEquals("10.10.10.10", parsed.get("remote_host"));
  Assert.assertNull(parsed.get("remote_login"));
  Assert.assertNull(parsed.get("auth_user"));

  // Request details.
  Assert.assertEquals("01/Feb/2015:06:38:58 +0000", parsed.get("request_time"));
  Assert.assertEquals("GET /plugins/servlet/buildStatusImage/CDAP-DUT HTTP/1.1", parsed.get("request"));
  Assert.assertEquals(301, (int) parsed.get("status"));
  Assert.assertEquals(257, (int) parsed.get("content_length"));
  Assert.assertEquals("http://cdap.io/", parsed.get("referrer"));

  // The user agent keeps its backslash-escaped quotes exactly as they appear in the line.
  Assert.assertEquals("\\\"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
                        + "(KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36 OPR/18.0.1284.49\\\"",
                      parsed.get("user_agent"));
}
Aggregations