Search in sources :

Example 11 with FormatSpecification

use of io.cdap.cdap.api.data.format.FormatSpecification in project cdap by caskdata.

the class CombinedLogRecordFormatTest method testCLFLog.

/**
 * Reads a well-formed combined-log line and checks every field of the resulting record.
 * The "-" placeholders for remote login, auth user and referrer are expected to come back as null.
 */
@Test
public void testCLFLog() throws UnsupportedTypeException, UnexpectedFormatException {
    CombinedLogRecordFormat logFormat = new CombinedLogRecordFormat();
    // No schema and no settings are supplied to the format.
    logFormat.initialize(new FormatSpecification(CombinedLogRecordFormat.class.getCanonicalName(), null, ImmutableMap.of()));

    String logLine = "10.10.10.10 - - [01/Feb/2015:06:47:10 +0000] \"GET /browse/COOP-DBT-JOB1-238/artifact HTTP/1.1\"" + " 301 256 \"-\" \"Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/)\"";
    ByteBuffer payload = ByteBuffer.wrap(Bytes.toBytes(logLine));
    StructuredRecord record = logFormat.read(payload);

    // Fields that carry a value in the log line.
    Assert.assertEquals("10.10.10.10", record.get("remote_host"));
    Assert.assertEquals("01/Feb/2015:06:47:10 +0000", record.get("request_time"));
    Assert.assertEquals("GET /browse/COOP-DBT-JOB1-238/artifact HTTP/1.1", record.get("request"));
    Assert.assertEquals(301, (int) record.get("status"));
    Assert.assertEquals(256, (int) record.get("content_length"));
    Assert.assertEquals("Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/)", record.get("user_agent"));

    // Fields written as "-" in the log line.
    Assert.assertNull(record.get("remote_login"));
    Assert.assertNull(record.get("auth_user"));
    Assert.assertNull(record.get("referrer"));
}
Also used : FormatSpecification(io.cdap.cdap.api.data.format.FormatSpecification) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 12 with FormatSpecification

use of io.cdap.cdap.api.data.format.FormatSpecification in project cdap by caskdata.

the class RecordFormats method createInitializedFormat.

/**
 * Create an initialized record format for the given format specification. The name in the specification is
 * first checked against standard names like "CSV" or "TSV". If it is a standard name, the corresponding
 * format will be created, with specification settings applied on top of default settings.
 * For example, "CSV" will map to the {@link DelimitedStringsRecordFormat}, with a comma as the delimiter,
 * whereas "TSV" will map to the {@link DelimitedStringsRecordFormat}, with a tab as the delimiter.
 * If the name is not a standard name, it is interpreted as a class name.
 *
 * <p>The standard-name lookup lower-cases the name with {@link java.util.Locale#ROOT}, so the result does not
 * depend on the JVM's default locale. When no default settings exist for the name, the settings of the
 * specification are passed through as they are. The default schema is used only when the specification
 * does not carry a schema of its own.
 *
 * @param spec the specification for the format to create and initialize
 * @param <FROM> Type of underlying object the format reads
 * @param <TO> Type of object the format reads the underlying object into
 * @return Initialized {@link RecordFormat} based on the given name
 * @throws IllegalAccessException if there was an illegal access when instantiating the record format
 * @throws InstantiationException if there was an exception instantiating the record format
 * @throws ClassNotFoundException if the record format class could not be found
 * @throws UnsupportedTypeException if the specification is not supported by the format
 */
public static <FROM, TO> RecordFormat<FROM, TO> createInitializedFormat(FormatSpecification spec) throws IllegalAccessException, InstantiationException, ClassNotFoundException, UnsupportedTypeException {
    String name = spec.getName();
    // Lower-case once, with a fixed locale: the default-locale variant would turn "I" into a dotless "ı"
    // under e.g. a Turkish locale, so a standard name containing "I" would no longer match its key.
    // NOTE(review): assumes the keys of the three lookup maps are lower-case ASCII - confirm at their definition.
    String lookupKey = name.toLowerCase(java.util.Locale.ROOT);
    // check if it's a standard class; otherwise the original (not lower-cased) name is used as a class name
    Class<? extends RecordFormat> formatClass = NAME_CLASS_MAP.get(lookupKey);
    @SuppressWarnings("unchecked") RecordFormat<FROM, TO> format = (RecordFormat<FROM, TO>) (formatClass == null ? Class.forName(name).newInstance() : formatClass.newInstance());
    // compute actual settings: start from a copy of the defaults (if any) and let the specification override them
    Map<String, String> settings;
    Map<String, String> defaultSettings = NAME_SETTINGS_MAP.get(lookupKey);
    if (defaultSettings != null) {
        settings = Maps.newHashMap(defaultSettings);
        if (spec.getSettings() != null) {
            settings.putAll(spec.getSettings());
        }
    } else {
        settings = spec.getSettings();
    }
    // compute actual schema: the default applies only when the specification has no schema
    Schema schema;
    Schema defaultSchema = DEFAULT_SCHEMA_MAP.get(lookupKey);
    if (defaultSchema != null && spec.getSchema() == null) {
        schema = defaultSchema;
    } else {
        schema = spec.getSchema();
    }
    FormatSpecification actualSpec = new FormatSpecification(name, schema, settings);
    format.initialize(actualSpec);
    return format;
}
Also used : RecordFormat(io.cdap.cdap.api.data.format.RecordFormat) Schema(io.cdap.cdap.api.data.schema.Schema) FormatSpecification(io.cdap.cdap.api.data.format.FormatSpecification)

Example 13 with FormatSpecification

use of io.cdap.cdap.api.data.format.FormatSpecification in project cdap by caskdata.

the class CombinedLogRecordFormatTest method testCLFLogWithNull.

/**
 * Reads a combined-log line in which the request, content length, referrer and user agent
 * are all written as "-", and checks that each of them is reported as null.
 */
@Test
public void testCLFLogWithNull() throws UnsupportedTypeException, UnexpectedFormatException {
    CombinedLogRecordFormat logFormat = new CombinedLogRecordFormat();
    // No schema and no settings are supplied to the format.
    logFormat.initialize(new FormatSpecification(CombinedLogRecordFormat.class.getCanonicalName(), null, ImmutableMap.of()));

    String logLine = "10.10.10.10 - - [01/Feb/2015:09:58:24 +0000] \"-\" 408 - \"-\" \"-\"";
    ByteBuffer payload = ByteBuffer.wrap(Bytes.toBytes(logLine));
    StructuredRecord record = logFormat.read(payload);

    // Only host, timestamp and status carry a value in this line.
    Assert.assertEquals("10.10.10.10", record.get("remote_host"));
    Assert.assertEquals("01/Feb/2015:09:58:24 +0000", record.get("request_time"));
    Assert.assertEquals(408, (int) record.get("status"));

    // Everything written as "-" must be null.
    Assert.assertNull(record.get("remote_login"));
    Assert.assertNull(record.get("auth_user"));
    Assert.assertNull(record.get("request"));
    Assert.assertNull(record.get("content_length"));
    Assert.assertNull(record.get("referrer"));
    Assert.assertNull(record.get("user_agent"));
}
Also used : FormatSpecification(io.cdap.cdap.api.data.format.FormatSpecification) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 14 with FormatSpecification

use of io.cdap.cdap.api.data.format.FormatSpecification in project cdap by caskdata.

the class CombinedLogRecordFormatTest method testInvalid.

/**
 * Feeds a truncated line that does not follow the combined-log layout to the format.
 * The test passes only if reading it throws {@link UnexpectedFormatException}.
 */
@Test(expected = UnexpectedFormatException.class)
public void testInvalid() throws UnsupportedTypeException, UnexpectedFormatException {
    CombinedLogRecordFormat format = new CombinedLogRecordFormat();
    FormatSpecification spec = new FormatSpecification(CombinedLogRecordFormat.class.getCanonicalName(), null, ImmutableMap.of());
    format.initialize(spec);
    String data = "10.10.10.10[01/Feb/2015:06:47:10 +0000";
    // The call itself is expected to throw, so its result is intentionally not captured.
    format.read(ByteBuffer.wrap(Bytes.toBytes(data)));
}
Also used : FormatSpecification(io.cdap.cdap.api.data.format.FormatSpecification) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 15 with FormatSpecification

use of io.cdap.cdap.api.data.format.FormatSpecification in project cdap by caskdata.

the class CombinedLogRecordFormatTest method testCLFLogWithEscapedDoubleQuotes.

/**
 * Reads a combined-log line whose user-agent value is itself wrapped in backslash-escaped double quotes
 * and checks that the escaped quotes are kept verbatim in the parsed user agent.
 */
@Test
public void testCLFLogWithEscapedDoubleQuotes() throws UnsupportedTypeException, UnexpectedFormatException {
    CombinedLogRecordFormat logFormat = new CombinedLogRecordFormat();
    // No schema and no settings are supplied to the format.
    logFormat.initialize(new FormatSpecification(CombinedLogRecordFormat.class.getCanonicalName(), null, ImmutableMap.of()));

    String logLine = "10.10.10.10 - - [01/Feb/2015:06:38:58 +0000] \"GET /plugins/servlet/buildStatusImage/CDAP-DUT " + "HTTP/1.1\" 301 257 \"http://cdap.io/\" \"\\\"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, " + "like Gecko) Chrome/31.0.1650.57 Safari/537.36 OPR/18.0.1284.49\\\"\"";
    ByteBuffer payload = ByteBuffer.wrap(Bytes.toBytes(logLine));
    StructuredRecord record = logFormat.read(payload);

    // Fields written as "-" in the log line.
    Assert.assertNull(record.get("remote_login"));
    Assert.assertNull(record.get("auth_user"));

    // Fields that carry a value.
    Assert.assertEquals("10.10.10.10", record.get("remote_host"));
    Assert.assertEquals("01/Feb/2015:06:38:58 +0000", record.get("request_time"));
    Assert.assertEquals("GET /plugins/servlet/buildStatusImage/CDAP-DUT HTTP/1.1", record.get("request"));
    Assert.assertEquals(301, (int) record.get("status"));
    Assert.assertEquals(257, (int) record.get("content_length"));
    Assert.assertEquals("http://cdap.io/", record.get("referrer"));

    // The escaped quotes around the user agent are expected to survive parsing unchanged.
    Assert.assertEquals("\\\"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/31.0.1650.57 Safari/537.36 OPR/18.0.1284.49\\\"", record.get("user_agent"));
}
Also used : FormatSpecification(io.cdap.cdap.api.data.format.FormatSpecification) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Aggregations

FormatSpecification (io.cdap.cdap.api.data.format.FormatSpecification): 27; Test (org.junit.Test): 26; Schema (io.cdap.cdap.api.data.schema.Schema): 18; StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord): 15; ByteBuffer (java.nio.ByteBuffer): 8; GenericRecord (org.apache.avro.generic.GenericRecord): 3; GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder): 3; RecordFormat (io.cdap.cdap.api.data.format.RecordFormat): 1