
Example 11 with InputRowListPlusRawValues

Use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.

From class TimestampsParquetReaderTest, method testDateHandling.

@Test
public void testDateHandling() throws IOException {
    final String file = "example/timestamps/test_date_data.snappy.parquet";
    InputRowSchema schemaAsString = new InputRowSchema(
            new TimestampSpec("date_as_string", "Y-M-d", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
            ColumnsFilter.all()
    );
    InputRowSchema schemaAsDate = new InputRowSchema(
            new TimestampSpec("date_as_date", null, null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
            ColumnsFilter.all()
    );
    InputEntityReader readerAsString = createReader(file, schemaAsString, JSONPathSpec.DEFAULT);
    InputEntityReader readerAsDate = createReader(file, schemaAsDate, JSONPathSpec.DEFAULT);
    List<InputRow> rowsWithString = readAllRows(readerAsString);
    List<InputRow> rowsWithDate = readAllRows(readerAsDate);
    Assert.assertEquals(rowsWithDate.size(), rowsWithString.size());
    for (int i = 0; i < rowsWithDate.size(); i++) {
        Assert.assertEquals(rowsWithString.get(i).getTimestamp(), rowsWithDate.get(i).getTimestamp());
    }
    readerAsString = createReader(file, schemaAsString, JSONPathSpec.DEFAULT);
    readerAsDate = createReader(file, schemaAsDate, JSONPathSpec.DEFAULT);
    List<InputRowListPlusRawValues> sampledAsString = sampleAllRows(readerAsString);
    List<InputRowListPlusRawValues> sampledAsDate = sampleAllRows(readerAsDate);
    final String expectedJson = "{\n"
            + "  \"date_as_string\" : \"2017-06-18\",\n"
            + "  \"timestamp_as_timestamp\" : 1497702471815,\n"
            + "  \"timestamp_as_string\" : \"2017-06-17 14:27:51.815\",\n"
            + "  \"idx\" : 1,\n"
            + "  \"date_as_date\" : 1497744000000\n"
            + "}";
    Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampledAsString.get(0).getRawValues()));
    Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampledAsDate.get(0).getRawValues()));
}
Also used: InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), InputRow (org.apache.druid.data.input.InputRow), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), InputRowSchema (org.apache.druid.data.input.InputRowSchema), InputEntityReader (org.apache.druid.data.input.InputEntityReader), Test (org.junit.Test)
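
The readAllRows and sampleAllRows helpers used throughout these parquet tests are not shown on this page. Below is a minimal sketch of what such helpers typically look like, draining InputEntityReader.read() and InputEntityReader.sample() from Druid's input API; the class name and method bodies are reconstructions for illustration, not the verbatim test code:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.druid.data.input.InputEntityReader;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowListPlusRawValues;
import org.apache.druid.java.util.common.parsers.CloseableIterator;

public abstract class BaseReaderTestHelpers
{
    // Drains reader.read(), which yields fully parsed InputRows.
    protected List<InputRow> readAllRows(InputEntityReader reader) throws IOException
    {
        final List<InputRow> rows = new ArrayList<>();
        try (CloseableIterator<InputRow> iterator = reader.read()) {
            iterator.forEachRemaining(rows::add);
        }
        return rows;
    }

    // Drains reader.sample(); each InputRowListPlusRawValues pairs the parsed
    // rows with the raw underlying values (plus a ParseException on failure),
    // which is what the sampler API surfaces to users.
    protected List<InputRowListPlusRawValues> sampleAllRows(InputEntityReader reader) throws IOException
    {
        final List<InputRowListPlusRawValues> rows = new ArrayList<>();
        try (CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample()) {
            iterator.forEachRemaining(rows::add);
        }
        return rows;
    }
}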

Example 12 with InputRowListPlusRawValues

Use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.

From class CompatParquetReaderTest, method testOldRepeatedInt.

@Test
public void testOldRepeatedInt() throws IOException {
    final String file = "example/compat/old-repeated-int.parquet";
    InputRowSchema schema = new InputRowSchema(
            new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("repeatedInt"))),
            ColumnsFilter.all()
    );
    List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "repeatedInt", "repeatedInt")
    );
    JSONPathSpec flattenSpec = new JSONPathSpec(true, flattenExpr);
    InputEntityReader reader = createReader(file, schema, flattenSpec);
    List<InputRow> rows = readAllRows(reader);
    Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(0).getTimestamp().toString());
    Assert.assertEquals("1", rows.get(0).getDimension("repeatedInt").get(0));
    Assert.assertEquals("2", rows.get(0).getDimension("repeatedInt").get(1));
    Assert.assertEquals("3", rows.get(0).getDimension("repeatedInt").get(2));
    reader = createReader(file, schema, flattenSpec);
    List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
    final String expectedJson = "{\n" + "  \"repeatedInt\" : [ 1, 2, 3 ]\n" + "}";
    Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Also used: InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), InputRow (org.apache.druid.data.input.InputRow), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec), JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec), InputRowSchema (org.apache.druid.data.input.InputRowSchema), InputEntityReader (org.apache.druid.data.input.InputEntityReader), Test (org.junit.Test)
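
In the spec above, JSONPathFieldType.ROOT copies the top-level repeatedInt field verbatim, and the first constructor argument (useFieldDiscovery = true) tells the flattener to also auto-discover simple top-level fields. The same object serializes to the flattenSpec JSON used in ingestion specs; here is a minimal sketch (the class name FlattenSpecShape is invented, and the printed shape is inferred from the Jackson annotations rather than quoted from Druid's documentation):

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import org.apache.druid.java.util.common.parsers.JSONPathFieldSpec;
import org.apache.druid.java.util.common.parsers.JSONPathFieldType;
import org.apache.druid.java.util.common.parsers.JSONPathSpec;

public class FlattenSpecShape
{
    public static void main(String[] args) throws Exception
    {
        JSONPathSpec flattenSpec = new JSONPathSpec(
                true, // useFieldDiscovery: also auto-discover simple top-level fields
                ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.ROOT, "repeatedInt", "repeatedInt"))
        );
        // Expected shape (an assumption, inferred from the @JsonProperty annotations):
        // {"useFieldDiscovery":true,"fields":[{"type":"root","name":"repeatedInt","expr":"repeatedInt"}]}
        System.out.println(new ObjectMapper().writeValueAsString(flattenSpec));
    }
}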

Example 13 with InputRowListPlusRawValues

Use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.

From class CompatParquetReaderTest, method testReadNestedArrayStruct.

@Test
public void testReadNestedArrayStruct() throws IOException {
    final String file = "example/compat/nested-array-struct.parquet";
    InputRowSchema schema = new InputRowSchema(
            new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("i32_dec", "extracted1", "extracted2"))),
            ColumnsFilter.all()
    );
    List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "extracted1", "$.myComplex[0].id"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "extracted2", "$.myComplex[0].repeatedMessage[*].someId")
    );
    JSONPathSpec flattenSpec = new JSONPathSpec(true, flattenExpr);
    InputEntityReader reader = createReader(file, schema, flattenSpec);
    List<InputRow> rows = readAllRows(reader);
    Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(1).getTimestamp().toString());
    Assert.assertEquals("5", rows.get(1).getDimension("primitive").get(0));
    Assert.assertEquals("4", rows.get(1).getDimension("extracted1").get(0));
    Assert.assertEquals("6", rows.get(1).getDimension("extracted2").get(0));
    reader = createReader(file, schema, flattenSpec);
    List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
    final String expectedJson = "{\n"
            + "  \"primitive\" : 2,\n"
            + "  \"myComplex\" : [ {\n"
            + "    \"id\" : 1,\n"
            + "    \"repeatedMessage\" : [ 3 ]\n"
            + "  } ]\n"
            + "}";
    Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Also used: InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), InputRow (org.apache.druid.data.input.InputRow), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec), JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec), InputRowSchema (org.apache.druid.data.input.InputRowSchema), InputEntityReader (org.apache.druid.data.input.InputEntityReader), Test (org.junit.Test)
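
The PATH field specs above use JsonPath-style expressions, which Druid evaluates against each record's nested structure. Below is a hedged illustration of the same two expressions using the jayway json-path library directly, with a hypothetical JSON stand-in for row 1 reconstructed from the assertions above (the actual layout of nested-array-struct.parquet is not shown on this page):

import com.jayway.jsonpath.JsonPath;
import java.util.List;

public class NestedPathDemo
{
    public static void main(String[] args)
    {
        // Hypothetical stand-in for row 1, reconstructed from the assertions above.
        final String row1 =
                "{\"primitive\": 5, \"myComplex\": [ {\"id\": 4, \"repeatedMessage\": [ {\"someId\": 6} ]} ]}";

        // A definite path returns the single matched value.
        Integer extracted1 = JsonPath.read(row1, "$.myComplex[0].id");
        // An indefinite path ([*]) returns a list of all matches.
        List<Integer> extracted2 = JsonPath.read(row1, "$.myComplex[0].repeatedMessage[*].someId");

        System.out.println(extracted1);  // 4   -> dimension "extracted1"
        System.out.println(extracted2);  // [6] -> dimension "extracted2"
    }
}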

Example 14 with InputRowListPlusRawValues

Use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.

From class InlineFirehoseTest, method testNextRowWithRawNotParseable.

@Test
public void testNextRowWithRawNotParseable() {
    final String data = NOT_PARSEABLE;
    InlineFirehose target = create(data);
    InputRowListPlusRawValues rowPlusRaw = target.nextRowWithRaw();
    Assert.assertNull(rowPlusRaw.getInputRows());
    Map<String, Object> raw = rowPlusRaw.getRawValues();
    Map<String, Object> expected = new HashMap<>();
    expected.put("timestamp", VALUE_0);
    expected.put("value", TIMESTAMP_0);
    Assert.assertEquals(expected, raw);
    Assert.assertNotNull(rowPlusRaw.getParseException());
}
Also used: InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues), HashMap (java.util.HashMap), Test (org.junit.Test)
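
Note the expected raw map: "timestamp" holds VALUE_0 and "value" holds TIMESTAMP_0. That transposition is the point of the fixture, not a typo: the NOT_PARSEABLE line has its two fields swapped, so timestamp parsing fails while the raw values survive for error reporting. A sketch of fixtures consistent with these assertions follows; the concrete values and the JSON line format are assumptions, since the real constants live in InlineFirehoseTest and are not shown here:

// Sketch of fixtures that would sit inside a test class like InlineFirehoseTest;
// all concrete values are invented, and the JSON line format is an assumption.
public class InlineFirehoseFixturesSketch
{
    static final String TIMESTAMP_0 = "1970-01-01T00:00:00.000Z"; // assumption
    static final String VALUE_0 = "foo";                          // assumption

    // Parseable: "timestamp" really holds a timestamp.
    static final String PARSEABLE =
            "{\"timestamp\":\"" + TIMESTAMP_0 + "\",\"value\":\"" + VALUE_0 + "\"}";

    // Not parseable: the two fields are transposed, so the TimestampSpec cannot
    // parse "timestamp" (it holds VALUE_0). Parsing fails with a ParseException,
    // but the raw map {timestamp=VALUE_0, value=TIMESTAMP_0} is still captured,
    // which is exactly what the assertions above verify.
    static final String NOT_PARSEABLE =
            "{\"timestamp\":\"" + VALUE_0 + "\",\"value\":\"" + TIMESTAMP_0 + "\"}";
}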

Example 15 with InputRowListPlusRawValues

Use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.

From class InlineFirehoseTest, method testNextRowWithRawParseable.

@Test
public void testNextRowWithRawParseable() {
    final String data = PARSEABLE;
    InlineFirehose target = create(data);
    InputRowListPlusRawValues rowPlusRaw = target.nextRowWithRaw();
    InputRow row = Iterables.getOnlyElement(rowPlusRaw.getInputRows());
    assertRowValue(VALUE_0, row);
    Map<String, Object> raw = rowPlusRaw.getRawValues();
    Map<String, Object> expected = new HashMap<>();
    expected.put("timestamp", TIMESTAMP_0);
    expected.put("value", VALUE_0);
    Assert.assertEquals(expected, raw);
    Assert.assertNull(rowPlusRaw.getParseException());
}
Also used: InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues), HashMap (java.util.HashMap), InputRow (org.apache.druid.data.input.InputRow), Test (org.junit.Test)
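
Taken together, the two InlineFirehose tests show the contract that makes InputRowListPlusRawValues useful for sampling: either the parsed rows or a ParseException is populated, and the raw values are available in both cases. A minimal sketch of how a caller might branch on that contract, using only the accessors exercised above (the SampleHandler class and its logging are invented for illustration):

import java.util.Map;
import org.apache.druid.data.input.InputRowListPlusRawValues;

public final class SampleHandler
{
    // Branches on the contract exercised above: a ParseException means no
    // usable InputRows, but the raw values remain available for reporting.
    static void handle(InputRowListPlusRawValues rowPlusRaw)
    {
        if (rowPlusRaw.getParseException() != null) {
            Map<String, Object> raw = rowPlusRaw.getRawValues();
            System.err.println("unparseable: " + raw + " -> " + rowPlusRaw.getParseException().getMessage());
        } else {
            rowPlusRaw.getInputRows().forEach(row -> System.out.println("parsed at " + row.getTimestamp()));
        }
    }
}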

Aggregations

InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues): 34 uses
Test (org.junit.Test): 31 uses
InputRow (org.apache.druid.data.input.InputRow): 29 uses
InputEntityReader (org.apache.druid.data.input.InputEntityReader): 26 uses
InputRowSchema (org.apache.druid.data.input.InputRowSchema): 24 uses
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 21 uses
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 21 uses
JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec): 17 uses
JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec): 15 uses
HashMap (java.util.HashMap): 4 uses
BigDecimal (java.math.BigDecimal): 3 uses
ArrayList (java.util.ArrayList): 3 uses
Map (java.util.Map): 3 uses
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 2 uses
File (java.io.File): 2 uses
AvroHadoopInputRowParserTest (org.apache.druid.data.input.AvroHadoopInputRowParserTest): 2 uses
AvroStreamInputRowParserTest (org.apache.druid.data.input.AvroStreamInputRowParserTest): 2 uses
InputSourceReader (org.apache.druid.data.input.InputSourceReader): 2 uses
DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper): 2 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 1 use