Search in sources :

Example 6 with InputRowSchema

use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

the class CompatParquetReaderTest method testOldRepeatedInt.

/**
 * Reads {@code example/compat/old-repeated-int.parquet} through two separately created readers.
 *
 * <p>The first reader is consumed with {@code readAllRows}; the first parsed row must carry the
 * expected timestamp and the values "1", "2", "3" in its {@code repeatedInt} dimension. The second
 * reader is consumed with {@code sampleAllRows}; the raw values of the first sampled entry must
 * serialize to the expected JSON document.
 */
@Test
public void testOldRepeatedInt() throws IOException {
    final String parquetPath = "example/compat/old-repeated-int.parquet";

    // Schema: one "repeatedInt" dimension, no column filtering.
    final TimestampSpec timestampSpec =
        new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z"));
    final DimensionsSpec dimensionsSpec =
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("repeatedInt")));
    final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());

    // Flatten spec: a single ROOT field for "repeatedInt".
    final List<JSONPathFieldSpec> fieldSpecs = ImmutableList.of(
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "repeatedInt", "repeatedInt")
    );
    final JSONPathSpec pathSpec = new JSONPathSpec(true, fieldSpecs);

    // Pass 1: parsed rows.
    final InputEntityReader rowReader = createReader(parquetPath, rowSchema, pathSpec);
    final List<InputRow> parsedRows = readAllRows(rowReader);
    final InputRow firstRow = parsedRows.get(0);
    Assert.assertEquals("2018-09-01T00:00:00.000Z", firstRow.getTimestamp().toString());
    final List<String> repeatedInt = firstRow.getDimension("repeatedInt");
    Assert.assertEquals("1", repeatedInt.get(0));
    Assert.assertEquals("2", repeatedInt.get(1));
    Assert.assertEquals("3", repeatedInt.get(2));

    // Pass 2: a new reader is created for sampling rather than reusing the first one.
    final InputEntityReader sampleReader = createReader(parquetPath, rowSchema, pathSpec);
    final List<InputRowListPlusRawValues> sampledRows = sampleAllRows(sampleReader);
    final String expectedRawJson = "{\n"
        + "  \"repeatedInt\" : [ 1, 2, 3 ]\n"
        + "}";
    final String actualRawJson = DEFAULT_JSON_WRITER.writeValueAsString(sampledRows.get(0).getRawValues());
    Assert.assertEquals(expectedRawJson, actualRawJson);
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 7 with InputRowSchema

use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

the class CompatParquetReaderTest method testReadNestedArrayStruct.

/**
 * Reads {@code example/compat/nested-array-struct.parquet} through two separately created readers.
 *
 * <p>The first reader is consumed with {@code readAllRows}; the row at index 1 must carry the
 * expected timestamp and the values "5", "4" and "6" for {@code primitive}, {@code extracted1}
 * and {@code extracted2}. The second reader is consumed with {@code sampleAllRows}; the raw values
 * of the sampled entry at index 0 must serialize to the expected JSON document.
 */
@Test
public void testReadNestedArrayStruct() throws IOException {
    final String parquetPath = "example/compat/nested-array-struct.parquet";

    // NOTE(review): the schema declares "i32_dec" while the assertions below read "primitive";
    // this looks like a leftover from another test — confirm before changing.
    final TimestampSpec timestampSpec =
        new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z"));
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(
        DimensionsSpec.getDefaultSchemas(ImmutableList.of("i32_dec", "extracted1", "extracted2"))
    );
    final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());

    // Flatten spec: two PATH fields pulled out of the first "myComplex" element.
    final JSONPathFieldSpec idField =
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "extracted1", "$.myComplex[0].id");
    final JSONPathFieldSpec someIdField =
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "extracted2", "$.myComplex[0].repeatedMessage[*].someId");
    final List<JSONPathFieldSpec> fieldSpecs = ImmutableList.of(idField, someIdField);
    final JSONPathSpec pathSpec = new JSONPathSpec(true, fieldSpecs);

    // Pass 1: parsed rows; assertions target the second row (index 1).
    final InputEntityReader rowReader = createReader(parquetPath, rowSchema, pathSpec);
    final List<InputRow> parsedRows = readAllRows(rowReader);
    final InputRow secondRow = parsedRows.get(1);
    Assert.assertEquals("2018-09-01T00:00:00.000Z", secondRow.getTimestamp().toString());
    Assert.assertEquals("5", secondRow.getDimension("primitive").get(0));
    Assert.assertEquals("4", secondRow.getDimension("extracted1").get(0));
    Assert.assertEquals("6", secondRow.getDimension("extracted2").get(0));

    // Pass 2: a new reader is created for sampling; the assertion targets the first sample (index 0).
    final InputEntityReader sampleReader = createReader(parquetPath, rowSchema, pathSpec);
    final List<InputRowListPlusRawValues> sampledRows = sampleAllRows(sampleReader);
    final String expectedRawJson = "{\n"
        + "  \"primitive\" : 2,\n"
        + "  \"myComplex\" : [ {\n"
        + "    \"id\" : 1,\n"
        + "    \"repeatedMessage\" : [ 3 ]\n"
        + "  } ]\n"
        + "}";
    final String actualRawJson = DEFAULT_JSON_WRITER.writeValueAsString(sampledRows.get(0).getRawValues());
    Assert.assertEquals(expectedRawJson, actualRawJson);
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 8 with InputRowSchema

use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

the class DecimalParquetReaderTest method testReadParquetDecimalFixedLen.

/**
 * Reads {@code example/decimals/dec-in-fixed-len.parquet} through two separately created readers.
 *
 * <p>The first reader is consumed with {@code readAllRows}; the row at index 1 must carry the
 * expected timestamp, the string "1.0" in the {@code fixed_len_dec} dimension, and a
 * {@code BigDecimal("1.0")} for {@code metric1}. The second reader is consumed with
 * {@code sampleAllRows}; the raw values of the sampled entry at index 1 must serialize to the
 * expected JSON document.
 */
@Test
public void testReadParquetDecimalFixedLen() throws IOException {
    final String parquetPath = "example/decimals/dec-in-fixed-len.parquet";

    final TimestampSpec timestampSpec =
        new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z"));
    final DimensionsSpec dimensionsSpec =
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("fixed_len_dec")));
    final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());

    // Flatten spec: the decimal column as a ROOT field, plus the same column exposed as "metric1".
    final JSONPathFieldSpec decimalField =
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "fixed_len_dec", "fixed_len_dec");
    final JSONPathFieldSpec metricField =
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "metric1", "$.fixed_len_dec");
    final List<JSONPathFieldSpec> fieldSpecs = ImmutableList.of(decimalField, metricField);
    final JSONPathSpec pathSpec = new JSONPathSpec(true, fieldSpecs);

    // Pass 1: parsed rows; assertions target the second row (index 1).
    final InputEntityReader rowReader = createReader(parquetPath, rowSchema, pathSpec);
    final List<InputRow> parsedRows = readAllRows(rowReader);
    final InputRow secondRow = parsedRows.get(1);
    Assert.assertEquals("2018-09-01T00:00:00.000Z", secondRow.getTimestamp().toString());
    Assert.assertEquals("1.0", secondRow.getDimension("fixed_len_dec").get(0));
    Assert.assertEquals(new BigDecimal("1.0"), secondRow.getMetric("metric1"));

    // Pass 2: a new reader is created for sampling; the assertion targets the second sample (index 1).
    final InputEntityReader sampleReader = createReader(parquetPath, rowSchema, pathSpec);
    final List<InputRowListPlusRawValues> sampledRows = sampleAllRows(sampleReader);
    final String expectedRawJson = "{\n"
        + "  \"fixed_len_dec\" : 1.0\n"
        + "}";
    final String actualRawJson = DEFAULT_JSON_WRITER.writeValueAsString(sampledRows.get(1).getRawValues());
    Assert.assertEquals(expectedRawJson, actualRawJson);
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) BigDecimal(java.math.BigDecimal) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Example 9 with InputRowSchema

use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

the class DecimalParquetReaderTest method testReadParquetDecimali64.

/**
 * Reads {@code example/decimals/dec-in-i64.parquet} through two separately created readers.
 *
 * <p>The first reader is consumed with {@code readAllRows}; the row at index 1 must carry the
 * expected timestamp, the string "100" in the {@code i64_dec} dimension, and a
 * {@code BigDecimal(100)} for {@code metric1}. The second reader is consumed with
 * {@code sampleAllRows}; the raw values of the sampled entry at index 1 must serialize to the
 * expected JSON document.
 */
@Test
public void testReadParquetDecimali64() throws IOException {
    final String parquetPath = "example/decimals/dec-in-i64.parquet";

    final TimestampSpec timestampSpec =
        new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z"));
    final DimensionsSpec dimensionsSpec =
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("i64_dec")));
    final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());

    // NOTE(review): the ROOT field is named "i32_dec" although its expression and every assertion
    // below refer to "i64_dec"; this looks like a leftover from an i32 variant — confirm before changing.
    final JSONPathFieldSpec decimalField =
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "i32_dec", "i64_dec");
    final JSONPathFieldSpec metricField =
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "metric1", "$.i64_dec");
    final List<JSONPathFieldSpec> fieldSpecs = ImmutableList.of(decimalField, metricField);
    final JSONPathSpec pathSpec = new JSONPathSpec(true, fieldSpecs);

    // Pass 1: parsed rows; assertions target the second row (index 1).
    final InputEntityReader rowReader = createReader(parquetPath, rowSchema, pathSpec);
    final List<InputRow> parsedRows = readAllRows(rowReader);
    final InputRow secondRow = parsedRows.get(1);
    Assert.assertEquals("2018-09-01T00:00:00.000Z", secondRow.getTimestamp().toString());
    Assert.assertEquals("100", secondRow.getDimension("i64_dec").get(0));
    Assert.assertEquals(new BigDecimal(100), secondRow.getMetric("metric1"));

    // Pass 2: a new reader is created for sampling; the assertion targets the second sample (index 1).
    final InputEntityReader sampleReader = createReader(parquetPath, rowSchema, pathSpec);
    final List<InputRowListPlusRawValues> sampledRows = sampleAllRows(sampleReader);
    final String expectedRawJson = "{\n"
        + "  \"i64_dec\" : 100\n"
        + "}";
    final String actualRawJson = DEFAULT_JSON_WRITER.writeValueAsString(sampledRows.get(1).getRawValues());
    Assert.assertEquals(expectedRawJson, actualRawJson);
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) BigDecimal(java.math.BigDecimal) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Example 10 with InputRowSchema

use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

the class FlattenSpecParquetReaderTest method testFlat1FlattenSelectListItem.

/**
 * Reads {@code example/flattening/test_flat_1.parquet} through two separately created readers,
 * using a flatten spec built with {@code false} as its first argument and four explicit fields.
 *
 * <p>The first reader is consumed with {@code readAllRows}; the first parsed row must carry the
 * timestamp {@code FlattenSpecParquetInputTest.TS1}, the expected {@code dim1}/{@code dim2}
 * values, "listDim1v2" for the {@code listExtracted} field (path {@code $.listDim[1]}), and 1 for
 * {@code metric1}. The second reader is consumed with {@code sampleAllRows}; the raw values of
 * the first sampled entry must serialize to {@code FLAT_JSON}.
 */
@Test
public void testFlat1FlattenSelectListItem() throws IOException {
    final String parquetPath = "example/flattening/test_flat_1.parquet";

    final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(
        DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "listExtracted"))
    );
    final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());

    // Flatten spec: three ROOT fields with a null expression, plus one PATH field selecting
    // the list element at index 1.
    final List<JSONPathFieldSpec> fieldSpecs = ImmutableList.of(
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "timestamp", null),
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim1", null),
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim2", null),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "listExtracted", "$.listDim[1]")
    );
    final JSONPathSpec pathSpec = new JSONPathSpec(false, fieldSpecs);

    // Pass 1: parsed rows; assertions target the first row.
    final InputEntityReader rowReader = createReader(parquetPath, rowSchema, pathSpec);
    final List<InputRow> parsedRows = readAllRows(rowReader);
    final InputRow firstRow = parsedRows.get(0);
    Assert.assertEquals(FlattenSpecParquetInputTest.TS1, firstRow.getTimestamp().toString());
    Assert.assertEquals("d1v1", firstRow.getDimension("dim1").get(0));
    Assert.assertEquals("d2v1", firstRow.getDimension("dim2").get(0));
    Assert.assertEquals("listDim1v2", firstRow.getDimension("listExtracted").get(0));
    Assert.assertEquals(1, firstRow.getMetric("metric1").longValue());

    // Pass 2: a new reader is created for sampling rather than reusing the first one.
    final InputEntityReader sampleReader = createReader(parquetPath, rowSchema, pathSpec);
    final List<InputRowListPlusRawValues> sampledRows = sampleAllRows(sampleReader);
    final String actualRawJson = DEFAULT_JSON_WRITER.writeValueAsString(sampledRows.get(0).getRawValues());
    Assert.assertEquals(FLAT_JSON, actualRawJson);
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Aggregations

InputRowSchema (org.apache.druid.data.input.InputRowSchema): 63, Test (org.junit.Test): 55, InputRow (org.apache.druid.data.input.InputRow): 52, InputEntityReader (org.apache.druid.data.input.InputEntityReader): 39, TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 37, DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 36, JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec): 29, JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec): 26, InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues): 24, InputSourceReader (org.apache.druid.data.input.InputSourceReader): 10, ByteEntity (org.apache.druid.data.input.impl.ByteEntity): 9, CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat): 9, InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 9, File (java.io.File): 7, KafkaRecordEntity (org.apache.druid.data.input.kafka.KafkaRecordEntity): 5, ArrayList (java.util.ArrayList): 4, Collections (java.util.Collections): 4, List (java.util.List): 4, Map (java.util.Map): 4, Nullable (javax.annotation.Nullable): 4