Search in sources :

Example 26 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class FlattenSpecParquetReaderTest method testFlat1FlattenSelectListItem.

@Test
public void testFlat1FlattenSelectListItem() throws IOException {
    final String file = "example/flattening/test_flat_1.parquet";
    InputRowSchema schema = new InputRowSchema(new TimestampSpec("timestamp", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "listExtracted"))), ColumnsFilter.all());
    List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.ROOT, "timestamp", null), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim1", null), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim2", null), new JSONPathFieldSpec(JSONPathFieldType.PATH, "listExtracted", "$.listDim[1]"));
    JSONPathSpec flattenSpec = new JSONPathSpec(false, flattenExpr);
    InputEntityReader reader = createReader(file, schema, flattenSpec);
    List<InputRow> rows = readAllRows(reader);
    Assert.assertEquals(FlattenSpecParquetInputTest.TS1, rows.get(0).getTimestamp().toString());
    Assert.assertEquals("d1v1", rows.get(0).getDimension("dim1").get(0));
    Assert.assertEquals("d2v1", rows.get(0).getDimension("dim2").get(0));
    Assert.assertEquals("listDim1v2", rows.get(0).getDimension("listExtracted").get(0));
    Assert.assertEquals(1, rows.get(0).getMetric("metric1").longValue());
    reader = createReader(file, schema, flattenSpec);
    List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
    Assert.assertEquals(FLAT_JSON, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 27 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class FlattenSpecParquetReaderTest method testNested1FlattenSelectListItem.

@Test
public void testNested1FlattenSelectListItem() throws IOException {
    final String file = "example/flattening/test_nested_1.parquet";
    InputRowSchema schema = new InputRowSchema(new TimestampSpec("timestamp", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())), ColumnsFilter.all());
    List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.ROOT, "timestamp", null), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim1", null), new JSONPathFieldSpec(JSONPathFieldType.PATH, "dim2", "$.nestedData.dim2"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "dim3", "$.nestedData.dim3"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "listextracted", "$.nestedData.listDim[1]"));
    JSONPathSpec flattenSpec = new JSONPathSpec(true, flattenExpr);
    InputEntityReader reader = createReader(file, schema, flattenSpec);
    List<InputRow> rows = readAllRows(reader);
    Assert.assertEquals(FlattenSpecParquetInputTest.TS1, rows.get(0).getTimestamp().toString());
    Assert.assertEquals("d1v1", rows.get(0).getDimension("dim1").get(0));
    Assert.assertEquals("d2v1", rows.get(0).getDimension("dim2").get(0));
    Assert.assertEquals("1", rows.get(0).getDimension("dim3").get(0));
    Assert.assertEquals("listDim1v2", rows.get(0).getDimension("listextracted").get(0));
    Assert.assertEquals(1, rows.get(0).getMetric("metric1").longValue());
    reader = createReader(file, schema, flattenSpec);
    List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
    Assert.assertEquals(NESTED_JSON, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 28 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class FlattenSpecParquetReaderTest method testFlat1NoFlattenSpec.

@Test
public void testFlat1NoFlattenSpec() throws IOException {
    final String file = "example/flattening/test_flat_1.parquet";
    InputRowSchema schema = new InputRowSchema(new TimestampSpec("timestamp", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3", "listDim"))), ColumnsFilter.all());
    JSONPathSpec flattenSpec = new JSONPathSpec(false, ImmutableList.of());
    InputEntityReader reader = createReader(file, schema, flattenSpec);
    List<InputRow> rows = readAllRows(reader);
    Assert.assertEquals(FlattenSpecParquetInputTest.TS1, rows.get(0).getTimestamp().toString());
    Assert.assertEquals("d1v1", rows.get(0).getDimension("dim1").get(0));
    Assert.assertEquals("d2v1", rows.get(0).getDimension("dim2").get(0));
    Assert.assertEquals("1", rows.get(0).getDimension("dim3").get(0));
    Assert.assertEquals("listDim1v1", rows.get(0).getDimension("listDim").get(0));
    Assert.assertEquals("listDim1v2", rows.get(0).getDimension("listDim").get(1));
    Assert.assertEquals(1, rows.get(0).getMetric("metric1").longValue());
    reader = createReader(file, schema, flattenSpec);
    List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
    Assert.assertEquals(FLAT_JSON, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 29 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class FlattenSpecParquetReaderTest method testFlat1Autodiscover.

@Test
public void testFlat1Autodiscover() throws IOException {
    final String file = "example/flattening/test_flat_1.parquet";
    InputRowSchema schema = new InputRowSchema(new TimestampSpec("timestamp", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())), ColumnsFilter.all());
    InputEntityReader reader = createReader(file, schema, JSONPathSpec.DEFAULT);
    List<InputRow> rows = readAllRows(reader);
    Assert.assertEquals(FlattenSpecParquetInputTest.TS1, rows.get(0).getTimestamp().toString());
    Assert.assertEquals("d1v1", rows.get(0).getDimension("dim1").get(0));
    Assert.assertEquals("d2v1", rows.get(0).getDimension("dim2").get(0));
    Assert.assertEquals("1", rows.get(0).getDimension("dim3").get(0));
    Assert.assertEquals("listDim1v1", rows.get(0).getDimension("listDim").get(0));
    Assert.assertEquals("listDim1v2", rows.get(0).getDimension("listDim").get(1));
    Assert.assertEquals(1, rows.get(0).getMetric("metric1").longValue());
    reader = createReader(file, schema, JSONPathSpec.DEFAULT);
    List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
    Assert.assertEquals(FLAT_JSON, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 30 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class FlattenSpecParquetReaderTest method testNested1Autodiscover.

@Test
public void testNested1Autodiscover() throws IOException {
    final String file = "example/flattening/test_nested_1.parquet";
    InputRowSchema schema = new InputRowSchema(new TimestampSpec("timestamp", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())), ColumnsFilter.all());
    InputEntityReader reader = createReader(file, schema, JSONPathSpec.DEFAULT);
    List<InputRow> rows = readAllRows(reader);
    Assert.assertEquals(FlattenSpecParquetInputTest.TS1, rows.get(0).getTimestamp().toString());
    Assert.assertEquals("d1v1", rows.get(0).getDimension("dim1").get(0));
    List<String> dims = rows.get(0).getDimensions();
    Assert.assertFalse(dims.contains("dim2"));
    Assert.assertFalse(dims.contains("dim3"));
    Assert.assertFalse(dims.contains("listDim"));
    Assert.assertEquals(1, rows.get(0).getMetric("metric1").longValue());
    reader = createReader(file, schema, JSONPathSpec.DEFAULT);
    List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
    Assert.assertEquals(NESTED_JSON, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Aggregations

DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)169 Test (org.junit.Test)129 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)114 InputRow (org.apache.druid.data.input.InputRow)52 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)47 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)47 UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)42 DataSchema (org.apache.druid.segment.indexing.DataSchema)39 StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema)37 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)37 InputRowSchema (org.apache.druid.data.input.InputRowSchema)36 Map (java.util.Map)32 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)32 InputEntityReader (org.apache.druid.data.input.InputEntityReader)31 ArrayList (java.util.ArrayList)29 CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat)25 MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow)24 JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec)24 HashMap (java.util.HashMap)23 ImmutableMap (com.google.common.collect.ImmutableMap)21