use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
the class FlattenSpecParquetReaderTest method testNested1FlattenSelectListItem.
@Test
public void testNested1FlattenSelectListItem() throws IOException {
final String file = "example/flattening/test_nested_1.parquet";
InputRowSchema schema = new InputRowSchema(new TimestampSpec("timestamp", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())), ColumnsFilter.all());
List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.ROOT, "timestamp", null), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim1", null), new JSONPathFieldSpec(JSONPathFieldType.PATH, "dim2", "$.nestedData.dim2"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "dim3", "$.nestedData.dim3"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "listextracted", "$.nestedData.listDim[1]"));
JSONPathSpec flattenSpec = new JSONPathSpec(true, flattenExpr);
InputEntityReader reader = createReader(file, schema, flattenSpec);
List<InputRow> rows = readAllRows(reader);
Assert.assertEquals(FlattenSpecParquetInputTest.TS1, rows.get(0).getTimestamp().toString());
Assert.assertEquals("d1v1", rows.get(0).getDimension("dim1").get(0));
Assert.assertEquals("d2v1", rows.get(0).getDimension("dim2").get(0));
Assert.assertEquals("1", rows.get(0).getDimension("dim3").get(0));
Assert.assertEquals("listDim1v2", rows.get(0).getDimension("listextracted").get(0));
Assert.assertEquals(1, rows.get(0).getMetric("metric1").longValue());
reader = createReader(file, schema, flattenSpec);
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
Assert.assertEquals(NESTED_JSON, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
the class FlattenSpecParquetReaderTest method testFlat1NoFlattenSpec.
@Test
public void testFlat1NoFlattenSpec() throws IOException {
final String file = "example/flattening/test_flat_1.parquet";
InputRowSchema schema = new InputRowSchema(new TimestampSpec("timestamp", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3", "listDim"))), ColumnsFilter.all());
JSONPathSpec flattenSpec = new JSONPathSpec(false, ImmutableList.of());
InputEntityReader reader = createReader(file, schema, flattenSpec);
List<InputRow> rows = readAllRows(reader);
Assert.assertEquals(FlattenSpecParquetInputTest.TS1, rows.get(0).getTimestamp().toString());
Assert.assertEquals("d1v1", rows.get(0).getDimension("dim1").get(0));
Assert.assertEquals("d2v1", rows.get(0).getDimension("dim2").get(0));
Assert.assertEquals("1", rows.get(0).getDimension("dim3").get(0));
Assert.assertEquals("listDim1v1", rows.get(0).getDimension("listDim").get(0));
Assert.assertEquals("listDim1v2", rows.get(0).getDimension("listDim").get(1));
Assert.assertEquals(1, rows.get(0).getMetric("metric1").longValue());
reader = createReader(file, schema, flattenSpec);
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
Assert.assertEquals(FLAT_JSON, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
the class FlattenSpecParquetReaderTest method testFlat1Autodiscover.
@Test
public void testFlat1Autodiscover() throws IOException {
final String file = "example/flattening/test_flat_1.parquet";
InputRowSchema schema = new InputRowSchema(new TimestampSpec("timestamp", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())), ColumnsFilter.all());
InputEntityReader reader = createReader(file, schema, JSONPathSpec.DEFAULT);
List<InputRow> rows = readAllRows(reader);
Assert.assertEquals(FlattenSpecParquetInputTest.TS1, rows.get(0).getTimestamp().toString());
Assert.assertEquals("d1v1", rows.get(0).getDimension("dim1").get(0));
Assert.assertEquals("d2v1", rows.get(0).getDimension("dim2").get(0));
Assert.assertEquals("1", rows.get(0).getDimension("dim3").get(0));
Assert.assertEquals("listDim1v1", rows.get(0).getDimension("listDim").get(0));
Assert.assertEquals("listDim1v2", rows.get(0).getDimension("listDim").get(1));
Assert.assertEquals(1, rows.get(0).getMetric("metric1").longValue());
reader = createReader(file, schema, JSONPathSpec.DEFAULT);
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
Assert.assertEquals(FLAT_JSON, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
the class FlattenSpecParquetReaderTest method testNested1Autodiscover.
@Test
public void testNested1Autodiscover() throws IOException {
final String file = "example/flattening/test_nested_1.parquet";
InputRowSchema schema = new InputRowSchema(new TimestampSpec("timestamp", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())), ColumnsFilter.all());
InputEntityReader reader = createReader(file, schema, JSONPathSpec.DEFAULT);
List<InputRow> rows = readAllRows(reader);
Assert.assertEquals(FlattenSpecParquetInputTest.TS1, rows.get(0).getTimestamp().toString());
Assert.assertEquals("d1v1", rows.get(0).getDimension("dim1").get(0));
List<String> dims = rows.get(0).getDimensions();
Assert.assertFalse(dims.contains("dim2"));
Assert.assertFalse(dims.contains("dim3"));
Assert.assertFalse(dims.contains("listDim"));
Assert.assertEquals(1, rows.get(0).getMetric("metric1").longValue());
reader = createReader(file, schema, JSONPathSpec.DEFAULT);
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
Assert.assertEquals(NESTED_JSON, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
the class FlattenSpecParquetReaderTest method testFlat1Flatten.
@Test
public void testFlat1Flatten() throws IOException {
final String file = "example/flattening/test_flat_1.parquet";
InputRowSchema schema = new InputRowSchema(new TimestampSpec("timestamp", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3", "list"))), ColumnsFilter.all());
List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.ROOT, "timestamp", null), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim1", null), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim2", null), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim3", null), new JSONPathFieldSpec(JSONPathFieldType.PATH, "list", "$.listDim"));
JSONPathSpec flattenSpec = new JSONPathSpec(false, flattenExpr);
InputEntityReader reader = createReader(file, schema, flattenSpec);
List<InputRow> rows = readAllRows(reader);
Assert.assertEquals(FlattenSpecParquetInputTest.TS1, rows.get(0).getTimestamp().toString());
Assert.assertEquals("d1v1", rows.get(0).getDimension("dim1").get(0));
Assert.assertEquals("d2v1", rows.get(0).getDimension("dim2").get(0));
Assert.assertEquals("1", rows.get(0).getDimension("dim3").get(0));
Assert.assertEquals("listDim1v1", rows.get(0).getDimension("list").get(0));
Assert.assertEquals("listDim1v2", rows.get(0).getDimension("list").get(1));
Assert.assertEquals(1, rows.get(0).getMetric("metric1").longValue());
reader = createReader(file, schema, flattenSpec);
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
Assert.assertEquals(FLAT_JSON, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Aggregations