use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.
the class FlattenSpecParquetReaderTest method testFlat1Flatten.
@Test
public void testFlat1Flatten() throws IOException {
final String file = "example/flattening/test_flat_1.parquet";
InputRowSchema schema = new InputRowSchema(new TimestampSpec("timestamp", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3", "list"))), ColumnsFilter.all());
List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.ROOT, "timestamp", null), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim1", null), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim2", null), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim3", null), new JSONPathFieldSpec(JSONPathFieldType.PATH, "list", "$.listDim"));
JSONPathSpec flattenSpec = new JSONPathSpec(false, flattenExpr);
InputEntityReader reader = createReader(file, schema, flattenSpec);
List<InputRow> rows = readAllRows(reader);
Assert.assertEquals(FlattenSpecParquetInputTest.TS1, rows.get(0).getTimestamp().toString());
Assert.assertEquals("d1v1", rows.get(0).getDimension("dim1").get(0));
Assert.assertEquals("d2v1", rows.get(0).getDimension("dim2").get(0));
Assert.assertEquals("1", rows.get(0).getDimension("dim3").get(0));
Assert.assertEquals("listDim1v1", rows.get(0).getDimension("list").get(0));
Assert.assertEquals("listDim1v2", rows.get(0).getDimension("list").get(1));
Assert.assertEquals(1, rows.get(0).getMetric("metric1").longValue());
reader = createReader(file, schema, flattenSpec);
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
Assert.assertEquals(FLAT_JSON, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.
the class TimestampsParquetReaderTest method testDateHandling.
@Test
public void testDateHandling() throws IOException {
final String file = "example/timestamps/test_date_data.snappy.parquet";
InputRowSchema schemaAsString = new InputRowSchema(new TimestampSpec("date_as_string", "Y-M-d", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())), ColumnsFilter.all());
InputRowSchema schemaAsDate = new InputRowSchema(new TimestampSpec("date_as_date", null, null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())), ColumnsFilter.all());
InputEntityReader readerAsString = createReader(file, schemaAsString, JSONPathSpec.DEFAULT);
InputEntityReader readerAsDate = createReader(file, schemaAsDate, JSONPathSpec.DEFAULT);
List<InputRow> rowsWithString = readAllRows(readerAsString);
List<InputRow> rowsWithDate = readAllRows(readerAsDate);
Assert.assertEquals(rowsWithDate.size(), rowsWithString.size());
for (int i = 0; i < rowsWithDate.size(); i++) {
Assert.assertEquals(rowsWithString.get(i).getTimestamp(), rowsWithDate.get(i).getTimestamp());
}
readerAsString = createReader(file, schemaAsString, JSONPathSpec.DEFAULT);
readerAsDate = createReader(file, schemaAsDate, JSONPathSpec.DEFAULT);
List<InputRowListPlusRawValues> sampledAsString = sampleAllRows(readerAsString);
List<InputRowListPlusRawValues> sampledAsDate = sampleAllRows(readerAsDate);
final String expectedJson = "{\n" + " \"date_as_string\" : \"2017-06-18\",\n" + " \"timestamp_as_timestamp\" : 1497702471815,\n" + " \"timestamp_as_string\" : \"2017-06-17 14:27:51.815\",\n" + " \"idx\" : 1,\n" + " \"date_as_date\" : 1497744000000\n" + "}";
Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampledAsString.get(0).getRawValues()));
Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampledAsDate.get(0).getRawValues()));
}
use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.
the class JsonLineReaderTest method testParseRowKeepNullColumns.
@Test
public void testParseRowKeepNullColumns() throws IOException {
final JsonInputFormat format = new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"))), null, true);
final ByteEntity source = new ByteEntity(StringUtils.toUtf8("{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"o\":{\"mg\":null}}"));
final InputEntityReader reader = format.createReader(new InputRowSchema(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Collections.emptyList())), ColumnsFilter.all()), source, null);
final int numExpectedIterations = 1;
try (CloseableIterator<InputRow> iterator = reader.read()) {
int numActualIterations = 0;
while (iterator.hasNext()) {
final InputRow row = iterator.next();
Assert.assertEquals(Arrays.asList("path_omg", "timestamp", "bar", "foo"), row.getDimensions());
Assert.assertTrue(row.getDimension("bar").isEmpty());
Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
Assert.assertTrue(row.getDimension("path_omg").isEmpty());
numActualIterations++;
}
Assert.assertEquals(numExpectedIterations, numActualIterations);
}
}
use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.
the class JsonLineReaderTest method testKeepNullColumnsWithNoNullValues.
@Test
public void testKeepNullColumnsWithNoNullValues() throws IOException {
final JsonInputFormat format = new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"))), null, true);
final ByteEntity source = new ByteEntity(StringUtils.toUtf8("{\"timestamp\":\"2019-01-01\",\"bar\":1,\"foo\":\"x\",\"o\":{\"mg\":\"a\"}}"));
final InputEntityReader reader = format.createReader(new InputRowSchema(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Collections.emptyList())), ColumnsFilter.all()), source, null);
final int numExpectedIterations = 1;
try (CloseableIterator<InputRow> iterator = reader.read()) {
int numActualIterations = 0;
while (iterator.hasNext()) {
final InputRow row = iterator.next();
Assert.assertEquals(Arrays.asList("path_omg", "timestamp", "bar", "foo"), row.getDimensions());
Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("bar")));
Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
Assert.assertEquals("a", Iterables.getOnlyElement(row.getDimension("path_omg")));
numActualIterations++;
}
Assert.assertEquals(numExpectedIterations, numActualIterations);
}
}
use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.
the class JsonReaderTest method testParsePrettyFormatJSON.
@Test
public void testParsePrettyFormatJSON() throws IOException {
final JsonInputFormat format = new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"))), null, null, // make sure JsonReader is used
false);
final ByteEntity source = new ByteEntity(StringUtils.toUtf8("{\n" + " \"timestamp\": \"2019-01-01\",\n" + " \"bar\": null,\n" + " \"foo\": \"x\",\n" + " \"baz\": 4,\n" + " \"o\": {\n" + " \"mg\": 1\n" + " }\n" + "}"));
final InputEntityReader reader = format.createReader(new InputRowSchema(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), ColumnsFilter.all()), source, null);
final int numExpectedIterations = 1;
try (CloseableIterator<InputRow> iterator = reader.read()) {
int numActualIterations = 0;
while (iterator.hasNext()) {
final InputRow row = iterator.next();
Assert.assertEquals(DateTimes.of("2019-01-01"), row.getTimestamp());
Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("path_omg")));
Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("jq_omg")));
Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
numActualIterations++;
}
Assert.assertEquals(numExpectedIterations, numActualIterations);
}
}
Aggregations