Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.
In the class ParquetReaderResourceLeakTest, the method testFetchOnReadCleanupAfterExhaustingIterator.
@Test
public void testFetchOnReadCleanupAfterExhaustingIterator() throws IOException
{
  InputRowSchema schema = new InputRowSchema(
      new TimestampSpec("timestamp", "iso", null),
      new DimensionsSpec(
          DimensionsSpec.getDefaultSchemas(ImmutableList.of("page", "language", "user", "unpatrolled"))
      ),
      ColumnsFilter.all()
  );
  FetchingFileEntity entity = new FetchingFileEntity(new File("example/wiki/wiki.parquet"));
  ParquetInputFormat parquet = new ParquetInputFormat(JSONPathSpec.DEFAULT, false, new Configuration());
  File tempDir = temporaryFolder.newFolder();
  InputEntityReader reader = parquet.createReader(schema, entity, tempDir);

  Assert.assertEquals(0, Objects.requireNonNull(tempDir.list()).length);
  try (CloseableIterator<InputRow> iterator = reader.read()) {
    Assert.assertTrue(Objects.requireNonNull(tempDir.list()).length > 0);
    while (iterator.hasNext()) {
      iterator.next();
    }
  }
  Assert.assertEquals(0, Objects.requireNonNull(tempDir.list()).length);
}
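The test depends on a FetchingFileEntity helper that is not shown in this listing. Judging from the assertions, its purpose is to force ParquetInputFormat's reader down the fetch-on-read path, so that a copy of the Parquet file is materialized in the temporary directory and then removed once the iterator is closed. The sketch below is an assumption, not the actual helper: it extends Druid's FileEntity but, unlike FileEntity's usual no-copy behavior, copies the file into the temporary directory on fetch(), giving the reader something to clean up. The copy logic and the temp-file prefix are illustrative.

// Illustrative sketch only; the real FetchingFileEntity in the test class may differ.
private static class FetchingFileEntity extends FileEntity
{
  private FetchingFileEntity(File file)
  {
    super(file);
  }

  @Override
  public CleanableFile fetch(File temporaryDirectory, byte[] fetchBuffer) throws IOException
  {
    // Copy the file into temporaryDirectory instead of handing back the original in place.
    final File tempFile = File.createTempFile("druid-parquet-test", ".parquet", temporaryDirectory);
    try (InputStream in = open()) {
      Files.copy(in, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
    }
    return new CleanableFile()
    {
      @Override
      public File file()
      {
        return tempFile;
      }

      @Override
      public void close() throws IOException
      {
        // Deleting the copy here is what makes the final assertEquals(0, ...) above pass.
        Files.deleteIfExists(tempFile.toPath());
      }
    };
  }
}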
Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.
In the class CompatParquetReaderTest, the method testOldRepeatedInt.
@Test
public void testOldRepeatedInt() throws IOException
{
  final String file = "example/compat/old-repeated-int.parquet";
  InputRowSchema schema = new InputRowSchema(
      new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("repeatedInt"))),
      ColumnsFilter.all()
  );
  List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
      new JSONPathFieldSpec(JSONPathFieldType.ROOT, "repeatedInt", "repeatedInt")
  );
  JSONPathSpec flattenSpec = new JSONPathSpec(true, flattenExpr);
  InputEntityReader reader = createReader(file, schema, flattenSpec);

  List<InputRow> rows = readAllRows(reader);
  Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(0).getTimestamp().toString());
  Assert.assertEquals("1", rows.get(0).getDimension("repeatedInt").get(0));
  Assert.assertEquals("2", rows.get(0).getDimension("repeatedInt").get(1));
  Assert.assertEquals("3", rows.get(0).getDimension("repeatedInt").get(2));

  reader = createReader(file, schema, flattenSpec);
  List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
  final String expectedJson = "{\n"
                              + "  \"repeatedInt\" : [ 1, 2, 3 ]\n"
                              + "}";
  Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
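This test (and the remaining ones below) build their readers through a createReader(file, schema, flattenSpec) helper from the tests' shared base class rather than inline. A plausible sketch of that helper, assuming it wires a FileEntity and a ParquetInputFormat together the same way the resource-leak test above does; the helper name and arguments are taken from the calls in the tests, while the body, the binaryAsString=false flag, and the use of the JUnit temporaryFolder rule are assumptions.

// Plausible createReader helper, mirroring how the resource-leak test builds its reader.
private InputEntityReader createReader(String parquetFile, InputRowSchema schema, JSONPathSpec flattenSpec)
    throws IOException
{
  FileEntity entity = new FileEntity(new File(parquetFile));
  ParquetInputFormat parquet = new ParquetInputFormat(flattenSpec, false, new Configuration());
  return parquet.createReader(schema, entity, temporaryFolder.newFolder());
}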
Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.
In the class CompatParquetReaderTest, the method testReadNestedArrayStruct.
@Test
public void testReadNestedArrayStruct() throws IOException
{
  final String file = "example/compat/nested-array-struct.parquet";
  InputRowSchema schema = new InputRowSchema(
      new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("i32_dec", "extracted1", "extracted2"))),
      ColumnsFilter.all()
  );
  List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "extracted1", "$.myComplex[0].id"),
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "extracted2", "$.myComplex[0].repeatedMessage[*].someId")
  );
  JSONPathSpec flattenSpec = new JSONPathSpec(true, flattenExpr);
  InputEntityReader reader = createReader(file, schema, flattenSpec);

  List<InputRow> rows = readAllRows(reader);
  Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(1).getTimestamp().toString());
  Assert.assertEquals("5", rows.get(1).getDimension("primitive").get(0));
  Assert.assertEquals("4", rows.get(1).getDimension("extracted1").get(0));
  Assert.assertEquals("6", rows.get(1).getDimension("extracted2").get(0));

  reader = createReader(file, schema, flattenSpec);
  List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
  final String expectedJson = "{\n"
                              + "  \"primitive\" : 2,\n"
                              + "  \"myComplex\" : [ {\n"
                              + "    \"id\" : 1,\n"
                              + "    \"repeatedMessage\" : [ 3 ]\n"
                              + "  } ]\n"
                              + "}";
  Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
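The readAllRows and sampleAllRows helpers used in these tests are also defined elsewhere. A minimal sketch, assuming they simply drain the reader: InputEntityReader exposes read() as a CloseableIterator of InputRow and sample() as a CloseableIterator of InputRowListPlusRawValues, so collecting either into a list is enough. The real helpers may differ in detail.

// Drain read() into InputRows and sample() into raw-value bundles, closing the
// iterators via try-with-resources.
private List<InputRow> readAllRows(InputEntityReader reader) throws IOException
{
  final List<InputRow> rows = new ArrayList<>();
  try (CloseableIterator<InputRow> iterator = reader.read()) {
    iterator.forEachRemaining(rows::add);
  }
  return rows;
}

private List<InputRowListPlusRawValues> sampleAllRows(InputEntityReader reader) throws IOException
{
  final List<InputRowListPlusRawValues> rows = new ArrayList<>();
  try (CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample()) {
    iterator.forEachRemaining(rows::add);
  }
  return rows;
}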
Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.
In the class DecimalParquetReaderTest, the method testReadParquetDecimalFixedLen.
@Test
public void testReadParquetDecimalFixedLen() throws IOException
{
  final String file = "example/decimals/dec-in-fixed-len.parquet";
  InputRowSchema schema = new InputRowSchema(
      new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("fixed_len_dec"))),
      ColumnsFilter.all()
  );
  List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
      new JSONPathFieldSpec(JSONPathFieldType.ROOT, "fixed_len_dec", "fixed_len_dec"),
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "metric1", "$.fixed_len_dec")
  );
  JSONPathSpec flattenSpec = new JSONPathSpec(true, flattenExpr);
  InputEntityReader reader = createReader(file, schema, flattenSpec);

  List<InputRow> rows = readAllRows(reader);
  Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(1).getTimestamp().toString());
  Assert.assertEquals("1.0", rows.get(1).getDimension("fixed_len_dec").get(0));
  Assert.assertEquals(new BigDecimal("1.0"), rows.get(1).getMetric("metric1"));

  reader = createReader(file, schema, flattenSpec);
  List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
  final String expectedJson = "{\n"
                              + "  \"fixed_len_dec\" : 1.0\n"
                              + "}";
  Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(1).getRawValues()));
}
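The sampling assertions compare against pretty-printed JSON produced by a DEFAULT_JSON_WRITER constant that is not shown here. Given the shape of the expected strings (newline after the opening brace, indented fields, a space on each side of the colon), a Jackson ObjectWriter configured with the default pretty printer would produce that layout. The definition below is an assumption, not necessarily the one in the test base class.

// Assumed definition: a Jackson writer with the default pretty printer, matching the
// "{\n  \"field\" : value\n}" layout asserted in the tests above.
private static final ObjectWriter DEFAULT_JSON_WRITER = new ObjectMapper().writerWithDefaultPrettyPrinter();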
Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.
In the class DecimalParquetReaderTest, the method testReadParquetDecimali64.
@Test
public void testReadParquetDecimali64() throws IOException
{
  final String file = "example/decimals/dec-in-i64.parquet";
  InputRowSchema schema = new InputRowSchema(
      new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("i64_dec"))),
      ColumnsFilter.all()
  );
  List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
      new JSONPathFieldSpec(JSONPathFieldType.ROOT, "i32_dec", "i64_dec"),
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "metric1", "$.i64_dec")
  );
  JSONPathSpec flattenSpec = new JSONPathSpec(true, flattenExpr);
  InputEntityReader reader = createReader(file, schema, flattenSpec);

  List<InputRow> rows = readAllRows(reader);
  Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(1).getTimestamp().toString());
  Assert.assertEquals("100", rows.get(1).getDimension("i64_dec").get(0));
  Assert.assertEquals(new BigDecimal(100), rows.get(1).getMetric("metric1"));

  reader = createReader(file, schema, flattenSpec);
  List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
  final String expectedJson = "{\n"
                              + "  \"i64_dec\" : 100\n"
                              + "}";
  Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(1).getRawValues()));
}