Search in sources :

Example 36 with InputRowSchema

use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

the class JsonLineReaderTest method testParseRowWithConditional.

/**
 * Parses a single JSON line using a flatten spec whose "foo" and "bar" paths use
 * conditional JSONPath filters, and checks which of the flattened fields come back
 * populated: "bar" must resolve to "test", while "foo" and "baz" must be empty.
 */
@Test
public void testParseRowWithConditional() throws IOException {
    // Flatten spec: two conditional paths ("foo", "bar") and one plain path ("baz").
    final JSONPathSpec flattenSpec = new JSONPathSpec(
        true,
        ImmutableList.of(
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "foo", "$.[?(@.maybe_object)].maybe_object.foo.test"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "baz", "$.maybe_object_2.foo.test"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "bar", "$.[?(@.something_else)].something_else.foo")
        )
    );
    final JsonInputFormat format = new JsonInputFormat(flattenSpec, null, null);

    // The input only carries "something_else"; neither "maybe_object" nor "maybe_object_2" is present.
    final ByteEntity source = new ByteEntity(
        StringUtils.toUtf8("{\"timestamp\":\"2019-01-01\",\"something_else\": {\"foo\": \"test\"}}")
    );

    final InputRowSchema schema = new InputRowSchema(
        new TimestampSpec("timestamp", "iso", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("foo"))),
        ColumnsFilter.all()
    );
    final InputEntityReader reader = format.createReader(schema, source, null);

    int rowsSeen = 0;
    try (CloseableIterator<InputRow> rows = reader.read()) {
        while (rows.hasNext()) {
            final InputRow parsed = rows.next();
            Assert.assertEquals("test", Iterables.getOnlyElement(parsed.getDimension("bar")));
            Assert.assertEquals(Collections.emptyList(), parsed.getDimension("foo"));
            Assert.assertTrue(parsed.getDimension("baz").isEmpty());
            rowsSeen++;
        }
        // Exactly one row is expected from the single input line.
        Assert.assertEquals(1, rowsSeen);
    }
}
Also used : InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Example 37 with InputRowSchema

use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

the class OssInputSourceTest method testCompressedReader.

/**
 * Reads rows through an {@link OssInputSource} configured with one prefix and one
 * explicit URI, backed by a mocked OSS client, and checks that every row carries the
 * expected timestamp and dimension values.
 *
 * <p>The row iterator is opened in a try-with-resources block so that it is closed
 * even when an assertion fails part-way through the iteration.
 */
@Test
public void testCompressedReader() throws IOException {
    // Program the mock: list calls for both locations, then the object fetches.
    EasyMock.reset(OSSCLIENT);
    expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_COMPRESSED_URIS.get(0)), CONTENT);
    expectListObjects(EXPECTED_COMPRESSED_URIS.get(1), ImmutableList.of(EXPECTED_COMPRESSED_URIS.get(1)), CONTENT);
    expectGetObjectCompressed(EXPECTED_COMPRESSED_URIS.get(0));
    expectGetObjectCompressed(EXPECTED_COMPRESSED_URIS.get(1));
    EasyMock.replay(OSSCLIENT);

    OssInputSource inputSource = new OssInputSource(OSSCLIENT, INPUT_DATA_CONFIG, null, ImmutableList.of(PREFIXES.get(0), EXPECTED_COMPRESSED_URIS.get(1)), null, null);
    InputRowSchema someSchema = new InputRowSchema(new TimestampSpec("time", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))), ColumnsFilter.all());
    InputSourceReader reader = inputSource.reader(someSchema, new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0), temporaryFolder.newFolder());

    // Previously the iterator was never closed; close it deterministically.
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        while (iterator.hasNext()) {
            InputRow nextRow = iterator.next();
            Assert.assertEquals(NOW, nextRow.getTimestamp());
            Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0));
            Assert.assertEquals("world", nextRow.getDimension("dim2").get(0));
        }
    }

    // All programmed expectations on the mock must have been consumed.
    EasyMock.verify(OSSCLIENT);
}
Also used : InputSourceReader(org.apache.druid.data.input.InputSourceReader) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 38 with InputRowSchema

use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

the class CsvReaderTest method testQuotes.

/**
 * Feeds CSV lines containing doubled quotes, backslash-prefixed quotes, embedded
 * commas and a literal backslash-n sequence through {@link CsvInputFormat}, and
 * compares every parsed row against a hand-written expected row.
 *
 * <p>Besides the pairwise comparison, the test asserts that the reader produced as
 * many rows as were expected; without that check a reader that returned too few rows
 * (or none at all) would pass unnoticed.
 */
@Test
public void testQuotes() throws IOException {
    final ByteEntity source = writeData(
        ImmutableList.of(
            "3,\"Lets do some \"\"normal\"\" quotes\",2018-05-05T10:00:00Z",
            "34,\"Lets do some \"\"normal\"\", quotes with comma\",2018-05-06T10:00:00Z",
            "343,\"Lets try \\\"\"it\\\"\" with slash quotes\",2018-05-07T10:00:00Z",
            "545,\"Lets try \\\"\"it\\\"\", with slash quotes and comma\",2018-05-08T10:00:00Z",
            "65,Here I write \\n slash n,2018-05-09T10:00:00Z"
        )
    );
    // One expected row per input line, in input order.
    final List<InputRow> expectedResults = ImmutableList.of(
        new MapBasedInputRow(DateTimes.of("2018-05-05T10:00:00Z"), ImmutableList.of("Timestamp"), ImmutableMap.of("Value", "3", "Comment", "Lets do some \"normal\" quotes", "Timestamp", "2018-05-05T10:00:00Z")),
        new MapBasedInputRow(DateTimes.of("2018-05-06T10:00:00Z"), ImmutableList.of("Timestamp"), ImmutableMap.of("Value", "34", "Comment", "Lets do some \"normal\", quotes with comma", "Timestamp", "2018-05-06T10:00:00Z")),
        new MapBasedInputRow(DateTimes.of("2018-05-07T10:00:00Z"), ImmutableList.of("Timestamp"), ImmutableMap.of("Value", "343", "Comment", "Lets try \\\"it\\\" with slash quotes", "Timestamp", "2018-05-07T10:00:00Z")),
        new MapBasedInputRow(DateTimes.of("2018-05-08T10:00:00Z"), ImmutableList.of("Timestamp"), ImmutableMap.of("Value", "545", "Comment", "Lets try \\\"it\\\", with slash quotes and comma", "Timestamp", "2018-05-08T10:00:00Z")),
        new MapBasedInputRow(DateTimes.of("2018-05-09T10:00:00Z"), ImmutableList.of("Timestamp"), ImmutableMap.of("Value", "65", "Comment", "Here I write \\n slash n", "Timestamp", "2018-05-09T10:00:00Z"))
    );
    final CsvInputFormat format = new CsvInputFormat(ImmutableList.of("Value", "Comment", "Timestamp"), null, null, false, 0);
    final InputEntityReader reader = format.createReader(new InputRowSchema(new TimestampSpec("Timestamp", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("Timestamp"))), ColumnsFilter.all()), source, null);
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        final Iterator<InputRow> expectedRowIterator = expectedResults.iterator();
        while (iterator.hasNext()) {
            // Fails if the reader yields more rows than expected.
            Assert.assertTrue(expectedRowIterator.hasNext());
            Assert.assertEquals(expectedRowIterator.next(), iterator.next());
        }
        // Fails if the reader yields fewer rows than expected.
        Assert.assertFalse(expectedRowIterator.hasNext());
    }
}
Also used : MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Example 39 with InputRowSchema

use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

the class InputEntityIteratingReaderTest method test.

/**
 * Writes five two-line CSV files, reads them back through an
 * {@link InputEntityIteratingReader}, and checks that rows arrive in pairs (two per
 * file) with the timestamp, name and score derived from the file index.
 */
@Test
public void test() throws IOException {
    final int numFiles = 5;
    final List<File> files = new ArrayList<>();
    for (int i = 0; i < numFiles; i++) {
        final File csvFile = temporaryFolder.newFile("test_" + i);
        files.add(csvFile);
        try (Writer out = Files.newBufferedWriter(csvFile.toPath(), StandardCharsets.UTF_8)) {
            // First row ends with a newline; the second row is left unterminated.
            out.write(StringUtils.format("%d,%s,%d\n", 20190101 + i, "name_" + i, i));
            out.write(StringUtils.format("%d,%s,%d", 20190102 + i, "name_" + (i + 1), i + 1));
        }
    }

    final InputRowSchema schema = new InputRowSchema(
        new TimestampSpec("time", "yyyyMMdd", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("time", "name", "score"))),
        ColumnsFilter.all()
    );
    final CsvInputFormat csvFormat = new CsvInputFormat(ImmutableList.of("time", "name", "score"), null, null, false, 0);
    final InputEntityIteratingReader reader = new InputEntityIteratingReader(
        schema,
        csvFormat,
        // One FileEntity per written file, in creation order.
        files.stream().map(file -> new FileEntity(file)).iterator(),
        temporaryFolder.newFolder()
    );

    try (CloseableIterator<InputRow> rows = reader.read()) {
        int fileIndex = 0;
        while (rows.hasNext()) {
            // First row of the current file.
            InputRow current = rows.next();
            Assert.assertEquals(DateTimes.of(StringUtils.format("2019-01-%02d", fileIndex + 1)), current.getTimestamp());
            Assert.assertEquals(StringUtils.format("name_%d", fileIndex), Iterables.getOnlyElement(current.getDimension("name")));
            Assert.assertEquals(Integer.toString(fileIndex), Iterables.getOnlyElement(current.getDimension("score")));

            // Second row of the current file must be available.
            Assert.assertTrue(rows.hasNext());
            current = rows.next();
            Assert.assertEquals(DateTimes.of(StringUtils.format("2019-01-%02d", fileIndex + 2)), current.getTimestamp());
            Assert.assertEquals(StringUtils.format("name_%d", fileIndex + 1), Iterables.getOnlyElement(current.getDimension("name")));
            Assert.assertEquals(Integer.toString(fileIndex + 1), Iterables.getOnlyElement(current.getDimension("score")));

            fileIndex++;
        }
        // Every file must have contributed exactly one pair of rows.
        Assert.assertEquals(numFiles, fileIndex);
    }
}
Also used : ArrayList(java.util.ArrayList) InputRow(org.apache.druid.data.input.InputRow) InputRowSchema(org.apache.druid.data.input.InputRowSchema) File(java.io.File) Writer(java.io.Writer) Test(org.junit.Test)

Example 40 with InputRowSchema

use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

the class JsonReaderTest method testSampleEmptyText.

/**
 * Samples an empty input through {@link JsonInputFormat} and checks that the sampler
 * yields exactly one entry and that the entry carries a parse exception.
 */
@Test
public void testSampleEmptyText() throws IOException {
    final JSONPathSpec flattenSpec = new JSONPathSpec(
        true,
        ImmutableList.of(
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"),
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
        )
    );
    // The trailing 'false' makes sure JsonReader is used.
    final JsonInputFormat format = new JsonInputFormat(flattenSpec, null, null, false);

    // The input is empty.
    final ByteEntity source = new ByteEntity(StringUtils.toUtf8(""));

    final InputRowSchema schema = new InputRowSchema(
        new TimestampSpec("timestamp", "iso", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))),
        ColumnsFilter.all()
    );
    final InputEntityReader reader = format.createReader(schema, source, null);

    int sampledCount = 0;
    try (CloseableIterator<InputRowListPlusRawValues> samples = reader.sample()) {
        while (samples.hasNext()) {
            sampledCount++;
            final InputRowListPlusRawValues sampled = samples.next();
            Assert.assertNotNull(sampled.getParseException());
        }
        // The sampler is expected to iterate exactly once.
        Assert.assertEquals(1, sampledCount);
    }
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Aggregations

InputRowSchema (org.apache.druid.data.input.InputRowSchema)63 Test (org.junit.Test)55 InputRow (org.apache.druid.data.input.InputRow)52 InputEntityReader (org.apache.druid.data.input.InputEntityReader)39 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)37 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)36 JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec)29 JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec)26 InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues)24 InputSourceReader (org.apache.druid.data.input.InputSourceReader)10 ByteEntity (org.apache.druid.data.input.impl.ByteEntity)9 CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat)9 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)9 File (java.io.File)7 KafkaRecordEntity (org.apache.druid.data.input.kafka.KafkaRecordEntity)5 ArrayList (java.util.ArrayList)4 Collections (java.util.Collections)4 List (java.util.List)4 Map (java.util.Map)4 Nullable (javax.annotation.Nullable)4