use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
the class JsonLineReaderTest method testParseRowWithConditional.
/**
 * Parses a single JSON line through three flattened PATH fields, two of which use a
 * conditional ({@code [?(@.field)]}) expression. The input only contains
 * {@code something_else}, so "bar" must resolve to "test" while "foo" and "baz"
 * (both rooted at absent objects) must come back empty.
 */
@Test
public void testParseRowWithConditional() throws IOException {
  final JSONPathSpec flattenSpec = new JSONPathSpec(
      true,
      ImmutableList.of(
          new JSONPathFieldSpec(JSONPathFieldType.PATH, "foo", "$.[?(@.maybe_object)].maybe_object.foo.test"),
          new JSONPathFieldSpec(JSONPathFieldType.PATH, "baz", "$.maybe_object_2.foo.test"),
          new JSONPathFieldSpec(JSONPathFieldType.PATH, "bar", "$.[?(@.something_else)].something_else.foo")
      )
  );
  final JsonInputFormat format = new JsonInputFormat(flattenSpec, null, null);

  final ByteEntity source = new ByteEntity(
      StringUtils.toUtf8("{\"timestamp\":\"2019-01-01\",\"something_else\": {\"foo\": \"test\"}}")
  );

  final InputRowSchema schema = new InputRowSchema(
      new TimestampSpec("timestamp", "iso", null),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("foo"))),
      ColumnsFilter.all()
  );
  final InputEntityReader reader = format.createReader(schema, source, null);

  // Exactly one row is expected from the single JSON object above.
  final int expectedRowCount = 1;
  try (CloseableIterator<InputRow> rows = reader.read()) {
    int rowsSeen = 0;
    while (rows.hasNext()) {
      final InputRow parsed = rows.next();
      Assert.assertEquals("test", Iterables.getOnlyElement(parsed.getDimension("bar")));
      Assert.assertEquals(Collections.emptyList(), parsed.getDimension("foo"));
      Assert.assertTrue(parsed.getDimension("baz").isEmpty());
      rowsSeen += 1;
    }
    Assert.assertEquals(expectedRowCount, rowsSeen);
  }
}
use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
the class OssInputSourceTest method testCompressedReader.
/**
 * Reads rows through an {@code OssInputSource} built from one prefix and one explicit
 * compressed URI, and checks the timestamp and both dimensions of every row returned.
 *
 * <p>The mock expectations are recorded through helper methods defined elsewhere in
 * this class ({@code expectListObjects}, {@code expectGetObjectCompressed}).
 *
 * @throws IOException if creating the temporary folder, reading, or closing the iterator fails
 */
@Test
public void testCompressedReader() throws IOException {
  EasyMock.reset(OSSCLIENT);
  expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_COMPRESSED_URIS.get(0)), CONTENT);
  expectListObjects(EXPECTED_COMPRESSED_URIS.get(1), ImmutableList.of(EXPECTED_COMPRESSED_URIS.get(1)), CONTENT);
  expectGetObjectCompressed(EXPECTED_COMPRESSED_URIS.get(0));
  expectGetObjectCompressed(EXPECTED_COMPRESSED_URIS.get(1));
  EasyMock.replay(OSSCLIENT);

  OssInputSource inputSource = new OssInputSource(
      OSSCLIENT,
      INPUT_DATA_CONFIG,
      null,
      ImmutableList.of(PREFIXES.get(0), EXPECTED_COMPRESSED_URIS.get(1)),
      null,
      null
  );
  InputRowSchema someSchema = new InputRowSchema(
      new TimestampSpec("time", "auto", null),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))),
      ColumnsFilter.all()
  );
  InputSourceReader reader = inputSource.reader(
      someSchema,
      new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0),
      temporaryFolder.newFolder()
  );

  // Close the iterator even when an assertion fails, so the underlying
  // resources held by the reader are always released.
  try (CloseableIterator<InputRow> iterator = reader.read()) {
    while (iterator.hasNext()) {
      InputRow nextRow = iterator.next();
      Assert.assertEquals(NOW, nextRow.getTimestamp());
      Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0));
      Assert.assertEquals("world", nextRow.getDimension("dim2").get(0));
    }
  }

  EasyMock.verify(OSSCLIENT);
}
use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
the class CsvReaderTest method testQuotes.
/**
 * Feeds five CSV lines exercising doubled quotes, quoted commas, backslash-prefixed
 * quotes, and a literal backslash-n sequence through {@code CsvInputFormat}, and
 * compares every parsed row against the expected {@code MapBasedInputRow}.
 *
 * <p>After the reader is drained, the expected iterator must be exhausted too;
 * otherwise a reader that yields too few rows (or none) would pass unnoticed.
 *
 * @throws IOException if reading or closing the iterator fails
 */
@Test
public void testQuotes() throws IOException {
  final ByteEntity source = writeData(
      ImmutableList.of(
          "3,\"Lets do some \"\"normal\"\" quotes\",2018-05-05T10:00:00Z",
          "34,\"Lets do some \"\"normal\"\", quotes with comma\",2018-05-06T10:00:00Z",
          "343,\"Lets try \\\"\"it\\\"\" with slash quotes\",2018-05-07T10:00:00Z",
          "545,\"Lets try \\\"\"it\\\"\", with slash quotes and comma\",2018-05-08T10:00:00Z",
          "65,Here I write \\n slash n,2018-05-09T10:00:00Z"
      )
  );
  final List<InputRow> expectedResults = ImmutableList.of(
      new MapBasedInputRow(
          DateTimes.of("2018-05-05T10:00:00Z"),
          ImmutableList.of("Timestamp"),
          ImmutableMap.of("Value", "3", "Comment", "Lets do some \"normal\" quotes", "Timestamp", "2018-05-05T10:00:00Z")
      ),
      new MapBasedInputRow(
          DateTimes.of("2018-05-06T10:00:00Z"),
          ImmutableList.of("Timestamp"),
          ImmutableMap.of("Value", "34", "Comment", "Lets do some \"normal\", quotes with comma", "Timestamp", "2018-05-06T10:00:00Z")
      ),
      new MapBasedInputRow(
          DateTimes.of("2018-05-07T10:00:00Z"),
          ImmutableList.of("Timestamp"),
          ImmutableMap.of("Value", "343", "Comment", "Lets try \\\"it\\\" with slash quotes", "Timestamp", "2018-05-07T10:00:00Z")
      ),
      new MapBasedInputRow(
          DateTimes.of("2018-05-08T10:00:00Z"),
          ImmutableList.of("Timestamp"),
          ImmutableMap.of("Value", "545", "Comment", "Lets try \\\"it\\\", with slash quotes and comma", "Timestamp", "2018-05-08T10:00:00Z")
      ),
      new MapBasedInputRow(
          DateTimes.of("2018-05-09T10:00:00Z"),
          ImmutableList.of("Timestamp"),
          ImmutableMap.of("Value", "65", "Comment", "Here I write \\n slash n", "Timestamp", "2018-05-09T10:00:00Z")
      )
  );
  final CsvInputFormat format = new CsvInputFormat(
      ImmutableList.of("Value", "Comment", "Timestamp"),
      null,
      null,
      false,
      0
  );
  final InputEntityReader reader = format.createReader(
      new InputRowSchema(
          new TimestampSpec("Timestamp", "auto", null),
          new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("Timestamp"))),
          ColumnsFilter.all()
      ),
      source,
      null
  );
  try (CloseableIterator<InputRow> iterator = reader.read()) {
    final Iterator<InputRow> expectedRowIterator = expectedResults.iterator();
    while (iterator.hasNext()) {
      Assert.assertTrue(expectedRowIterator.hasNext());
      Assert.assertEquals(expectedRowIterator.next(), iterator.next());
    }
    // Guard against the reader producing fewer rows than expected.
    Assert.assertFalse("reader returned fewer rows than expected", expectedRowIterator.hasNext());
  }
}
use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
the class InputEntityIteratingReaderTest method test.
/**
 * Writes five two-line CSV files, reads them back through a single
 * {@code InputEntityIteratingReader}, and checks that rows arrive file by file:
 * each file contributes a pair of rows whose timestamp, "name" and "score"
 * follow from the file index.
 */
@Test
public void test() throws IOException {
  final int numFiles = 5;
  final List<File> files = new ArrayList<>();
  for (int fileIdx = 0; fileIdx < numFiles; fileIdx++) {
    final File csvFile = temporaryFolder.newFile("test_" + fileIdx);
    files.add(csvFile);
    try (Writer out = Files.newBufferedWriter(csvFile.toPath(), StandardCharsets.UTF_8)) {
      // First line is terminated; the second deliberately has no trailing newline.
      out.write(StringUtils.format("%d,%s,%d\n", 20190101 + fileIdx, "name_" + fileIdx, fileIdx));
      out.write(StringUtils.format("%d,%s,%d", 20190102 + fileIdx, "name_" + (fileIdx + 1), fileIdx + 1));
    }
  }

  final InputRowSchema schema = new InputRowSchema(
      new TimestampSpec("time", "yyyyMMdd", null),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("time", "name", "score"))),
      ColumnsFilter.all()
  );
  final CsvInputFormat csvFormat = new CsvInputFormat(
      ImmutableList.of("time", "name", "score"),
      null,
      null,
      false,
      0
  );
  final InputEntityIteratingReader firehose = new InputEntityIteratingReader(
      schema,
      csvFormat,
      files.stream().map(file -> new FileEntity(file)).iterator(),
      temporaryFolder.newFolder()
  );

  try (CloseableIterator<InputRow> rows = firehose.read()) {
    int filesRead = 0;
    while (rows.hasNext()) {
      // First row of the current file.
      final InputRow first = rows.next();
      Assert.assertEquals(DateTimes.of(StringUtils.format("2019-01-%02d", filesRead + 1)), first.getTimestamp());
      Assert.assertEquals(StringUtils.format("name_%d", filesRead), Iterables.getOnlyElement(first.getDimension("name")));
      Assert.assertEquals(Integer.toString(filesRead), Iterables.getOnlyElement(first.getDimension("score")));

      // Second row of the same file must exist.
      Assert.assertTrue(rows.hasNext());
      final InputRow second = rows.next();
      Assert.assertEquals(DateTimes.of(StringUtils.format("2019-01-%02d", filesRead + 2)), second.getTimestamp());
      Assert.assertEquals(StringUtils.format("name_%d", filesRead + 1), Iterables.getOnlyElement(second.getDimension("name")));
      Assert.assertEquals(Integer.toString(filesRead + 1), Iterables.getOnlyElement(second.getDimension("score")));

      filesRead += 1;
    }
    Assert.assertEquals(numFiles, filesRead);
  }
}
use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
the class JsonReaderTest method testSampleEmptyText.
/**
 * Samples an empty input through a {@code JsonInputFormat} configured with ROOT,
 * PATH and JQ flattened fields, and expects exactly one sampled entry, which must
 * carry a parse exception.
 */
@Test
public void testSampleEmptyText() throws IOException {
  final JSONPathSpec flattenSpec = new JSONPathSpec(
      true,
      ImmutableList.of(
          new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"),
          new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"),
          new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
          new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
          new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
          new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
      )
  );
  final JsonInputFormat format = new JsonInputFormat(
      flattenSpec,
      null,
      null,
      // make sure JsonReader is used
      false
  );

  // The entity under test contains no bytes at all.
  final ByteEntity source = new ByteEntity(StringUtils.toUtf8(""));

  final InputRowSchema schema = new InputRowSchema(
      new TimestampSpec("timestamp", "iso", null),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))),
      ColumnsFilter.all()
  );
  final InputEntityReader reader = format.createReader(schema, source, null);

  // Sampling is expected to yield a single entry.
  final int expectedSamples = 1;
  try (CloseableIterator<InputRowListPlusRawValues> samples = reader.sample()) {
    int samplesSeen = 0;
    while (samples.hasNext()) {
      samplesSeen += 1;
      final InputRowListPlusRawValues sampled = samples.next();
      Assert.assertNotNull(sampled.getParseException());
    }
    Assert.assertEquals(expectedSamples, samplesSeen);
  }
}
Aggregations