Use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.
Class OrcReaderTest, method testOrcSplitElim.
// This test is migrated from OrcHadoopInputRowParserTest
@Test
public void testOrcSplitElim() throws IOException {
  final InputEntityReader reader = createReader(
      new TimestampSpec("ts", "millis", null),
      new DimensionsSpec(null),
      new OrcInputFormat(new JSONPathSpec(true, null), null, new Configuration()),
      "example/orc_split_elim.orc"
  );
  try (CloseableIterator<InputRow> iterator = reader.read()) {
    int actualRowCount = 0;
    Assert.assertTrue(iterator.hasNext());
    final InputRow row = iterator.next();
    actualRowCount++;
    Assert.assertEquals(DateTimes.of("1969-12-31T16:00:00.0Z"), row.getTimestamp());
    Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("userid")));
    Assert.assertEquals("foo", Iterables.getOnlyElement(row.getDimension("string1")));
    Assert.assertEquals("0.8", Iterables.getOnlyElement(row.getDimension("subtype")));
    Assert.assertEquals("1.2", Iterables.getOnlyElement(row.getDimension("decimal1")));
    while (iterator.hasNext()) {
      actualRowCount++;
      iterator.next();
    }
    Assert.assertEquals(25000, actualRowCount);
  }
}
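For reference, a minimal standalone sketch (not taken from the Druid test suite; the class name, sample map, and values are made up) of what the "millis" format configured above does: the raw "ts" value is treated as epoch milliseconds. It assumes the TimestampSpec.extractTimestamp(Map) helper available in recent Druid versions.
import java.util.Map;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.joda.time.DateTime;

public class MillisTimestampSketch {
  public static void main(String[] args) {
    // Column "ts" holds epoch milliseconds; null means no fallback for missing timestamps.
    TimestampSpec spec = new TimestampSpec("ts", "millis", null);
    Map<String, Object> rawRow = ImmutableMap.of("ts", 1546300800000L, "userid", "2");
    DateTime timestamp = spec.extractTimestamp(rawRow);
    System.out.println(timestamp); // expected: 2019-01-01T00:00:00.000Z
  }
}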
Use of org.apache.druid.data.input.impl.TimestampSpec in project hive by apache.
Class TestDruidRecordWriter, method testWrite.
// Test is failing due to Guava dependency, Druid 0.13.0 should have less dependency on Guava
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
  final String dataSourceName = "testDataSource";
  final File segmentOutputDir = temporaryFolder.newFolder();
  final File workingDir = temporaryFolder.newFolder();
  Configuration config = new Configuration();
  final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(
      new TimestampSpec(DruidConstants.DEFAULT_TIMESTAMP_COLUMN, "auto", null),
      new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("host")), null, null)
  ));
  final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser, new TypeReference<Map<String, Object>>() {
  });
  DataSchema dataSchema = new DataSchema(
      dataSourceName,
      parserMap,
      new AggregatorFactory[] {
          new LongSumAggregatorFactory("visited_sum", "visited_sum"),
          new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts")
      },
      new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)),
      null,
      objectMapper
  );
  IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
  RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
      null, null, null, null, temporaryFolder.newFolder(), null, null, null, null,
      indexSpec, null, null, 0, 0, null, null, 0L, null, null
  );
  LocalFileSystem localFileSystem = FileSystem.getLocal(config);
  DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {
    @Override
    public File getStorageDirectory() {
      return segmentOutputDir;
    }
  });
  Path segmentDescriptorPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
  DruidRecordWriter druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptorPath, localFileSystem);
  List<DruidWritable> druidWritables = expectedRows.stream()
      .map(input -> new DruidWritable(ImmutableMap.<String, Object>builder()
          .putAll(input)
          .put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME,
              Granularities.DAY.bucketStart(new DateTime((long) input.get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN))).getMillis())
          .build()))
      .collect(Collectors.toList());
  for (DruidWritable druidWritable : druidWritables) {
    druidRecordWriter.write(druidWritable);
  }
  druidRecordWriter.close(false);
  List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getCreatedSegments(segmentDescriptorPath, config);
  Assert.assertEquals(1, dataSegmentList.size());
  File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
  new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
  final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
  QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
  Firehose firehose = new IngestSegmentFirehose(
      ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())),
      null,
      ImmutableList.of("host"),
      ImmutableList.of("visited_sum", "unique_hosts"),
      null
  );
  List<InputRow> rows = Lists.newArrayList();
  while (firehose.hasMore()) {
    rows.add(firehose.nextRow());
  }
  verifyRows(expectedRows, rows);
}
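As a side note, a hedged sketch (not part of the Hive test; the class name and row contents are invented, and "__time" stands in for DruidConstants.DEFAULT_TIMESTAMP_COLUMN) of why the "auto" format is convenient here: it accepts either ISO-8601 strings or numeric epoch-millisecond values for the same column.
import java.util.Map;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.impl.TimestampSpec;

public class AutoTimestampSketch {
  public static void main(String[] args) {
    TimestampSpec spec = new TimestampSpec("__time", "auto", null);
    Map<String, Object> isoRow = ImmutableMap.of("__time", "2019-01-01T00:00:00Z", "host", "a.example.com");
    Map<String, Object> millisRow = ImmutableMap.of("__time", 1546300800000L, "host", "b.example.com");
    // Both rows should resolve to the same instant; "auto" inspects the raw value's shape.
    System.out.println(spec.extractTimestamp(isoRow));
    System.out.println(spec.extractTimestamp(millisRow));
  }
}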
Use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.
Class FirehoseFactoryToInputSourceAdaptorTest, method testUnimplementedInputFormat.
@Test
public void testUnimplementedInputFormat() throws IOException {
  final List<String> lines = new ArrayList<>();
  for (int i = 0; i < 10; i++) {
    lines.add(StringUtils.format("%d,name_%d,%d", 20190101 + i, i, i + 100));
  }
  final TestFirehoseFactory firehoseFactory = new TestFirehoseFactory(lines);
  final StringInputRowParser inputRowParser = new StringInputRowParser(
      new CSVParseSpec(
          new TimestampSpec(null, "yyyyMMdd", null),
          new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("timestamp", "name", "score"))),
          ",",
          Arrays.asList("timestamp", "name", "score"),
          false,
          0
      ),
      StringUtils.UTF8_STRING
  );
  final FirehoseFactoryToInputSourceAdaptor inputSourceAdaptor = new FirehoseFactoryToInputSourceAdaptor(firehoseFactory, inputRowParser);
  final InputSourceReader reader = inputSourceAdaptor.reader(
      new InputRowSchema(
          inputRowParser.getParseSpec().getTimestampSpec(),
          inputRowParser.getParseSpec().getDimensionsSpec(),
          ColumnsFilter.all()
      ),
      null,
      null
  );
  final List<InputRow> result = new ArrayList<>();
  try (CloseableIterator<InputRow> iterator = reader.read()) {
    while (iterator.hasNext()) {
      result.add(iterator.next());
    }
  }
  Assert.assertEquals(10, result.size());
  for (int i = 0; i < 10; i++) {
    Assert.assertEquals(DateTimes.of(StringUtils.format("2019-01-%02d", 1 + i)), result.get(i).getTimestamp());
    Assert.assertEquals(StringUtils.format("name_%d", i), Iterables.getOnlyElement(result.get(i).getDimension("name")));
    Assert.assertEquals(StringUtils.format("%d", i + 100), Iterables.getOnlyElement(result.get(i).getDimension("score")));
  }
}
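A small assumed example (not from the test; the class name and sample value are made up) of the "yyyyMMdd" Joda-Time pattern used above. Passing null as the column name makes the spec fall back to the default column, "timestamp", which matches the first CSV column in the test.
import java.util.Map;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.impl.TimestampSpec;

public class PatternTimestampSketch {
  public static void main(String[] args) {
    // Null column name -> default "timestamp" column; "yyyyMMdd" is a Joda-Time pattern.
    TimestampSpec spec = new TimestampSpec(null, "yyyyMMdd", null);
    Map<String, Object> rawRow = ImmutableMap.of("timestamp", "20190105", "name", "name_4");
    System.out.println(spec.extractTimestamp(rawRow));
    // expected: 2019-01-05T00:00:00.000Z (assuming a UTC default time zone)
  }
}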
Use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.
Class OssInputSourceTest, method testCompressedReader.
@Test
public void testCompressedReader() throws IOException {
  EasyMock.reset(OSSCLIENT);
  expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_COMPRESSED_URIS.get(0)), CONTENT);
  expectListObjects(EXPECTED_COMPRESSED_URIS.get(1), ImmutableList.of(EXPECTED_COMPRESSED_URIS.get(1)), CONTENT);
  expectGetObjectCompressed(EXPECTED_COMPRESSED_URIS.get(0));
  expectGetObjectCompressed(EXPECTED_COMPRESSED_URIS.get(1));
  EasyMock.replay(OSSCLIENT);
  OssInputSource inputSource = new OssInputSource(
      OSSCLIENT,
      INPUT_DATA_CONFIG,
      null,
      ImmutableList.of(PREFIXES.get(0), EXPECTED_COMPRESSED_URIS.get(1)),
      null,
      null
  );
  InputRowSchema someSchema = new InputRowSchema(
      new TimestampSpec("time", "auto", null),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))),
      ColumnsFilter.all()
  );
  InputSourceReader reader = inputSource.reader(
      someSchema,
      new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0),
      temporaryFolder.newFolder()
  );
  CloseableIterator<InputRow> iterator = reader.read();
  while (iterator.hasNext()) {
    InputRow nextRow = iterator.next();
    Assert.assertEquals(NOW, nextRow.getTimestamp());
    Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0));
    Assert.assertEquals("world", nextRow.getDimension("dim2").get(0));
  }
  EasyMock.verify(OSSCLIENT);
}
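One more hedged sketch (class name and values are invented, not part of the OSS test): the third TimestampSpec constructor argument, null above, is a fallback DateTime used when a row carries no "time" column at all; verify this behavior of extractTimestamp against your Druid version.
import java.util.Map;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.java.util.common.DateTimes;

public class MissingValueTimestampSketch {
  public static void main(String[] args) {
    // A non-null missing value acts as the timestamp for rows lacking the "time" column.
    TimestampSpec spec = new TimestampSpec("time", "auto", DateTimes.of("2020-01-01"));
    Map<String, Object> rawRow = ImmutableMap.of("dim1", "hello", "dim2", "world");
    System.out.println(spec.extractTimestamp(rawRow));
    // expected: 2020-01-01T00:00:00.000Z
  }
}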
Use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.
Class FlattenJSONBenchmarkUtil, method getForcedPathParser.
public Parser getForcedPathParser() {
  List<JSONPathFieldSpec> fields = new ArrayList<>();
  fields.add(JSONPathFieldSpec.createNestedField("ts", "$['ts']"));
  fields.add(JSONPathFieldSpec.createNestedField("d1", "$['d1']"));
  fields.add(JSONPathFieldSpec.createNestedField("d2", "$['d2']"));
  fields.add(JSONPathFieldSpec.createNestedField("e1.d1", "$['e1.d1']"));
  fields.add(JSONPathFieldSpec.createNestedField("e1.d2", "$['e1.d2']"));
  fields.add(JSONPathFieldSpec.createNestedField("e2.d3", "$['e2.d3']"));
  fields.add(JSONPathFieldSpec.createNestedField("e2.d4", "$['e2.d4']"));
  fields.add(JSONPathFieldSpec.createNestedField("e2.d5", "$['e2.d5']"));
  fields.add(JSONPathFieldSpec.createNestedField("e2.d6", "$['e2.d6']"));
  fields.add(JSONPathFieldSpec.createNestedField("e2.ad1[0]", "$['e2.ad1[0]']"));
  fields.add(JSONPathFieldSpec.createNestedField("e2.ad1[1]", "$['e2.ad1[1]']"));
  fields.add(JSONPathFieldSpec.createNestedField("e2.ad1[2]", "$['e2.ad1[2]']"));
  fields.add(JSONPathFieldSpec.createNestedField("ae1[0].d1", "$['ae1[0].d1']"));
  fields.add(JSONPathFieldSpec.createNestedField("ae1[1].d1", "$['ae1[1].d1']"));
  fields.add(JSONPathFieldSpec.createNestedField("ae1[2].e1.d2", "$['ae1[2].e1.d2']"));
  fields.add(JSONPathFieldSpec.createNestedField("m3", "$['m3']"));
  fields.add(JSONPathFieldSpec.createNestedField("m4", "$['m4']"));
  fields.add(JSONPathFieldSpec.createNestedField("e3.m1", "$['e3.m1']"));
  fields.add(JSONPathFieldSpec.createNestedField("e3.m2", "$['e3.m2']"));
  fields.add(JSONPathFieldSpec.createNestedField("e3.m3", "$['e3.m3']"));
  fields.add(JSONPathFieldSpec.createNestedField("e3.m4", "$['e3.m4']"));
  fields.add(JSONPathFieldSpec.createNestedField("e3.am1[0]", "$['e3.am1[0]']"));
  fields.add(JSONPathFieldSpec.createNestedField("e3.am1[1]", "$['e3.am1[1]']"));
  fields.add(JSONPathFieldSpec.createNestedField("e3.am1[2]", "$['e3.am1[2]']"));
  fields.add(JSONPathFieldSpec.createNestedField("e3.am1[3]", "$['e3.am1[3]']"));
  fields.add(JSONPathFieldSpec.createNestedField("e4.e4.m4", "$['e4.e4.m4']"));
  JSONPathSpec flattenSpec = new JSONPathSpec(false, fields);
  JSONParseSpec spec = new JSONParseSpec(new TimestampSpec("ts", "iso", null), DimensionsSpec.EMPTY, flattenSpec, null, null);
  return spec.makeParser();
}
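Finally, an illustrative sketch (class name and sample value invented) of the "iso" format configured in this parse spec: it parses ISO-8601 timestamp strings such as the flattened "ts" field.
import java.util.Map;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.impl.TimestampSpec;

public class IsoTimestampSketch {
  public static void main(String[] args) {
    // "iso" parses ISO-8601 timestamp strings from the "ts" column.
    TimestampSpec spec = new TimestampSpec("ts", "iso", null);
    Map<String, Object> rawRow = ImmutableMap.of("ts", "2021-06-01T12:34:56.789Z");
    System.out.println(spec.extractTimestamp(rawRow));
    // expected: 2021-06-01T12:34:56.789Z
  }
}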