Use of io.druid.data.input.impl.TimestampSpec in project hive by apache.
The class TestDruidRecordWriter, method testWrite:
// This test needs this patch: https://github.com/druid-io/druid/pull/3483
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
  final String dataSourceName = "testDataSource";
  final File segmentOutputDir = temporaryFolder.newFolder();
  final File workingDir = temporaryFolder.newFolder();
  Configuration config = new Configuration();
  final InputRowParser inputRowParser = new MapInputRowParser(
      new TimeAndDimsParseSpec(
          new TimestampSpec(DruidTable.DEFAULT_TIMESTAMP_COLUMN, "auto", null),
          new DimensionsSpec(ImmutableList.<DimensionSchema>of(new StringDimensionSchema("host")), null, null)
      )
  );
  final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser, Map.class);
  DataSchema dataSchema = new DataSchema(
      dataSourceName,
      parserMap,
      new AggregatorFactory[] {
          new LongSumAggregatorFactory("visited_sum", "visited_sum"),
          new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts")
      },
      new UniformGranularitySpec(Granularity.DAY, QueryGranularities.NONE, ImmutableList.of(INTERVAL_FULL)),
      objectMapper
  );
  RealtimeTuningConfig tuningConfig = RealtimeTuningConfig.makeDefaultTuningConfig(temporaryFolder.newFolder());
  LocalFileSystem localFileSystem = FileSystem.getLocal(config);
  DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(
      new LocalDataSegmentPusherConfig() {
        @Override
        public File getStorageDirectory() {
          return segmentOutputDir;
        }
      },
      objectMapper
  );
  Path segmentDescriptorPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
  druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptorPath, localFileSystem);
  // Attach the day-truncated granularity timestamp column that the record writer expects.
  List<DruidWritable> druidWritables = Lists.transform(expectedRows,
      new Function<ImmutableMap<String, Object>, DruidWritable>() {
        @Nullable
        @Override
        public DruidWritable apply(@Nullable ImmutableMap<String, Object> input) {
          return new DruidWritable(ImmutableMap.<String, Object>builder()
              .putAll(input)
              .put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME,
                  Granularity.DAY.truncate(new DateTime((long) input.get(DruidTable.DEFAULT_TIMESTAMP_COLUMN))).getMillis())
              .build());
        }
      });
  for (DruidWritable druidWritable : druidWritables) {
    druidRecordWriter.write(druidWritable);
  }
  druidRecordWriter.close(false);
  // Exactly one segment should have been published; pull it back locally and re-read it.
  List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getPublishedSegments(segmentDescriptorPath, config);
  Assert.assertEquals(1, dataSegmentList.size());
  File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
  new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
  final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
  QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
  Firehose firehose = new IngestSegmentFirehose(
      ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())),
      ImmutableList.of("host"),
      ImmutableList.of("visited_sum", "unique_hosts"),
      null,
      QueryGranularities.NONE
  );
  List<InputRow> rows = Lists.newArrayList();
  while (firehose.hasMore()) {
    rows.add(firehose.nextRow());
  }
  verifyRows(expectedRows, rows);
}
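For reference, the TimestampSpec constructed above can be exercised on its own. The sketch below is not part of the test; the column name "ts" and the sample row are invented, and it assumes io.druid.data.input.impl.TimestampSpec plus Joda-Time on the classpath. It shows how the "auto" format accepts the epoch-millisecond timestamps the test rows carry:
// "auto" detects either ISO-8601 strings or epoch milliseconds in the timestamp column;
// the null third argument is the fallback timestamp used when the column is missing.
TimestampSpec timestampSpec = new TimestampSpec("ts", "auto", null);
Map<String, Object> row = new HashMap<>();
row.put("ts", 1294790400000L); // 2011-01-12T00:00:00.000Z as epoch millis
row.put("host", "a.example.com");
DateTime timestamp = timestampSpec.extractTimestamp(row); // 2011-01-12T00:00:00.000Z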
Use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.
The class MapVirtualColumnTest, method constructorFeeder:
@Parameterized.Parameters
public static Iterable<Object[]> constructorFeeder() throws IOException {
  final Supplier<SelectQueryConfig> selectConfigSupplier = Suppliers.ofInstance(new SelectQueryConfig(true));
  SelectQueryRunnerFactory factory = new SelectQueryRunnerFactory(
      new SelectQueryQueryToolChest(
          new DefaultObjectMapper(),
          QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator(),
          selectConfigSupplier
      ),
      new SelectQueryEngine(selectConfigSupplier),
      QueryRunnerTestHelper.NOOP_QUERYWATCHER
  );
  final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(new DateTime("2011-01-12T00:00:00.000Z").getMillis())
      .withQueryGranularity(Granularities.NONE)
      .build();
  final IncrementalIndex index = new OnheapIncrementalIndex(schema, true, 10000);
  final StringInputRowParser parser = new StringInputRowParser(
      new DelimitedParseSpec(
          new TimestampSpec("ts", "iso", null),
          new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("dim", "keys", "values")), null, null),
          "\t",
          ",",
          Arrays.asList("ts", "dim", "keys", "values")
      ),
      "utf8"
  );
  CharSource input = CharSource.wrap(
      "2011-01-12T00:00:00.000Z\ta\tkey1,key2,key3\tvalue1,value2,value3\n"
      + "2011-01-12T00:00:00.000Z\tb\tkey4,key5,key6\tvalue4\n"
      + "2011-01-12T00:00:00.000Z\tc\tkey1,key5\tvalue1,value5,value9\n"
  );
  IncrementalIndex index1 = TestIndex.loadIncrementalIndex(index, input, parser);
  QueryableIndex index2 = TestIndex.persistRealtimeAndLoadMMapped(index1);
  return transformToConstructionFeeder(Arrays.asList(
      makeQueryRunner(factory, "index1", new IncrementalIndexSegment(index1, "index1"), "incremental"),
      makeQueryRunner(factory, "index2", new QueryableIndexSegment("index2", index2), "queryable")
  ));
}
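As a rough illustration of what the parser above does with one of those tab-delimited lines, the sketch below reuses the parser variable from the snippet and assumes the parse(String) overload on StringInputRowParser; it is not part of the test:
InputRow row = parser.parse("2011-01-12T00:00:00.000Z\ta\tkey1,key2,key3\tvalue1,value2,value3");
// The "iso" TimestampSpec turns the first column into the row timestamp, and the
// comma list delimiter splits "keys" and "values" into multi-value dimensions.
DateTime timestamp = row.getTimestamp(); // 2011-01-12T00:00:00.000Z
List<String> keys = row.getDimension("keys"); // ["key1", "key2", "key3"]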
Use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.
The class AvroStreamInputRowParser, method parseGenericRecord:
protected static InputRow parseGenericRecord(
    GenericRecord record,
    ParseSpec parseSpec,
    List<String> dimensions,
    boolean fromPigAvroStorage,
    boolean binaryAsString
) {
  GenericRecordAsMap genericRecordAsMap = new GenericRecordAsMap(record, fromPigAvroStorage, binaryAsString);
  TimestampSpec timestampSpec = parseSpec.getTimestampSpec();
  DateTime dateTime = timestampSpec.extractTimestamp(genericRecordAsMap);
  return new MapBasedInputRow(dateTime, dimensions, genericRecordAsMap);
}
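To see what parseGenericRecord consumes, the sketch below builds a GenericRecord by hand and reads its timestamp the same way the method does. The schema and field names are invented for illustration, and it assumes the Avro Schema.Parser and GenericData.Record APIs with plain Java Strings for the string fields:
Schema schema = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"event\",\"fields\":["
    + "{\"name\":\"timestamp\",\"type\":\"string\"},"
    + "{\"name\":\"host\",\"type\":\"string\"}]}");
GenericRecord record = new GenericData.Record(schema);
record.put("timestamp", "2015-08-01T12:00:00.000Z");
record.put("host", "a.example.com");
// GenericRecordAsMap exposes the record's fields through the Map interface,
// so the TimestampSpec can read the timestamp field without copying the record.
GenericRecordAsMap asMap = new GenericRecordAsMap(record, false, false);
DateTime dateTime = new TimestampSpec("timestamp", "auto", null).extractTimestamp(asMap);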
Use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.
The class OrcHadoopInputRowParser, method parse:
@Override
public InputRow parse(OrcStruct input) {
  Map<String, Object> map = Maps.newHashMap();
  List<? extends StructField> fields = oip.getAllStructFieldRefs();
  for (StructField field : fields) {
    ObjectInspector objectInspector = field.getFieldObjectInspector();
    switch (objectInspector.getCategory()) {
      case PRIMITIVE:
        PrimitiveObjectInspector primitiveObjectInspector = (PrimitiveObjectInspector) objectInspector;
        map.put(field.getFieldName(), primitiveObjectInspector.getPrimitiveJavaObject(oip.getStructFieldData(input, field)));
        break;
      case LIST:
        // array case: only 1-depth arrays are supported for now
        ListObjectInspector listObjectInspector = (ListObjectInspector) objectInspector;
        map.put(field.getFieldName(), getListObject(listObjectInspector, oip.getStructFieldData(input, field)));
        break;
      default:
        break;
    }
  }
  TimestampSpec timestampSpec = parseSpec.getTimestampSpec();
  DateTime dateTime = timestampSpec.extractTimestamp(map);
  return new MapBasedInputRow(dateTime, dimensions, map);
}
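The ORC parser ends the same way the Avro one does: the flattened map, the extracted DateTime, and the dimension list are wrapped in a MapBasedInputRow. The sketch below, with invented column names and values, shows what such a row exposes to the rest of the ingestion pipeline:
DateTime dateTime = new DateTime("2015-08-01T12:00:00.000Z");
Map<String, Object> event = Maps.newHashMap();
event.put("host", "a.example.com");
event.put("visited_sum", 10L);
InputRow inputRow = new MapBasedInputRow(dateTime, ImmutableList.of("host"), event);
inputRow.getTimestamp(); // 2015-08-01T12:00:00.000Z
inputRow.getDimension("host"); // ["a.example.com"]
inputRow.getRaw("visited_sum"); // 10 (metrics stay in the raw event map for the aggregators)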
Use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.
The class OrcHadoopInputRowParserTest, method testTypeFromParseSpec:
@Test
public void testTypeFromParseSpec() {
  ParseSpec parseSpec = new TimeAndDimsParseSpec(
      new TimestampSpec("timestamp", "auto", null),
      new DimensionsSpec(ImmutableList.<DimensionSchema>of(new StringDimensionSchema("col1"), new StringDimensionSchema("col2")), null, null)
  );
  String typeString = OrcHadoopInputRowParser.typeStringFromParseSpec(parseSpec);
  String expected = "struct<timestamp:string,col1:string,col2:string>";
  Assert.assertEquals(expected, typeString);
}