use of io.druid.segment.indexing.DataSchema in project druid by druid-io.
the class GranularityPathSpecTest method testAddInputPath.
@Test
public void testAddInputPath() throws Exception {
UserGroupInformation.setLoginUser(UserGroupInformation.createUserForTesting("test", new String[] { "testGroup" }));
HadoopIngestionSpec spec = new HadoopIngestionSpec(
    new DataSchema(
        "foo",
        null,
        new AggregatorFactory[0],
        new UniformGranularitySpec(
            Granularities.DAY,
            Granularities.MINUTE,
            ImmutableList.of(new Interval("2015-11-06T00:00Z/2015-11-07T00:00Z"))
        ),
        jsonMapper
    ),
    new HadoopIOConfig(null, null, null),
    DEFAULT_TUNING_CONFIG
);
granularityPathSpec.setDataGranularity(Granularities.HOUR);
granularityPathSpec.setFilePattern(".*");
granularityPathSpec.setInputFormat(TextInputFormat.class);
Job job = Job.getInstance();
String formatStr = "file:%s/%s;org.apache.hadoop.mapreduce.lib.input.TextInputFormat";
testFolder.newFolder("test", "y=2015", "m=11", "d=06", "H=00");
testFolder.newFolder("test", "y=2015", "m=11", "d=06", "H=02");
testFolder.newFolder("test", "y=2015", "m=11", "d=06", "H=05");
testFolder.newFile("test/y=2015/m=11/d=06/H=00/file1");
testFolder.newFile("test/y=2015/m=11/d=06/H=02/file2");
testFolder.newFile("test/y=2015/m=11/d=06/H=05/file3");
testFolder.newFile("test/y=2015/m=11/d=06/H=05/file4");
granularityPathSpec.setInputPath(testFolder.getRoot().getPath() + "/test");
granularityPathSpec.addInputPaths(HadoopDruidIndexerConfig.fromSpec(spec), job);
String actual = job.getConfiguration().get("mapreduce.input.multipleinputs.dir.formats");
String expected = Joiner.on(",").join(
    Lists.newArrayList(
        String.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=00/file1"),
        String.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=02/file2"),
        String.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=05/file3"),
        String.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=05/file4")
    )
);
Assert.assertEquals("Did not find expected input paths", expected, actual);
}
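The assertion compares against Hadoop's mapreduce.input.multipleinputs.dir.formats key, where every file discovered under the HOUR-granular y=/m=/d=/H= layout (within the spec's 2015-11-06 day interval) becomes a "path;inputFormatClass" entry joined by commas. A minimal sketch of how one such entry is produced from the test's format string; the temporary-folder path is illustrative only:
// One entry per discovered file, built from formatStr above.
String oneEntry = String.format(
    "file:%s/%s;org.apache.hadoop.mapreduce.lib.input.TextInputFormat",
    testFolder.getRoot(),
    "test/y=2015/m=11/d=06/H=00/file1"
);
// e.g. file:/<tmp-root>/test/y=2015/m=11/d=06/H=00/file1;org.apache.hadoop.mapreduce.lib.input.TextInputFormat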
use of io.druid.segment.indexing.DataSchema in project hive by apache.
the class TestDruidRecordWriter method testWrite.
// Test is failing due to the Guava dependency; Druid 0.13.0 should have a lighter dependency on Guava
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
final String dataSourceName = "testDataSource";
final File segmentOutputDir = temporaryFolder.newFolder();
final File workingDir = temporaryFolder.newFolder();
Configuration config = new Configuration();
final InputRowParser inputRowParser = new MapInputRowParser(
    new TimeAndDimsParseSpec(
        new TimestampSpec(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, "auto", null),
        new DimensionsSpec(ImmutableList.<DimensionSchema>of(new StringDimensionSchema("host")), null, null)
    )
);
final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser, Map.class);
DataSchema dataSchema = new DataSchema(
    dataSourceName,
    parserMap,
    new AggregatorFactory[] {
        new LongSumAggregatorFactory("visited_sum", "visited_sum"),
        new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts")
    },
    new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)),
    objectMapper
);
IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
    null, null, null, temporaryFolder.newFolder(), null, null, null, null,
    indexSpec, null, 0, 0, null, null, 0L
);
LocalFileSystem localFileSystem = FileSystem.getLocal(config);
DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {
@Override
public File getStorageDirectory() {
return segmentOutputDir;
}
}, objectMapper);
Path segmentDescriptorPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptorPath, localFileSystem);
List<DruidWritable> druidWritables = Lists.transform(expectedRows, new Function<ImmutableMap<String, Object>, DruidWritable>() {
@Nullable
@Override
public DruidWritable apply(@Nullable ImmutableMap<String, Object> input) {
return new DruidWritable(
    ImmutableMap.<String, Object>builder()
        .putAll(input)
        .put(
            Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME,
            Granularities.DAY.bucketStart(
                new DateTime((long) input.get(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN))
            ).getMillis()
        )
        .build()
);
}
});
for (DruidWritable druidWritable : druidWritables) {
druidRecordWriter.write(druidWritable);
}
druidRecordWriter.close(false);
List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getCreatedSegments(segmentDescriptorPath, config);
Assert.assertEquals(1, dataSegmentList.size());
File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
Firehose firehose = new IngestSegmentFirehose(
    ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())),
    ImmutableList.of("host"),
    ImmutableList.of("visited_sum", "unique_hosts"),
    null
);
List<InputRow> rows = Lists.newArrayList();
while (firehose.hasMore()) {
rows.add(firehose.nextRow());
}
verifyRows(expectedRows, rows);
}
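The DataSchema in this test takes its parser as an untyped Map and re-materializes it with the supplied ObjectMapper. A minimal, hedged sketch of that round trip, reusing the parser, mapper, and constants from the test above and dropping everything not needed to show the conversion (this is not Hive's exact setup):
// Serialize the typed parser into a generic Map...
Map<String, Object> parserAsMap = objectMapper.convertValue(inputRowParser, Map.class);
// ...hand it to DataSchema untyped...
DataSchema sketchSchema = new DataSchema(
    "testDataSource",
    parserAsMap,
    new AggregatorFactory[] { new LongSumAggregatorFactory("visited_sum", "visited_sum") },
    new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)),
    objectMapper
);
// ...and let DataSchema rebuild the typed parser on demand.
InputRowParser rebuiltParser = sketchSchema.getParser();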
use of io.druid.segment.indexing.DataSchema in project druid by druid-io.
the class DefaultOfflineAppenderatorFactoryTest method testBuild.
@Test
public void testBuild() throws IOException, SegmentNotWritableException {
Injector injector = Initialization.makeInjectorWithModules(GuiceInjectors.makeStartupInjector(), ImmutableList.<Module>of(new Module() {
@Override
public void configure(Binder binder) {
binder.bindConstant().annotatedWith(Names.named("serviceName")).to("druid/tool");
binder.bindConstant().annotatedWith(Names.named("servicePort")).to(9999);
binder.bind(DruidProcessingConfig.class).toInstance(new DruidProcessingConfig() {
@Override
public String getFormatString() {
return "processing-%s";
}
@Override
public int intermediateComputeSizeBytes() {
return 100 * 1024 * 1024;
}
@Override
public int getNumThreads() {
return 1;
}
@Override
public int columnCacheSizeBytes() {
return 25 * 1024 * 1024;
}
});
binder.bind(ColumnConfig.class).to(DruidProcessingConfig.class);
}
}));
ObjectMapper objectMapper = injector.getInstance(ObjectMapper.class);
AppenderatorFactory defaultOfflineAppenderatorFactory = objectMapper.reader(AppenderatorFactory.class).readValue("{\"type\":\"offline\"}");
final Map<String, Object> parserMap = objectMapper.convertValue(
    new MapInputRowParser(
        new JSONParseSpec(
            new TimestampSpec("ts", "auto", null),
            new DimensionsSpec(null, null, null),
            null,
            null
        )
    ),
    Map.class
);
DataSchema schema = new DataSchema(
    "dataSourceName",
    parserMap,
    new AggregatorFactory[] {
        new CountAggregatorFactory("count"),
        new LongSumAggregatorFactory("met", "met")
    },
    new UniformGranularitySpec(Granularities.MINUTE, Granularities.NONE, null),
    objectMapper
);
RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
    75000, null, null, temporaryFolder.newFolder(), null, null, null, null, null, null, 0, 0, null, null
);
try (Appenderator appenderator = defaultOfflineAppenderatorFactory.build(schema, tuningConfig, new FireDepartmentMetrics())) {
Assert.assertEquals("dataSourceName", appenderator.getDataSource());
Assert.assertEquals(null, appenderator.startJob());
SegmentIdentifier identifier = new SegmentIdentifier("dataSourceName", new Interval("2000/2001"), "A", new LinearShardSpec(0));
Assert.assertEquals(0, ((AppenderatorImpl) appenderator).getRowsInMemory());
appenderator.add(identifier, AppenderatorTest.IR("2000", "bar", 1), Suppliers.ofInstance(Committers.nil()));
Assert.assertEquals(1, ((AppenderatorImpl) appenderator).getRowsInMemory());
appenderator.add(identifier, AppenderatorTest.IR("2000", "baz", 1), Suppliers.ofInstance(Committers.nil()));
Assert.assertEquals(2, ((AppenderatorImpl) appenderator).getRowsInMemory());
appenderator.close();
Assert.assertEquals(0, ((AppenderatorImpl) appenderator).getRowsInMemory());
}
}
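The factory itself comes purely from Jackson's polymorphic deserialization: the "type" field in the JSON selects the registered AppenderatorFactory implementation, and "offline" resolves to the offline appenderator exercised above. A condensed sketch of that flow, reusing the schema, tuningConfig, and objectMapper from the test with the assertions stripped out:
// "type":"offline" picks the offline AppenderatorFactory binding.
AppenderatorFactory factory = objectMapper
    .reader(AppenderatorFactory.class)
    .readValue("{\"type\":\"offline\"}");
// try-with-resources closes the appenderator, dropping any rows still in memory.
try (Appenderator appenderator = factory.build(schema, tuningConfig, new FireDepartmentMetrics())) {
    appenderator.startJob();
    SegmentIdentifier identifier = new SegmentIdentifier("dataSourceName", new Interval("2000/2001"), "A", new LinearShardSpec(0));
    appenderator.add(identifier, AppenderatorTest.IR("2000", "bar", 1), Suppliers.ofInstance(Committers.nil()));
}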
use of io.druid.segment.indexing.DataSchema in project druid by druid-io.
the class SinkTest method testSwap.
@Test
public void testSwap() throws Exception {
final DataSchema schema = new DataSchema(
    "test",
    null,
    new AggregatorFactory[] { new CountAggregatorFactory("rows") },
    new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
    new DefaultObjectMapper()
);
final Interval interval = new Interval("2013-01-01/2013-01-02");
final String version = new DateTime().toString();
RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
    100, new Period("P1Y"), null, null, null, null, null, null, null, null, 0, 0, null, null
);
final Sink sink = new Sink(
    interval,
    schema,
    tuningConfig.getShardSpec(),
    version,
    tuningConfig.getMaxRowsInMemory(),
    tuningConfig.isReportParseExceptions()
);
sink.add(new InputRow() {
@Override
public List<String> getDimensions() {
return Lists.newArrayList();
}
@Override
public long getTimestampFromEpoch() {
return new DateTime("2013-01-01").getMillis();
}
@Override
public DateTime getTimestamp() {
return new DateTime("2013-01-01");
}
@Override
public List<String> getDimension(String dimension) {
return Lists.newArrayList();
}
@Override
public float getFloatMetric(String metric) {
return 0;
}
@Override
public long getLongMetric(String metric) {
return 0L;
}
@Override
public Object getRaw(String dimension) {
return null;
}
@Override
public int compareTo(Row o) {
return 0;
}
});
FireHydrant currHydrant = sink.getCurrHydrant();
Assert.assertEquals(new Interval("2013-01-01/PT1M"), currHydrant.getIndex().getInterval());
FireHydrant swapHydrant = sink.swap();
sink.add(new InputRow() {
@Override
public List<String> getDimensions() {
return Lists.newArrayList();
}
@Override
public long getTimestampFromEpoch() {
return new DateTime("2013-01-01").getMillis();
}
@Override
public DateTime getTimestamp() {
return new DateTime("2013-01-01");
}
@Override
public List<String> getDimension(String dimension) {
return Lists.newArrayList();
}
@Override
public float getFloatMetric(String metric) {
return 0;
}
@Override
public long getLongMetric(String metric) {
return 0L;
}
@Override
public Object getRaw(String dimension) {
return null;
}
@Override
public int compareTo(Row o) {
return 0;
}
});
Assert.assertEquals(currHydrant, swapHydrant);
Assert.assertNotSame(currHydrant, sink.getCurrHydrant());
Assert.assertEquals(new Interval("2013-01-01/PT1M"), sink.getCurrHydrant().getIndex().getInterval());
Assert.assertEquals(2, Iterators.size(sink.iterator()));
}
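The two anonymous InputRow instances above exist only to supply an empty row stamped 2013-01-01. As a hedged alternative (not what SinkTest itself does), Druid's MapBasedInputRow can express the same row far more compactly; behaviour for missing metrics may differ with other aggregators, but the count aggregator used here never reads one:
// Same effective row: no dimensions, no event values, timestamped 2013-01-01.
InputRow row = new MapBasedInputRow(
    new DateTime("2013-01-01"),
    Lists.<String>newArrayList(),
    ImmutableMap.<String, Object>of()
);
sink.add(row);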
use of io.druid.segment.indexing.DataSchema in project druid by druid-io.
the class FireDepartmentTest method testSerde.
@Test
public void testSerde() throws Exception {
ObjectMapper jsonMapper = new DefaultObjectMapper();
jsonMapper.setInjectableValues(new InjectableValues.Std().addValue(ObjectMapper.class, jsonMapper));
FireDepartment schema = new FireDepartment(
    new DataSchema(
        "foo",
        jsonMapper.convertValue(
            new StringInputRowParser(
                new JSONParseSpec(
                    new TimestampSpec("timestamp", "auto", null),
                    new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("dim1", "dim2")), null, null),
                    null,
                    null
                ),
                null
            ),
            Map.class
        ),
        new AggregatorFactory[] { new CountAggregatorFactory("count") },
        new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
        jsonMapper
    ),
    new RealtimeIOConfig(
        null,
        new RealtimePlumberSchool(
            null, null, null, null, null, null, null,
            TestHelper.getTestIndexMerger(),
            TestHelper.getTestIndexMergerV9(),
            TestHelper.getTestIndexIO(),
            MapCache.create(0),
            NO_CACHE_CONFIG,
            TestHelper.getObjectMapper()
        ),
        null
    ),
    RealtimeTuningConfig.makeDefaultTuningConfig(new File("/tmp/nonexistent"))
);
String json = jsonMapper.writeValueAsString(schema);
FireDepartment newSchema = jsonMapper.readValue(json, FireDepartment.class);
Assert.assertEquals(schema.getDataSchema().getDataSource(), newSchema.getDataSchema().getDataSource());
Assert.assertEquals("/tmp/nonexistent", schema.getTuningConfig().getBasePersistDirectory().toString());
}
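Since this section focuses on DataSchema, the same injectable-values mapper can round-trip the nested DataSchema on its own. A minimal sketch built on the schema constructed above, using only calls that already appear in the test:
// Serialize only the DataSchema and read it back with the same mapper.
String dataSchemaJson = jsonMapper.writeValueAsString(schema.getDataSchema());
DataSchema roundTripped = jsonMapper.readValue(dataSchemaJson, DataSchema.class);
Assert.assertEquals("foo", roundTripped.getDataSource());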