Use of io.druid.indexer.hadoop.WindowedDataSegment in project druid by druid-io.
From class BatchDeltaIngestionTest, method testReindexingWithPartialWindow.
@Test
public void testReindexingWithPartialWindow() throws Exception {
  // Window the segment to INTERVAL_PARTIAL while reindexing over INTERVAL_FULL,
  // so only the rows inside the partial window should come back.
  List<WindowedDataSegment> segments = ImmutableList.of(new WindowedDataSegment(SEGMENT, INTERVAL_PARTIAL));
  HadoopDruidIndexerConfig config = makeHadoopDruidIndexerConfig(
      ImmutableMap.<String, Object>of(
          "type", "dataSource",
          "ingestionSpec", ImmutableMap.of("dataSource", "xyz", "interval", INTERVAL_FULL),
          "segments", segments
      ),
      temporaryFolder.newFolder()
  );
  List<ImmutableMap<String, Object>> expectedRows = ImmutableList.of(
      ImmutableMap.<String, Object>of(
          "time", DateTime.parse("2014-10-22T00:00:00.000Z"),
          "host", ImmutableList.of("a.example.com"),
          "visited_sum", 100L,
          "unique_hosts", 1.0d
      ),
      ImmutableMap.<String, Object>of(
          "time", DateTime.parse("2014-10-22T01:00:00.000Z"),
          "host", ImmutableList.of("b.example.com"),
          "visited_sum", 150L,
          "unique_hosts", 1.0d
      )
  );
  testIngestion(config, expectedRows, Iterables.getOnlyElement(segments));
}
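A WindowedDataSegment pairs an existing DataSegment with the interval that should actually be read from it; above, the window (INTERVAL_PARTIAL) is narrower than the reindexing interval (INTERVAL_FULL), which is why only two rows are expected. A minimal sketch of that pairing, assuming a segment built elsewhere and the io.druid package layout used here (only the constructor shown in the test is relied on):

import io.druid.indexer.hadoop.WindowedDataSegment;
import io.druid.timeline.DataSegment;
import org.joda.time.Interval;

public class PartialWindowSketch
{
  // Wraps a segment so that only the given sub-interval of it is read back,
  // mirroring new WindowedDataSegment(SEGMENT, INTERVAL_PARTIAL) in the test above.
  public static WindowedDataSegment windowTo(DataSegment segment, Interval partialInterval)
  {
    return new WindowedDataSegment(segment, partialInterval);
  }
}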
Use of io.druid.indexer.hadoop.WindowedDataSegment in project druid by druid-io.
From class DatasourcePathSpecTest, method testAddInputPaths.
@Test
public void testAddInputPaths() throws Exception {
  HadoopDruidIndexerConfig hadoopIndexerConfig = makeHadoopDruidIndexerConfig();
  ObjectMapper mapper = new DefaultObjectMapper();
  DatasourcePathSpec pathSpec = new DatasourcePathSpec(mapper, segments, ingestionSpec, null);

  Configuration config = new Configuration();
  Job job = EasyMock.createNiceMock(Job.class);
  EasyMock.expect(job.getConfiguration()).andReturn(config).anyTimes();
  EasyMock.replay(job);

  // addInputPaths should serialize the windowed segments and the ingestion spec
  // into the job configuration.
  pathSpec.addInputPaths(hadoopIndexerConfig, job);

  List<WindowedDataSegment> actualSegments = mapper.readValue(
      config.get(DatasourceInputFormat.CONF_INPUT_SEGMENTS),
      new TypeReference<List<WindowedDataSegment>>() {}
  );
  Assert.assertEquals(segments, actualSegments);

  // The stored ingestion spec comes back with dimensions and metrics filled in.
  DatasourceIngestionSpec actualIngestionSpec = mapper.readValue(
      config.get(DatasourceInputFormat.CONF_DRUID_SCHEMA),
      DatasourceIngestionSpec.class
  );
  Assert.assertEquals(
      ingestionSpec.withDimensions(ImmutableList.of("product")).withMetrics(ImmutableList.of("visited_sum")),
      actualIngestionSpec
  );
}
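The assertions above read the values back the same way a consumer of the job configuration would. A sketch of that deserialization step, assuming addInputPaths has already populated the configuration and that DatasourceInputFormat and DatasourceIngestionSpec live in the same io.druid.indexer.hadoop package as WindowedDataSegment:

import java.io.IOException;
import java.util.List;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.hadoop.conf.Configuration;

import io.druid.indexer.hadoop.DatasourceIngestionSpec;
import io.druid.indexer.hadoop.DatasourceInputFormat;
import io.druid.indexer.hadoop.WindowedDataSegment;

public class ConfigurationReadBackSketch
{
  // Reads the windowed segments that DatasourcePathSpec.addInputPaths stored in the
  // Hadoop Configuration, using the same key the test asserts against.
  public static List<WindowedDataSegment> readSegments(Configuration conf, ObjectMapper mapper) throws IOException
  {
    return mapper.readValue(
        conf.get(DatasourceInputFormat.CONF_INPUT_SEGMENTS),
        new TypeReference<List<WindowedDataSegment>>() {}
    );
  }

  // Reads the ingestion spec stored under CONF_DRUID_SCHEMA.
  public static DatasourceIngestionSpec readSchema(Configuration conf, ObjectMapper mapper) throws IOException
  {
    return mapper.readValue(conf.get(DatasourceInputFormat.CONF_DRUID_SCHEMA), DatasourceIngestionSpec.class);
  }
}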
Use of io.druid.indexer.hadoop.WindowedDataSegment in project druid by druid-io.
From class BatchDeltaIngestionTest, method testIngestion.
private void testIngestion(
    HadoopDruidIndexerConfig config,
    List<ImmutableMap<String, Object>> expectedRowsGenerated,
    WindowedDataSegment windowedDataSegment
) throws Exception {
  // Run the index generator job and locate the segment it wrote under
  // {segmentOutputPath}/{dataSource}/{start}_{end}/{version}/0.
  IndexGeneratorJob job = new IndexGeneratorJob(config);
  JobHelper.runJobs(ImmutableList.<Jobby>of(job), config);
  File segmentFolder = new File(
      String.format(
          "%s/%s/%s_%s/%s/0",
          config.getSchema().getIOConfig().getSegmentOutputPath(),
          config.getSchema().getDataSchema().getDataSource(),
          INTERVAL_FULL.getStart().toString(),
          INTERVAL_FULL.getEnd().toString(),
          config.getSchema().getTuningConfig().getVersion()
      )
  );
  Assert.assertTrue(segmentFolder.exists());

  File descriptor = new File(segmentFolder, "descriptor.json");
  File indexZip = new File(segmentFolder, "index.zip");
  Assert.assertTrue(descriptor.exists());
  Assert.assertTrue(indexZip.exists());

  // Verify the segment metadata written to descriptor.json.
  DataSegment dataSegment = MAPPER.readValue(descriptor, DataSegment.class);
  Assert.assertEquals("website", dataSegment.getDataSource());
  Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
  Assert.assertEquals(INTERVAL_FULL, dataSegment.getInterval());
  Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
  Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
  Assert.assertEquals("host", dataSegment.getDimensions().get(0));
  Assert.assertEquals("visited_sum", dataSegment.getMetrics().get(0));
  Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
  Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
  HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
  Assert.assertEquals(0, spec.getPartitionNum());
  Assert.assertEquals(1, spec.getPartitions());

  // Unzip the segment, read it back through an IngestSegmentFirehose limited to the
  // windowed interval, and compare the rows against the expected output.
  File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
  new LocalDataSegmentPuller().getSegmentFiles(dataSegment, tmpUnzippedSegmentDir);
  QueryableIndex index = INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
  StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
  Firehose firehose = new IngestSegmentFirehose(
      ImmutableList.of(new WindowedStorageAdapter(adapter, windowedDataSegment.getInterval())),
      ImmutableList.of("host"),
      ImmutableList.of("visited_sum", "unique_hosts"),
      null,
      Granularities.NONE
  );
  List<InputRow> rows = Lists.newArrayList();
  while (firehose.hasMore()) {
    rows.add(firehose.nextRow());
  }
  verifyRows(expectedRowsGenerated, rows);
}
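The read-back loop at the end is the usual firehose drain pattern. Pulled out as a small helper for reuse, assuming only the hasMore()/nextRow() calls and the io.druid.data.input types used above:

import java.util.List;

import com.google.common.collect.Lists;

import io.druid.data.input.Firehose;
import io.druid.data.input.InputRow;

public class FirehoseDrainSketch
{
  // Drains every remaining row out of a firehose, exactly as the while-loop in testIngestion does.
  public static List<InputRow> drain(Firehose firehose)
  {
    List<InputRow> rows = Lists.newArrayList();
    while (firehose.hasMore()) {
      rows.add(firehose.nextRow());
    }
    return rows;
  }
}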