Use of org.apache.druid.indexer.hadoop.WindowedDataSegment in project druid by druid-io.
The class BatchDeltaIngestionTest, method testReindexingWithPartialWindow:
@Test
public void testReindexingWithPartialWindow() throws Exception
{
  // Window the existing segment to INTERVAL_PARTIAL so that only rows inside
  // that sub-interval are reindexed.
  List<WindowedDataSegment> segments = ImmutableList.of(new WindowedDataSegment(SEGMENT, INTERVAL_PARTIAL));
  HadoopDruidIndexerConfig config = makeHadoopDruidIndexerConfig(
      ImmutableMap.of(
          "type", "dataSource",
          "ingestionSpec", ImmutableMap.of("dataSource", "testds", "interval", INTERVAL_FULL),
          "segments", segments
      ),
      temporaryFolder.newFolder()
  );
  List<ImmutableMap<String, Object>> expectedRows = ImmutableList.of(
      ImmutableMap.of("time", DateTimes.of("2014-10-22T00:00:00.000Z"), "host", ImmutableList.of("a.example.com"), "visited_sum", 100L, "unique_hosts", 1.0d),
      ImmutableMap.of("time", DateTimes.of("2014-10-22T01:00:00.000Z"), "host", ImmutableList.of("b.example.com"), "visited_sum", 150L, "unique_hosts", 1.0d)
  );
  testIngestion(config, expectedRows, Iterables.getOnlyElement(segments), ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"));
}
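For context, a WindowedDataSegment simply pairs an existing DataSegment with the sub-interval of it that should be read. A minimal sketch of the idea, assuming a segment covering the full day; the helper name and the literal interval are illustrative, not taken from the test:

import org.apache.druid.indexer.hadoop.WindowedDataSegment;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.timeline.DataSegment;
import org.joda.time.Interval;

// A sketch, not the test's code: window `segment` to its first two hours so a
// reindex pass reads only rows whose timestamps fall inside that window.
static WindowedDataSegment firstTwoHours(DataSegment segment)
{
  Interval partial = Intervals.of("2014-10-22T00:00:00Z/2014-10-22T02:00:00Z");
  return new WindowedDataSegment(segment, partial);
}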
Use of org.apache.druid.indexer.hadoop.WindowedDataSegment in project druid by druid-io.
The class HadoopIngestionSpecUpdateDatasourcePathSpecSegmentsTest, method testUpdateSegmentListIfDatasourcePathSpecIsUsedWithJustDatasourcePathSpecAndPartialInterval:
@Test
public void testUpdateSegmentListIfDatasourcePathSpecIsUsedWithJustDatasourcePathSpecAndPartialInterval() throws Exception
{
  PathSpec pathSpec = new DatasourcePathSpec(
      null,
      new DatasourceIngestionSpec(TEST_DATA_SOURCE, TEST_DATA_SOURCE_INTERVAL_PARTIAL, null, null, null, null, null, false, null),
      null,
      false
  );
  HadoopDruidIndexerConfig config = testRunUpdateSegmentListIfDatasourcePathSpecIsUsed(pathSpec, TEST_DATA_SOURCE_INTERVAL_PARTIAL);
  Assert.assertEquals(
      ImmutableList.of(new WindowedDataSegment(SEGMENT, TEST_DATA_SOURCE_INTERVAL_PARTIAL)),
      ((DatasourcePathSpec) config.getPathSpec()).getSegments()
  );
}
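The partial window in this assertion comes from VersionedIntervalTimeline: lookup clips each returned holder's interval to the queried interval, and that clipped interval becomes the segment's window, the same mechanism used in updateSegmentListIfDatasourcePathSpecIsUsed further below. A rough sketch, reusing the test fixtures as assumed names:

// Rough sketch of the mechanism behind the assertion above.
VersionedIntervalTimeline<String, DataSegment> timeline =
    VersionedIntervalTimeline.forSegments(ImmutableList.of(SEGMENT));
for (TimelineObjectHolder<String, DataSegment> holder : timeline.lookup(TEST_DATA_SOURCE_INTERVAL_PARTIAL)) {
  for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
    // holder.getInterval() is clipped to TEST_DATA_SOURCE_INTERVAL_PARTIAL,
    // so the resulting WindowedDataSegment carries the partial interval.
    WindowedDataSegment windowed = new WindowedDataSegment(chunk.getObject(), holder.getInterval());
  }
}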
Use of org.apache.druid.indexer.hadoop.WindowedDataSegment in project druid by druid-io.
The class HadoopIngestionSpecUpdateDatasourcePathSpecSegmentsTest, method testUpdateSegmentListIfDatasourcePathSpecIsUsedWithMultiplePathSpec:
@Test
public void testUpdateSegmentListIfDatasourcePathSpecIsUsedWithMultiplePathSpec() throws Exception
{
  PathSpec pathSpec = new MultiplePathSpec(
      ImmutableList.of(
          new StaticPathSpec("/xyz", null),
          new DatasourcePathSpec(null, new DatasourceIngestionSpec(TEST_DATA_SOURCE, TEST_DATA_SOURCE_INTERVAL, null, null, null, null, null, false, null), null, false),
          new DatasourcePathSpec(null, new DatasourceIngestionSpec(TEST_DATA_SOURCE2, TEST_DATA_SOURCE_INTERVAL2, null, null, null, null, null, false, null), null, false)
      )
  );
  HadoopDruidIndexerConfig config = testRunUpdateSegmentListIfDatasourcePathSpecIsUsed(pathSpec, TEST_DATA_SOURCE_INTERVAL);
  Assert.assertEquals(
      ImmutableList.of(WindowedDataSegment.of(SEGMENT)),
      ((DatasourcePathSpec) ((MultiplePathSpec) config.getPathSpec()).getChildren().get(1)).getSegments()
  );
  Assert.assertEquals(
      ImmutableList.of(new WindowedDataSegment(SEGMENT2, TEST_DATA_SOURCE_INTERVAL2)),
      ((DatasourcePathSpec) ((MultiplePathSpec) config.getPathSpec()).getChildren().get(2)).getSegments()
  );
}
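WindowedDataSegment.of(SEGMENT) in the first assertion is the convenience factory for a window spanning the segment's entire interval; judging from the class, it should be equivalent to passing the segment's own interval to the constructor:

// Sketch of the equivalence (based on the static factory in WindowedDataSegment):
WindowedDataSegment full = WindowedDataSegment.of(SEGMENT);
WindowedDataSegment same = new WindowedDataSegment(SEGMENT, SEGMENT.getInterval());
// full.equals(same) should hold, so either form works in assertions like the ones above.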
Use of org.apache.druid.indexer.hadoop.WindowedDataSegment in project druid by druid-io.
The class BatchDeltaIngestionTest, method testDeltaIngestion:
@Test
public void testDeltaIngestion() throws Exception
{
  File tmpDir = temporaryFolder.newFolder();
  File dataFile1 = new File(tmpDir, "data1");
  FileUtils.writeLines(dataFile1, ImmutableList.of("2014102200,a.example.com,a.example.com,90", "2014102201,b.example.com,b.example.com,25"));
  File dataFile2 = new File(tmpDir, "data2");
  FileUtils.writeLines(dataFile2, ImmutableList.of("2014102202,c.example.com,c.example.com,70"));
  // Use a Hadoop glob path to verify that delta ingestion keeps working with
  // Hadoop MultipleInputs and is not affected by
  // https://issues.apache.org/jira/browse/MAPREDUCE-5061
  String inputPath = tmpDir.getPath() + "/{data1,data2}";
  List<WindowedDataSegment> segments = ImmutableList.of(new WindowedDataSegment(SEGMENT, INTERVAL_FULL));
  HadoopDruidIndexerConfig config = makeHadoopDruidIndexerConfig(
      ImmutableMap.of(
          "type", "multi",
          "children", ImmutableList.of(
              ImmutableMap.of(
                  "type", "dataSource",
                  "ingestionSpec", ImmutableMap.of("dataSource", "testds", "interval", INTERVAL_FULL),
                  "segments", segments
              ),
              ImmutableMap.<String, Object>of("type", "static", "paths", inputPath)
          )
      ),
      temporaryFolder.newFolder()
  );
  List<ImmutableMap<String, Object>> expectedRows = ImmutableList.of(
      ImmutableMap.of("time", DateTimes.of("2014-10-22T00:00:00.000Z"), "host", ImmutableList.of("a.example.com"), "visited_sum", 190L, "unique_hosts", 1.0d),
      ImmutableMap.of("time", DateTimes.of("2014-10-22T01:00:00.000Z"), "host", ImmutableList.of("b.example.com"), "visited_sum", 175L, "unique_hosts", 1.0d),
      ImmutableMap.of("time", DateTimes.of("2014-10-22T02:00:00.000Z"), "host", ImmutableList.of("c.example.com"), "visited_sum", 270L, "unique_hosts", 1.0d)
  );
  testIngestion(config, expectedRows, Iterables.getOnlyElement(segments), ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"));
}
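The {data1,data2} syntax in inputPath is ordinary Hadoop FileSystem globbing. As a quick way to see what such a pattern expands to outside the indexer, one could use globStatus; a sketch, with an assumed local directory standing in for tmpDir:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch: expand the same {data1,data2} glob against the local filesystem.
// "/tmp/delta" is a hypothetical stand-in for tmpDir above.
FileSystem fs = FileSystem.getLocal(new Configuration());
FileStatus[] matches = fs.globStatus(new Path("/tmp/delta/{data1,data2}"));
// MAPREDUCE-5061 concerns commas in such paths confusing MultipleInputs,
// which is exactly what the test above guards against.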
Use of org.apache.druid.indexer.hadoop.WindowedDataSegment in project druid by druid-io.
The class HadoopIngestionSpec, method updateSegmentListIfDatasourcePathSpecIsUsed:
public static void updateSegmentListIfDatasourcePathSpecIsUsed(
    HadoopIngestionSpec spec,
    ObjectMapper jsonMapper,
    UsedSegmentsRetriever segmentsRetriever
) throws IOException
{
  String dataSource = "dataSource";
  String type = "type";
  String multi = "multi";
  String children = "children";
  String segments = "segments";
  String ingestionSpec = "ingestionSpec";
  Map<String, Object> pathSpec = spec.getIOConfig().getPathSpec();
  List<Map<String, Object>> datasourcePathSpecs = new ArrayList<>();
  if (pathSpec.get(type).equals(dataSource)) {
    datasourcePathSpecs.add(pathSpec);
  } else if (pathSpec.get(type).equals(multi)) {
    List<Map<String, Object>> childPathSpecs = (List<Map<String, Object>>) pathSpec.get(children);
    for (Map<String, Object> childPathSpec : childPathSpecs) {
      if (childPathSpec.get(type).equals(dataSource)) {
        datasourcePathSpecs.add(childPathSpec);
      }
    }
  }
  for (Map<String, Object> datasourcePathSpec : datasourcePathSpecs) {
    Map<String, Object> ingestionSpecMap = (Map<String, Object>) datasourcePathSpec.get(ingestionSpec);
    DatasourceIngestionSpec ingestionSpecObj = jsonMapper.convertValue(ingestionSpecMap, DatasourceIngestionSpec.class);
    Collection<DataSegment> usedVisibleSegments = segmentsRetriever.retrieveUsedSegmentsForIntervals(
        ingestionSpecObj.getDataSource(),
        ingestionSpecObj.getIntervals(),
        Segments.ONLY_VISIBLE
    );
    if (ingestionSpecObj.getSegments() != null) {
      // Ensure that the user-supplied segment list matches the usedVisibleSegments obtained
      // from the DB. This safety check lets users do test-and-set style batch delta ingestion:
      // the delta ingestion task only runs if the current state of the system is the same as
      // when they submitted the task.
      List<DataSegment> userSuppliedSegmentsList = ingestionSpecObj.getSegments();
      if (usedVisibleSegments.size() == userSuppliedSegmentsList.size()) {
        Set<DataSegment> segmentsSet = new HashSet<>(usedVisibleSegments);
        for (DataSegment userSegment : userSuppliedSegmentsList) {
          if (!segmentsSet.contains(userSegment)) {
            throw new IOException("user supplied segments list did not match with segments list obtained from db");
          }
        }
      } else {
        throw new IOException("user supplied segments list did not match with segments list obtained from db");
      }
    }
    final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(usedVisibleSegments);
    final List<WindowedDataSegment> windowedSegments = new ArrayList<>();
    for (Interval interval : ingestionSpecObj.getIntervals()) {
      final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
      for (TimelineObjectHolder<String, DataSegment> holder : timeLineSegments) {
        for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
          windowedSegments.add(new WindowedDataSegment(chunk.getObject(), holder.getInterval()));
        }
      }
      datasourcePathSpec.put(segments, windowedSegments);
    }
  }
}
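A hypothetical call site, to show how the pieces fit together; specJson and usedSegmentsRetriever are assumed names here, and any UsedSegmentsRetriever implementation (for example one backed by the coordinator) would do:

// Hypothetical usage sketch: rewrite the spec's dataSource pathSpec(s) so that
// their "segments" entries hold the currently visible, windowed segments.
HadoopIngestionSpec spec = jsonMapper.readValue(specJson, HadoopIngestionSpec.class);
HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(spec, jsonMapper, usedSegmentsRetriever);
// If the spec pinned an explicit segment list that no longer matches the DB,
// the call throws IOException instead of silently ingesting different data.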