Use of io.druid.timeline.DataSegment in project druid by druid-io.
From the class HadoopIngestionSpec, the method updateSegmentListIfDatasourcePathSpecIsUsed:
public static HadoopIngestionSpec updateSegmentListIfDatasourcePathSpecIsUsed(
    HadoopIngestionSpec spec,
    ObjectMapper jsonMapper,
    UsedSegmentLister segmentLister
) throws IOException
{
  String dataSource = "dataSource";
  String type = "type";
  String multi = "multi";
  String children = "children";
  String segments = "segments";
  String ingestionSpec = "ingestionSpec";
  Map<String, Object> pathSpec = spec.getIOConfig().getPathSpec();
  Map<String, Object> datasourcePathSpec = null;
  if (pathSpec.get(type).equals(dataSource)) {
    datasourcePathSpec = pathSpec;
  } else if (pathSpec.get(type).equals(multi)) {
    List<Map<String, Object>> childPathSpecs = (List<Map<String, Object>>) pathSpec.get(children);
    for (Map<String, Object> childPathSpec : childPathSpecs) {
      if (childPathSpec.get(type).equals(dataSource)) {
        datasourcePathSpec = childPathSpec;
        break;
      }
    }
  }
  if (datasourcePathSpec != null) {
    Map<String, Object> ingestionSpecMap = (Map<String, Object>) datasourcePathSpec.get(ingestionSpec);
    DatasourceIngestionSpec ingestionSpecObj = jsonMapper.convertValue(ingestionSpecMap, DatasourceIngestionSpec.class);
    List<DataSegment> segmentsList = segmentLister.getUsedSegmentsForIntervals(
        ingestionSpecObj.getDataSource(),
        ingestionSpecObj.getIntervals()
    );
    if (ingestionSpecObj.getSegments() != null) {
      // Ensure that the user-supplied segment list matches the segment list obtained from the db.
      // This safety check lets users do test-and-set style batch delta ingestion: the delta
      // ingestion task only runs if the current state of the system is the same as when they
      // submitted the task.
      List<DataSegment> userSuppliedSegmentsList = ingestionSpecObj.getSegments();
      if (segmentsList.size() == userSuppliedSegmentsList.size()) {
        Set<DataSegment> segmentsSet = new HashSet<>(segmentsList);
        for (DataSegment userSegment : userSuppliedSegmentsList) {
          if (!segmentsSet.contains(userSegment)) {
            throw new IOException("user supplied segments list did not match with segments list obtained from db");
          }
        }
      } else {
        throw new IOException("user supplied segments list did not match with segments list obtained from db");
      }
    }
    VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
    for (DataSegment segment : segmentsList) {
      timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
    }
    final List<WindowedDataSegment> windowedSegments = Lists.newArrayList();
    for (Interval interval : ingestionSpecObj.getIntervals()) {
      final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
      for (TimelineObjectHolder<String, DataSegment> holder : timeLineSegments) {
        for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
          windowedSegments.add(new WindowedDataSegment(chunk.getObject(), holder.getInterval()));
        }
      }
      // Overwrites the same key each iteration; windowedSegments accumulates across intervals.
      datasourcePathSpec.put(segments, windowedSegments);
    }
  }
  return spec;
}
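For context, a minimal sketch of the pathSpec shape this method looks for, built from the same string keys the method reads ("type", "ingestionSpec", "segments"). The spec, jsonMapper, and segmentLister values are assumed to be supplied by the caller, and the datasource name and interval are illustrative:

// A hedged sketch: this map would live inside spec.getIOConfig() as its pathSpec.
Map<String, Object> datasourceIngestionSpec = new HashMap<>();
datasourceIngestionSpec.put("dataSource", "wikipedia");                               // illustrative name
datasourceIngestionSpec.put("intervals", ImmutableList.of("2013-01-01/2013-01-02")); // illustrative interval

Map<String, Object> pathSpec = new HashMap<>();
pathSpec.put("type", "dataSource");                  // matches the method's `dataSource` constant
pathSpec.put("ingestionSpec", datasourceIngestionSpec);

// After the call, the "segments" key of the datasource pathSpec holds the resolved
// List<WindowedDataSegment> for the requested intervals.
HadoopIngestionSpec updated =
    HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(spec, jsonMapper, segmentLister);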
Use of io.druid.timeline.DataSegment in project druid by druid-io.
From the class S3DataSegmentFinder, the method findSegments:
@Override
public Set<DataSegment> findSegments(String workingDirPath, boolean updateDescriptor) throws SegmentLoadingException
{
  final Set<DataSegment> segments = Sets.newHashSet();
  try {
    Iterator<StorageObject> objectsIterator = S3Utils.storageObjectsIterator(
        s3Client,
        config.getBucket(),
        workingDirPath.length() == 0 ? config.getBaseKey() : workingDirPath,
        config.getMaxListingLength()
    );
    while (objectsIterator.hasNext()) {
      StorageObject storageObject = objectsIterator.next();
      storageObject.closeDataInputStream();
      if (S3Utils.toFilename(storageObject.getKey()).equals("descriptor.json")) {
        final String descriptorJson = storageObject.getKey();
        String indexZip = S3Utils.indexZipForSegmentPath(descriptorJson);
        if (S3Utils.isObjectInBucket(s3Client, config.getBucket(), indexZip)) {
          // Despite the name, this object is the descriptor.json, fetched to parse the DataSegment.
          S3Object indexObject = s3Client.getObject(config.getBucket(), descriptorJson);
          try (InputStream is = indexObject.getDataInputStream()) {
            final DataSegment dataSegment = jsonMapper.readValue(is, DataSegment.class);
            log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip);
            final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
            if (!loadSpec.get("type").equals(S3StorageDruidModule.SCHEME) || !loadSpec.get("key").equals(indexZip)) {
              loadSpec.put("type", S3StorageDruidModule.SCHEME);
              loadSpec.put("key", indexZip);
              if (updateDescriptor) {
                log.info("Updating loadSpec in descriptor.json at [%s] with new path [%s]", descriptorJson, indexZip);
                S3Object newDescJsonObject = new S3Object(descriptorJson, jsonMapper.writeValueAsString(dataSegment));
                s3Client.putObject(config.getBucket(), newDescJsonObject);
              }
            }
            segments.add(dataSegment);
          }
        } else {
          throw new SegmentLoadingException("index.zip didn't exist at [%s] while descriptor.json exists!?", indexZip);
        }
      }
    }
  } catch (ServiceException e) {
    throw new SegmentLoadingException(e, "Problem interacting with S3");
  } catch (IOException e) {
    throw new SegmentLoadingException(e, "IO exception");
  } catch (Exception e) {
    Throwables.propagateIfInstanceOf(e, SegmentLoadingException.class);
    Throwables.propagate(e);
  }
  return segments;
}
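A minimal usage sketch of the method above; the s3Client, config, and jsonMapper dependencies are assumed to be wired up elsewhere (in the tests below they are passed to the same constructor):

S3DataSegmentFinder finder = new S3DataSegmentFinder(s3Client, config, jsonMapper);
// Scan from config.getBaseKey() (empty working dir path) without touching descriptors...
Set<DataSegment> found = finder.findSegments("", false);
// ...or pass updateDescriptor=true to also rewrite stale loadSpecs in place on S3.
Set<DataSegment> repaired = finder.findSegments("", true);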
Use of io.druid.timeline.DataSegment in project druid by druid-io.
From the class S3DataSegmentFinderTest, the method testFindSegments:
@Test
public void testFindSegments() throws Exception
{
  final S3DataSegmentFinder s3DataSegmentFinder = new S3DataSegmentFinder(mockS3Client, config, mapper);
  final Set<DataSegment> segments = s3DataSegmentFinder.findSegments("", false);
  Assert.assertEquals(5, segments.size());
  DataSegment updatedSegment1 = null;
  DataSegment updatedSegment2 = null;
  DataSegment updatedSegment3 = null;
  DataSegment updatedSegment4_0 = null;
  DataSegment updatedSegment4_1 = null;
  for (DataSegment dataSegment : segments) {
    if (dataSegment.getIdentifier().equals(SEGMENT_1.getIdentifier())) {
      updatedSegment1 = dataSegment;
    } else if (dataSegment.getIdentifier().equals(SEGMENT_2.getIdentifier())) {
      updatedSegment2 = dataSegment;
    } else if (dataSegment.getIdentifier().equals(SEGMENT_3.getIdentifier())) {
      updatedSegment3 = dataSegment;
    } else if (dataSegment.getIdentifier().equals(SEGMENT_4_0.getIdentifier())) {
      updatedSegment4_0 = dataSegment;
    } else if (dataSegment.getIdentifier().equals(SEGMENT_4_1.getIdentifier())) {
      updatedSegment4_1 = dataSegment;
    } else {
      Assert.fail("Unexpected segment identifier : " + dataSegment.getIdentifier());
    }
  }
  Assert.assertEquals(descriptor1, getDescriptorPath(updatedSegment1));
  Assert.assertEquals(descriptor2, getDescriptorPath(updatedSegment2));
  Assert.assertEquals(descriptor3, getDescriptorPath(updatedSegment3));
  Assert.assertEquals(descriptor4_0, getDescriptorPath(updatedSegment4_0));
  Assert.assertEquals(descriptor4_1, getDescriptorPath(updatedSegment4_1));
  final String serializedSegment1 = mapper.writeValueAsString(updatedSegment1);
  final String serializedSegment2 = mapper.writeValueAsString(updatedSegment2);
  final String serializedSegment3 = mapper.writeValueAsString(updatedSegment3);
  final String serializedSegment4_0 = mapper.writeValueAsString(updatedSegment4_0);
  final String serializedSegment4_1 = mapper.writeValueAsString(updatedSegment4_1);
  // The first pass used updateDescriptor=false, so the descriptors stored on S3 are still stale.
  Assert.assertNotEquals(serializedSegment1, IOUtils.toString(mockS3Client.getObject(bucket, descriptor1).getDataInputStream()));
  Assert.assertNotEquals(serializedSegment2, IOUtils.toString(mockS3Client.getObject(bucket, descriptor2).getDataInputStream()));
  Assert.assertNotEquals(serializedSegment3, IOUtils.toString(mockS3Client.getObject(bucket, descriptor3).getDataInputStream()));
  Assert.assertNotEquals(serializedSegment4_0, IOUtils.toString(mockS3Client.getObject(bucket, descriptor4_0).getDataInputStream()));
  Assert.assertNotEquals(serializedSegment4_1, IOUtils.toString(mockS3Client.getObject(bucket, descriptor4_1).getDataInputStream()));
  // A second pass with updateDescriptor=true finds the same segments and rewrites the descriptors in place.
  final Set<DataSegment> segments2 = s3DataSegmentFinder.findSegments("", true);
  Assert.assertEquals(segments, segments2);
  Assert.assertEquals(serializedSegment1, IOUtils.toString(mockS3Client.getObject(bucket, descriptor1).getDataInputStream()));
  Assert.assertEquals(serializedSegment2, IOUtils.toString(mockS3Client.getObject(bucket, descriptor2).getDataInputStream()));
  Assert.assertEquals(serializedSegment3, IOUtils.toString(mockS3Client.getObject(bucket, descriptor3).getDataInputStream()));
  Assert.assertEquals(serializedSegment4_0, IOUtils.toString(mockS3Client.getObject(bucket, descriptor4_0).getDataInputStream()));
  Assert.assertEquals(serializedSegment4_1, IOUtils.toString(mockS3Client.getObject(bucket, descriptor4_1).getDataInputStream()));
}
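The ten assertEquals/assertNotEquals lines above could be folded into a helper. A sketch only: assertDescriptorMatches is a hypothetical name, not part of the test class, reusing the mapper, mockS3Client, and bucket fields the test already assumes.

// Hypothetical helper: compares a segment's JSON form against the descriptor stored on S3.
private void assertDescriptorMatches(DataSegment segment, String descriptorPath, boolean expectMatch) throws Exception
{
  final String expected = mapper.writeValueAsString(segment);
  final String stored = IOUtils.toString(mockS3Client.getObject(bucket, descriptorPath).getDataInputStream());
  if (expectMatch) {
    Assert.assertEquals(expected, stored);
  } else {
    Assert.assertNotEquals(expected, stored);
  }
}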
Use of io.druid.timeline.DataSegment in project druid by druid-io.
From the class S3DataSegmentMoverTest, the method testFailsToMoveMissing:
@Test(expected = SegmentLoadingException.class)
public void testFailsToMoveMissing() throws Exception
{
  MockStorageService mockS3Client = new MockStorageService();
  S3DataSegmentMover mover = new S3DataSegmentMover(mockS3Client, new S3DataSegmentPusherConfig());
  mover.move(
      new DataSegment(
          "test",
          new Interval("2013-01-01/2013-01-02"),
          "1",
          ImmutableMap.<String, Object>of(
              "key", "baseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip",
              "bucket", "DOES NOT EXIST"),
          ImmutableList.of("dim1", "dim1"),
          ImmutableList.of("metric1", "metric2"),
          NoneShardSpec.instance(), 0, 1),
      ImmutableMap.<String, Object>of("bucket", "DOES NOT EXIST", "baseKey", "baseKey2"));
}
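The same segment can be written with DataSegment.builder() for readability. This is a sketch, assuming the builder's setters mirror the positional constructor arguments above (dataSource, interval, version, loadSpec, dimensions, metrics, shardSpec, binaryVersion, size):

// Assumed builder API; the inline constructor call above is what the test actually uses.
DataSegment missingSegment = DataSegment.builder()
    .dataSource("test")
    .interval(new Interval("2013-01-01/2013-01-02"))
    .version("1")
    .loadSpec(ImmutableMap.<String, Object>of(
        "key", "baseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip",
        "bucket", "DOES NOT EXIST")) // nonexistent bucket is what triggers the expected exception
    .dimensions(ImmutableList.of("dim1", "dim1"))
    .metrics(ImmutableList.of("metric1", "metric2"))
    .shardSpec(NoneShardSpec.instance())
    .binaryVersion(0)
    .size(1)
    .build();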
Use of io.druid.timeline.DataSegment in project druid by druid-io.
From the class S3DataSegmentMoverTest, the method testMoveNoop:
@Test
public void testMoveNoop() throws Exception
{
  MockStorageService mockS3Client = new MockStorageService();
  S3DataSegmentMover mover = new S3DataSegmentMover(mockS3Client, new S3DataSegmentPusherConfig());
  // The target index.zip and descriptor.json already exist, so no copy should take place.
  mockS3Client.putObject("archive", new S3Object("targetBaseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip"));
  mockS3Client.putObject("archive", new S3Object("targetBaseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/descriptor.json"));
  DataSegment movedSegment = mover.move(sourceSegment, ImmutableMap.<String, Object>of("baseKey", "targetBaseKey", "bucket", "archive"));
  Map<String, Object> targetLoadSpec = movedSegment.getLoadSpec();
  // The returned segment's loadSpec points at the archive location even though nothing was copied.
  Assert.assertEquals("targetBaseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip", MapUtils.getString(targetLoadSpec, "key"));
  Assert.assertEquals("archive", MapUtils.getString(targetLoadSpec, "bucket"));
  Assert.assertFalse(mockS3Client.didMove());
}
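For contrast, a sketch of the non-noop path, assuming (as the no-op test implies) that sourceSegment's loadSpec points at bucket "main" under the source baseKey: when the target objects do not already exist, the mover has real work to do and didMove() flips to true.

MockStorageService client = new MockStorageService();
S3DataSegmentMover mover = new S3DataSegmentMover(client, new S3DataSegmentPusherConfig());
// Only the source objects exist this time, so the move must copy to the archive bucket.
client.putObject("main", new S3Object("baseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip"));
client.putObject("main", new S3Object("baseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/descriptor.json"));
DataSegment moved = mover.move(sourceSegment, ImmutableMap.<String, Object>of("baseKey", "targetBaseKey", "bucket", "archive"));
Assert.assertTrue(client.didMove());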