Example usage of org.apache.druid.java.util.common.UOE in the druid-io/druid project: class IndexTask, method determineShardSpecs.
/**
 * Determines intervals and shardSpecs for input data. This method first checks that it must determine intervals and
 * shardSpecs by itself. Intervals must be determined if they are not specified in {@link GranularitySpec}.
 * ShardSpecs must be determined if the perfect rollup must be guaranteed even though the number of shards is not
 * specified in {@link IndexTuningConfig}.
 * <p/>
 * If both intervals and shardSpecs don't have to be determined, this method simply returns {@link ShardSpecs} for the
 * given intervals. Here, if {@link HashedPartitionsSpec#numShards} is not specified, {@link NumberedShardSpec} is
 * used.
 * <p/>
 * If one of intervals or shardSpecs need to be determined, this method reads the entire input for determining one of
 * them. If the perfect rollup must be guaranteed, {@link HashBasedNumberedShardSpec} is used for hash partitioning
 * of input data. In the future we may want to also support single-dimension partitioning.
 *
 * @param toolbox        task toolbox supplying the JSON mapper used when scanning input
 * @param inputSource    where the input rows come from; only read when intervals or partitions must be determined
 * @param tmpDir         temporary directory used while scanning the input
 * @param partitionsSpec the secondary partitioning to apply (hash, linear/dynamic, ...)
 * @return a {@link PartitionAnalysis} indicating how many shardSpecs need to be created per interval
 * @throws IOException if scanning the input to determine intervals or partitions fails
 */
private PartitionAnalysis determineShardSpecs(final TaskToolbox toolbox, final InputSource inputSource, final File tmpDir, @Nonnull final PartitionsSpec partitionsSpec) throws IOException {
final ObjectMapper jsonMapper = toolbox.getJsonMapper();
final GranularitySpec granularitySpec = ingestionSchema.getDataSchema().getGranularitySpec();
// Must determine intervals if unknown, since we acquire all locks before processing any data.
final boolean determineIntervals = granularitySpec.inputIntervals().isEmpty();
// Must determine partitions if rollup is guaranteed and the user didn't provide a specific value.
final boolean determineNumPartitions = partitionsSpec.needsDeterminePartitions(false);
// if we were given number of shards per interval and the intervals, we don't need to scan the data
if (!determineNumPartitions && !determineIntervals) {
log.info("Skipping determine partition scan");
if (partitionsSpec.getType() == SecondaryPartitionType.HASH) {
return PartialHashSegmentGenerateTask.createHashPartitionAnalysisFromPartitionsSpec(granularitySpec, (HashedPartitionsSpec) partitionsSpec, // not overriding numShards
null);
} else if (partitionsSpec.getType() == SecondaryPartitionType.LINEAR) {
return createLinearPartitionAnalysis(granularitySpec, (DynamicPartitionsSpec) partitionsSpec);
} else {
// Only HASH and LINEAR are supported without a scan; anything else is a programming error.
throw new UOE("%s", partitionsSpec.getClass().getName());
}
} else {
// determine intervals containing data and prime HLL collectors
log.info("Determining intervals and shardSpecs");
return createShardSpecsFromInput(jsonMapper, ingestionSchema, inputSource, tmpDir, granularitySpec, partitionsSpec, determineIntervals);
}
}
Example usage of org.apache.druid.java.util.common.UOE in the druid-io/druid project: class OssDataSegmentPuller, method buildFileObject.
/**
 * Wraps the OSS object at {@code uri} in a read-only, lazy {@link FileObject}. Only a metadata
 * lookup ({@link OssUtils#getSingleObjectSummary}) happens eagerly; the object content is fetched
 * on the first {@link FileObject#openInputStream()} call. All write- and character-oriented
 * operations throw {@link UOE}, since the puller treats OSS objects as immutable binary blobs.
 *
 * @param uri OSS URI, validated via {@link OssUtils#checkURI}
 * @return a read-only {@link FileObject} view of the OSS object
 * @throws OSSException if the eager metadata lookup fails
 */
private FileObject buildFileObject(final URI uri) throws OSSException {
final CloudObjectLocation coords = new CloudObjectLocation(OssUtils.checkURI(uri));
final OSSObjectSummary objectSummary = OssUtils.getSingleObjectSummary(client, coords.getBucket(), coords.getPath());
final String path = uri.getPath();
return new FileObject() {
// Fetched lazily on the first openInputStream() call to avoid an unnecessary GET.
OSSObject ossObject = null;
@Override
public URI toUri() {
return uri;
}
@Override
public String getName() {
final String ext = Files.getFileExtension(path);
return Files.getNameWithoutExtension(path) + (Strings.isNullOrEmpty(ext) ? "" : ("." + ext));
}
/**
 * Returns an input stream for an OSS object. The returned input stream is not thread-safe.
 */
@Override
public InputStream openInputStream() throws IOException {
try {
if (ossObject == null) {
// lazily promote to full GET
ossObject = client.getObject(objectSummary.getBucketName(), objectSummary.getKey());
}
final InputStream in = ossObject.getObjectContent();
// Closing the returned stream must also release the underlying OSS object/connection.
final Closer closer = Closer.create();
closer.register(in);
closer.register(ossObject);
return new FilterInputStream(in) {
@Override
public void close() throws IOException {
closer.close();
}
};
} catch (OSSException e) {
throw new IOE(e, "Could not load OSS URI [%s]", uri);
}
}
@Override
public OutputStream openOutputStream() {
throw new UOE("Cannot stream OSS output");
}
@Override
public Reader openReader(boolean ignoreEncodingErrors) {
throw new UOE("Cannot open reader");
}
@Override
public CharSequence getCharContent(boolean ignoreEncodingErrors) {
throw new UOE("Cannot open character sequence");
}
@Override
public Writer openWriter() {
throw new UOE("Cannot open writer");
}
@Override
public long getLastModified() {
// Served from the eagerly-fetched summary; no extra network call.
return objectSummary.getLastModified().getTime();
}
@Override
public boolean delete() {
// Fixed message: it previously referenced jetS3t, a copy-paste from the S3 puller;
// this class talks to Aliyun OSS and has never used jetS3t.
throw new UOE("Cannot delete OSS objects [%s]", uri);
}
};
}
Example usage of org.apache.druid.java.util.common.UOE in the druid-io/druid project: class VersionedIntervalTimeline, method addAtKey.
/**
 * Inserts {@code entry} into {@code timeline} over the portion of {@code key} that it overlaps, walking forward
 * through successive overlapping keys and resolving conflicts by version:
 * existing version newer — the overlapped part of the new entry is dropped; new version newer — the overlapped
 * part of the existing entry is carved out and replaced; equal versions — only an exactly-equal entry may replace
 * itself (segment restore), otherwise this is an error.
 *
 * @return boolean flag indicating whether or not we inserted or discarded something
 */
@GuardedBy("lock")
private boolean addAtKey(NavigableMap<Interval, TimelineEntry> timeline, Interval key, TimelineEntry entry) {
boolean retVal = false;
Interval currKey = key;
Interval entryInterval = entry.getTrueInterval();
// No overlap at all: nothing inserted, nothing discarded. (This early return also prevents the
// unconditional addIntervalToTimeline at the bottom from running.)
if (!currKey.overlaps(entryInterval)) {
return false;
}
// entryInterval shrinks as pieces of it are placed or discarded; currKey advances through the map.
while (entryInterval != null && currKey != null && currKey.overlaps(entryInterval)) {
final Interval nextKey = timeline.higherKey(currKey);
final int versionCompare = versionComparator.compare(entry.getVersion(), timeline.get(currKey).getVersion());
if (versionCompare < 0) {
// if overlapped.
if (currKey.contains(entryInterval)) {
// the version of the entry of currKey is larger than that of the given entry. Discard it
return true;
} else if (currKey.getStart().isBefore(entryInterval.getStart())) {
// | entry |
// | cur |
// => |new|
entryInterval = new Interval(currKey.getEnd(), entryInterval.getEnd());
} else {
// | entry |
// | cur |
// => |new|
addIntervalToTimeline(new Interval(entryInterval.getStart(), currKey.getStart()), entry, timeline);
// => |new|
if (entryInterval.getEnd().isAfter(currKey.getEnd())) {
entryInterval = new Interval(currKey.getEnd(), entryInterval.getEnd());
} else {
// Discard this entry since there is no portion of the entry interval that goes past the end of the curr
// key interval.
entryInterval = null;
}
}
} else if (versionCompare > 0) {
// since the entry version is greater than the existing one, the given entry overwrites the existing one
// if overlapped.
final TimelineEntry oldEntry = timeline.remove(currKey);
if (currKey.contains(entryInterval)) {
// | cur |
// | entry |
// => |old| new |old|
addIntervalToTimeline(new Interval(currKey.getStart(), entryInterval.getStart()), oldEntry, timeline);
addIntervalToTimeline(new Interval(entryInterval.getEnd(), currKey.getEnd()), oldEntry, timeline);
addIntervalToTimeline(entryInterval, entry, timeline);
return true;
} else if (currKey.getStart().isBefore(entryInterval.getStart())) {
// | cur |
// | entry |
// => |old|
addIntervalToTimeline(new Interval(currKey.getStart(), entryInterval.getStart()), oldEntry, timeline);
} else if (entryInterval.getEnd().isBefore(currKey.getEnd())) {
// | cur |
// | entry |
// => |old|
addIntervalToTimeline(new Interval(entryInterval.getEnd(), currKey.getEnd()), oldEntry, timeline);
}
} else {
// Equal versions: replacing an identical entry is legal (happens when restoring segments);
// distinct overlapping entries with the same version are a correctness violation.
if (timeline.get(currKey).equals(entry)) {
// This occurs when restoring segments
timeline.remove(currKey);
} else {
throw new UOE("Cannot add overlapping segments [%s and %s] with the same version [%s]", currKey, entryInterval, entry.getVersion());
}
}
currKey = nextKey;
retVal = true;
}
// Place whatever remains of the entry interval after all overlaps were resolved.
// NOTE(review): entryInterval may be null here — presumably addIntervalToTimeline tolerates null; verify.
addIntervalToTimeline(entryInterval, entry, timeline);
return retVal;
}
Example usage of org.apache.druid.java.util.common.UOE in the druid-io/druid project: class UriCacheGeneratorTest, method getParameters.
@Parameterized.Parameters(name = "{0}")
public static Iterable<Object[]> getParameters() {
    // Output-stream factory for uncompressed ".dat" fixtures.
    final Function<File, OutputStream> plainWriter = outFile -> {
        try {
            return new FileOutputStream(outFile);
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        }
    };
    // Output-stream factory for gzip ".gz" fixtures; closing the gzip stream also closes the file stream.
    final Function<File, OutputStream> gzipWriter = outFile -> {
        try {
            final FileOutputStream fileStream = new FileOutputStream(outFile);
            return new GZIPOutputStream(fileStream) {
                @Override
                public void close() throws IOException {
                    try {
                        super.close();
                    } finally {
                        fileStream.close();
                    }
                }
            };
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        }
    };
    final List<Object[]> compressionParams = ImmutableList.of(
        new Object[]{".dat", plainWriter},
        new Object[]{".gz", gzipWriter}
    );
    // Both cache-manager flavors, each built fresh per lifecycle.
    final List<Function<Lifecycle, NamespaceExtractionCacheManager>> cacheManagerCreators = ImmutableList.of(
        lifecycle -> new OnHeapNamespaceExtractionCacheManager(lifecycle, new NoopServiceEmitter(), new NamespaceExtractionConfig()),
        lifecycle -> new OffHeapNamespaceExtractionCacheManager(lifecycle, new NoopServiceEmitter(), new NamespaceExtractionConfig())
    );
    // Lazily emit the cross product: every compression variant paired with every cache-manager creator.
    return () -> new Iterator<Object[]>() {
        private final Iterator<Object[]> compressionIt = compressionParams.iterator();
        private Iterator<Function<Lifecycle, NamespaceExtractionCacheManager>> creatorIt = cacheManagerCreators.iterator();
        private Object[] currentCompression = compressionIt.next();

        @Override
        public boolean hasNext() {
            return compressionIt.hasNext() || creatorIt.hasNext();
        }

        @Override
        public Object[] next() {
            if (!creatorIt.hasNext()) {
                // Exhausted the creators for the current compression: advance to the next one and restart.
                creatorIt = cacheManagerCreators.iterator();
                currentCompression = compressionIt.next();
            }
            return new Object[]{currentCompression[0], currentCompression[1], creatorIt.next()};
        }

        @Override
        public void remove() {
            throw new UOE("Cannot remove");
        }
    };
}
Example usage of org.apache.druid.java.util.common.UOE in the druid-io/druid project: class S3DataSegmentPuller, method buildFileObject.
/**
 * Wraps the S3 object at {@code uri} in a read-only, lazy {@link FileObject}. Nothing is fetched
 * eagerly: object content is pulled on the first {@code openInputStream()} call, and the object
 * summary only when {@code getLastModified()} needs it. Write- and character-oriented operations
 * throw {@link UOE}.
 *
 * @param uri S3 URI, validated via {@link S3Utils#checkURI}
 * @return a read-only {@link FileObject} view of the S3 object
 * @throws AmazonServiceException if a lazily-triggered S3 call fails outside openInputStream
 */
public FileObject buildFileObject(final URI uri) throws AmazonServiceException {
final CloudObjectLocation coords = new CloudObjectLocation(S3Utils.checkURI(uri));
final String path = uri.getPath();
return new FileObject() {
// Both fetched lazily: the full object on first read, the summary only for getLastModified().
S3Object s3Object = null;
S3ObjectSummary objectSummary = null;
@Override
public URI toUri() {
return uri;
}
@Override
public String getName() {
final String ext = Files.getFileExtension(path);
return Files.getNameWithoutExtension(path) + (Strings.isNullOrEmpty(ext) ? "" : ("." + ext));
}
/**
 * Returns an input stream for a s3 object. The returned input stream is not thread-safe.
 */
@Override
public InputStream openInputStream() throws IOException {
try {
if (s3Object == null) {
// lazily promote to full GET
s3Object = s3Client.getObject(coords.getBucket(), coords.getPath());
}
final InputStream in = s3Object.getObjectContent();
// Closing the returned stream must also release the underlying S3 object/connection.
final Closer closer = Closer.create();
closer.register(in);
closer.register(s3Object);
return new FilterInputStream(in) {
@Override
public void close() throws IOException {
closer.close();
}
};
} catch (AmazonServiceException e) {
throw new IOE(e, "Could not load S3 URI [%s]", uri);
}
}
@Override
public OutputStream openOutputStream() {
throw new UOE("Cannot stream S3 output");
}
@Override
public Reader openReader(boolean ignoreEncodingErrors) {
throw new UOE("Cannot open reader");
}
@Override
public CharSequence getCharContent(boolean ignoreEncodingErrors) {
throw new UOE("Cannot open character sequence");
}
@Override
public Writer openWriter() {
throw new UOE("Cannot open writer");
}
@Override
public long getLastModified() {
// Prefer metadata from an already-fetched object; fall back to a one-time summary lookup.
if (s3Object != null) {
return s3Object.getObjectMetadata().getLastModified().getTime();
}
if (objectSummary == null) {
objectSummary = S3Utils.getSingleObjectSummary(s3Client, coords.getBucket(), coords.getPath());
}
return objectSummary.getLastModified().getTime();
}
@Override
public boolean delete() {
// NOTE(review): message references jetS3t, but this class uses the AWS SDK (AmazonServiceException,
// S3Object) — the rationale looks stale; confirm and reword.
throw new UOE("Cannot delete S3 items anonymously. jetS3t doesn't support authenticated deletes easily.");
}
};
}
Aggregations