use of org.apache.druid.segment.Metadata in project presto by prestodb.
the class V9SegmentIndexSource method loadIndex.
@Override
public QueryableIndex loadIndex(List<ColumnHandle> columnHandles) throws IOException {
ByteBuffer indexBuffer = ByteBuffer.wrap(segmentColumnSource.getColumnData(INDEX_METADATA_FILE_NAME));
GenericIndexed.read(indexBuffer, STRING_STRATEGY);
GenericIndexed<String> allDimensions = GenericIndexed.read(indexBuffer, STRING_STRATEGY);
Interval dataInterval = Intervals.utc(indexBuffer.getLong(), indexBuffer.getLong());
BitmapSerdeFactory segmentBitmapSerdeFactory;
if (indexBuffer.hasRemaining()) {
segmentBitmapSerdeFactory = JSON_MAPPER.readValue(SERIALIZER_UTILS.readString(indexBuffer), BitmapSerdeFactory.class);
} else {
segmentBitmapSerdeFactory = new BitmapSerde.LegacyBitmapSerdeFactory();
}
Metadata metadata = null;
ByteBuffer metadataBuffer = ByteBuffer.wrap(segmentColumnSource.getColumnData(SEGMENT_METADATA_FILE_NAME));
try {
metadata = JSON_MAPPER.readValue(SERIALIZER_UTILS.readBytes(metadataBuffer, metadataBuffer.remaining()), Metadata.class);
} catch (JsonParseException | JsonMappingException e) {
// Any jackson deserialization errors are ignored e.g. if metadata contains some aggregator which
// is no longer supported then it is OK to not use the metadata instead of failing segment loading
log.warn(e, "Failed to load metadata for segment");
}
Map<String, Supplier<ColumnHolder>> columns = new HashMap<>();
for (ColumnHandle columnHandle : columnHandles) {
String columnName = ((DruidColumnHandle) columnHandle).getColumnName();
columns.put(columnName, () -> createColumnHolder(columnName));
}
List<String> availableDimensions = Streams.stream(allDimensions.iterator()).filter(columns::containsKey).collect(toImmutableList());
columns.put(TIME_COLUMN_NAME, () -> createColumnHolder(TIME_COLUMN_NAME));
Indexed<String> indexed = new ListIndexed<>(availableDimensions);
// TODO: get rid of the time column by creating Presto's SimpleQueryableIndex impl
return new SimpleQueryableIndex(dataInterval, indexed, segmentBitmapSerdeFactory.getBitmapFactory(), columns, null, metadata, false);
}
use of org.apache.druid.segment.Metadata in project druid by druid-io.
the class SegmentMetadataQueryRunnerFactory method createRunner.
@Override
public QueryRunner<SegmentAnalysis> createRunner(final Segment segment) {
return new QueryRunner<SegmentAnalysis>() {
@Override
public Sequence<SegmentAnalysis> run(QueryPlus<SegmentAnalysis> inQ, ResponseContext responseContext) {
SegmentMetadataQuery updatedQuery = ((SegmentMetadataQuery) inQ.getQuery()).withFinalizedAnalysisTypes(toolChest.getConfig());
final SegmentAnalyzer analyzer = new SegmentAnalyzer(updatedQuery.getAnalysisTypes());
final Map<String, ColumnAnalysis> analyzedColumns = analyzer.analyze(segment);
final long numRows = analyzer.numRows(segment);
long totalSize = 0;
if (analyzer.analyzingSize()) {
// Initialize with the size of the whitespace, 1 byte per
totalSize = analyzedColumns.size() * numRows;
}
Map<String, ColumnAnalysis> columns = new TreeMap<>();
ColumnIncluderator includerator = updatedQuery.getToInclude();
for (Map.Entry<String, ColumnAnalysis> entry : analyzedColumns.entrySet()) {
final String columnName = entry.getKey();
final ColumnAnalysis column = entry.getValue();
if (!column.isError()) {
totalSize += column.getSize();
}
if (includerator.include(columnName)) {
columns.put(columnName, column);
}
}
List<Interval> retIntervals = updatedQuery.analyzingInterval() ? Collections.singletonList(segment.getDataInterval()) : null;
final Map<String, AggregatorFactory> aggregators;
Metadata metadata = null;
if (updatedQuery.hasAggregators()) {
metadata = segment.asStorageAdapter().getMetadata();
if (metadata != null && metadata.getAggregators() != null) {
aggregators = new HashMap<>();
for (AggregatorFactory aggregator : metadata.getAggregators()) {
aggregators.put(aggregator.getName(), aggregator);
}
} else {
aggregators = null;
}
} else {
aggregators = null;
}
final TimestampSpec timestampSpec;
if (updatedQuery.hasTimestampSpec()) {
if (metadata == null) {
metadata = segment.asStorageAdapter().getMetadata();
}
timestampSpec = metadata != null ? metadata.getTimestampSpec() : null;
} else {
timestampSpec = null;
}
final Granularity queryGranularity;
if (updatedQuery.hasQueryGranularity()) {
if (metadata == null) {
metadata = segment.asStorageAdapter().getMetadata();
}
queryGranularity = metadata != null ? metadata.getQueryGranularity() : null;
} else {
queryGranularity = null;
}
Boolean rollup = null;
if (updatedQuery.hasRollup()) {
if (metadata == null) {
metadata = segment.asStorageAdapter().getMetadata();
}
rollup = metadata != null ? metadata.isRollup() : null;
if (rollup == null) {
// in this case, this segment is built before no-rollup function is coded,
// thus it is built with rollup
rollup = Boolean.TRUE;
}
}
return Sequences.simple(Collections.singletonList(new SegmentAnalysis(segment.getId().toString(), retIntervals, columns, totalSize, numRows, aggregators, timestampSpec, queryGranularity, rollup)));
}
};
}
use of org.apache.druid.segment.Metadata in project druid by druid-io.
the class RealtimePlumber method bootstrapSinksFromDisk.
protected Object bootstrapSinksFromDisk() {
final VersioningPolicy versioningPolicy = config.getVersioningPolicy();
File baseDir = computeBaseDir(schema);
if (baseDir == null || !baseDir.exists()) {
return null;
}
File[] files = baseDir.listFiles();
if (files == null) {
return null;
}
Object metadata = null;
long latestCommitTime = 0;
for (File sinkDir : files) {
final Interval sinkInterval = Intervals.of(sinkDir.getName().replace('_', '/'));
// final File[] sinkFiles = sinkDir.listFiles();
// To avoid reading and listing of "merged" dir
final File[] sinkFiles = sinkDir.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String fileName) {
return !(Ints.tryParse(fileName) == null);
}
});
Arrays.sort(sinkFiles, new Comparator<File>() {
@Override
public int compare(File o1, File o2) {
try {
return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName()));
} catch (NumberFormatException e) {
log.error(e, "Couldn't compare as numbers? [%s][%s]", o1, o2);
return o1.compareTo(o2);
}
}
});
boolean isCorrupted = false;
List<FireHydrant> hydrants = new ArrayList<>();
for (File segmentDir : sinkFiles) {
log.info("Loading previously persisted segment at [%s]", segmentDir);
// If 100% sure that this is not needed, this check can be removed.
if (Ints.tryParse(segmentDir.getName()) == null) {
continue;
}
QueryableIndex queryableIndex = null;
try {
queryableIndex = indexIO.loadIndex(segmentDir);
} catch (IOException e) {
log.error(e, "Problem loading segmentDir from disk.");
isCorrupted = true;
}
if (isCorrupted) {
try {
File corruptSegmentDir = computeCorruptedFileDumpDir(segmentDir, schema);
log.info("Renaming %s to %s", segmentDir.getAbsolutePath(), corruptSegmentDir.getAbsolutePath());
org.apache.commons.io.FileUtils.copyDirectory(segmentDir, corruptSegmentDir);
FileUtils.deleteDirectory(segmentDir);
} catch (Exception e1) {
log.error(e1, "Failed to rename %s", segmentDir.getAbsolutePath());
}
// at some point.
continue;
}
Metadata segmentMetadata = queryableIndex.getMetadata();
if (segmentMetadata != null) {
Object timestampObj = segmentMetadata.get(COMMIT_METADATA_TIMESTAMP_KEY);
if (timestampObj != null) {
long timestamp = ((Long) timestampObj).longValue();
if (timestamp > latestCommitTime) {
log.info("Found metaData [%s] with latestCommitTime [%s] greater than previous recorded [%s]", queryableIndex.getMetadata(), timestamp, latestCommitTime);
latestCommitTime = timestamp;
metadata = queryableIndex.getMetadata().get(COMMIT_METADATA_KEY);
}
}
}
hydrants.add(new FireHydrant(new QueryableIndexSegment(queryableIndex, SegmentId.of(schema.getDataSource(), sinkInterval, versioningPolicy.getVersion(sinkInterval), config.getShardSpec())), Integer.parseInt(segmentDir.getName())));
}
if (hydrants.isEmpty()) {
// Probably encountered a corrupt sink directory
log.warn("Found persisted segment directory with no intermediate segments present at %s, skipping sink creation.", sinkDir.getAbsolutePath());
continue;
}
final Sink currSink = new Sink(sinkInterval, schema, config.getShardSpec(), versioningPolicy.getVersion(sinkInterval), config.getAppendableIndexSpec(), config.getMaxRowsInMemory(), config.getMaxBytesInMemoryOrDefault(), true, config.getDedupColumn(), hydrants);
addSink(currSink);
}
return metadata;
}
Aggregations