Use of org.apache.druid.segment.loading.SegmentCacheManager in project druid by druid-io.
The class SegmentLoadDropHandlerCacheTest, method setup:
@Before
public void setup() throws IOException {
  storageLoc = new TestStorageLocation(temporaryFolder);
  SegmentLoaderConfig config = new SegmentLoaderConfig()
      .withLocations(Collections.singletonList(storageLoc.toStorageLocationConfig(MAX_SIZE, null)))
      .withInfoDir(storageLoc.getInfoDir());
  objectMapper = TestHelper.makeJsonMapper();
  objectMapper.registerSubtypes(TestLoadSpec.class);
  objectMapper.registerSubtypes(TestSegmentizerFactory.class);
  SegmentCacheManager cacheManager = new SegmentLocalCacheManager(config, objectMapper);
  SegmentManager segmentManager = new SegmentManager(
      new SegmentLocalCacheLoader(cacheManager, TestIndex.INDEX_IO, objectMapper)
  );
  segmentAnnouncer = Mockito.mock(DataSegmentAnnouncer.class);
  loadDropHandler = new SegmentLoadDropHandler(
      objectMapper, config, segmentAnnouncer, Mockito.mock(DataSegmentServerAnnouncer.class),
      segmentManager, cacheManager, new ServerTypeConfig(ServerType.HISTORICAL)
  );
  EmittingLogger.registerEmitter(new NoopServiceEmitter());
}
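The cache manager wired up here is the same component the remaining examples on this page call into. For reference, a minimal sketch of its core contract, written as a hypothetical helper; the cleanup call is an assumption about the SegmentCacheManager interface, and only getSegmentFiles appears verbatim in the examples below:
// Hypothetical helper: fetch the files backing a segment through the cache manager.
private File fetchSegmentDir(SegmentCacheManager cacheManager, DataSegment segment) throws SegmentLoadingException {
  // Pulls the segment into the configured local storage location, or returns the
  // already-cached directory if present. Callers would eventually release the local
  // copy via cacheManager.cleanup(segment) (assumed from the interface).
  return cacheManager.getSegmentFiles(segment);
}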
Use of org.apache.druid.segment.loading.SegmentCacheManager in project druid by druid-io.
The class CompactionTaskRunTest, method getCSVFormatRowsFromSegments:
private List<String> getCSVFormatRowsFromSegments(List<DataSegment> segments) throws Exception {
  final File cacheDir = temporaryFolder.newFolder();
  final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(cacheDir);
  List<Cursor> cursors = new ArrayList<>();
  for (DataSegment segment : segments) {
    final File segmentFile = segmentCacheManager.getSegmentFiles(segment);
    final WindowedStorageAdapter adapter = new WindowedStorageAdapter(
        new QueryableIndexStorageAdapter(testUtils.getTestIndexIO().loadIndex(segmentFile)),
        segment.getInterval()
    );
    final Sequence<Cursor> cursorSequence = adapter.getAdapter().makeCursors(
        null, segment.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null
    );
    cursors.addAll(cursorSequence.toList());
  }
  List<String> rowsFromSegment = new ArrayList<>();
  for (Cursor cursor : cursors) {
    cursor.reset();
    while (!cursor.isDone()) {
      final DimensionSelector selector1 = cursor.getColumnSelectorFactory()
          .makeDimensionSelector(new DefaultDimensionSpec("ts", "ts"));
      final DimensionSelector selector2 = cursor.getColumnSelectorFactory()
          .makeDimensionSelector(new DefaultDimensionSpec("dim", "dim"));
      final DimensionSelector selector3 = cursor.getColumnSelectorFactory()
          .makeDimensionSelector(new DefaultDimensionSpec("val", "val"));
      Object dimObject = selector2.getObject();
      String dimVal = null;
      if (dimObject instanceof String) {
        dimVal = (String) dimObject;
      } else if (dimObject instanceof List) {
        dimVal = String.join("|", (List<String>) dimObject);
      }
      rowsFromSegment.add(
          makeCSVFormatRow(selector1.getObject().toString(), dimVal, selector3.defaultGetObject().toString())
      );
      cursor.advance();
    }
  }
  return rowsFromSegment;
}
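makeCSVFormatRow is a helper defined elsewhere in CompactionTaskRunTest and is not shown on this page. A minimal stand-in, assuming it simply joins the selected values with commas; the real test may escape or format values differently:
// Hypothetical stand-in for the helper referenced above.
private static String makeCSVFormatRow(String... values) {
  return String.join(",", values);
}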
Use of org.apache.druid.segment.loading.SegmentCacheManager in project druid by druid-io.
The class IngestSegmentFirehoseFactory, method connect:
@Override
public Firehose connect(InputRowParser inputRowParser, File temporaryDirectory) throws ParseException {
  log.debug("Connecting firehose: dataSource[%s], interval[%s], segmentIds[%s]", dataSource, interval, segmentIds);
  final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = getTimeline();
  // Download all segments locally.
  // Note: this requires enough local storage space to fit all of the segments, even though
  // IngestSegmentFirehose iterates over the segments in series. We may want to change this
  // to download files lazily, perhaps sharing code with PrefetchableTextFilesFirehoseFactory.
  final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
  Map<DataSegment, File> segmentFileMap = Maps.newLinkedHashMap();
  for (TimelineObjectHolder<String, DataSegment> holder : timeLineSegments) {
    for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
      final DataSegment segment = chunk.getObject();
      segmentFileMap.computeIfAbsent(segment, k -> {
        try {
          return segmentCacheManager.getSegmentFiles(segment);
        } catch (SegmentLoadingException e) {
          throw new RuntimeException(e);
        }
      });
    }
  }
  final List<String> dims = ReingestionTimelineUtils.getDimensionsToReingest(
      dimensions, inputRowParser.getParseSpec().getDimensionsSpec(), timeLineSegments
  );
  final List<String> metricsList = metrics == null
      ? ReingestionTimelineUtils.getUniqueMetrics(timeLineSegments)
      : metrics;
  final List<WindowedStorageAdapter> adapters = Lists.newArrayList(
      Iterables.concat(
          Iterables.transform(
              timeLineSegments,
              new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>() {
                @Override
                public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder) {
                  return Iterables.transform(
                      holder.getObject(),
                      new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>() {
                        @Override
                        public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input) {
                          final DataSegment segment = input.getObject();
                          try {
                            return new WindowedStorageAdapter(
                                new QueryableIndexStorageAdapter(
                                    indexIO.loadIndex(
                                        Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getId())
                                    )
                                ),
                                holder.getInterval()
                            );
                          } catch (IOException e) {
                            throw new RuntimeException(e);
                          }
                        }
                      }
                  );
                }
              }
          )
      )
  );
  final TransformSpec transformSpec = TransformSpec.fromInputRowParser(inputRowParser);
  return new IngestSegmentFirehose(adapters, transformSpec, dims, metricsList, dimFilter);
}
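The nested anonymous Guava Function classes above obscure a fairly simple shape: flatten every partition chunk in the timeline into one WindowedStorageAdapter per segment. An equivalent sketch using Java 8 streams, shown only for readability (StreamSupport and Collectors come from java.util.stream; behavior is intended to match the code above):
final List<WindowedStorageAdapter> adapters = timeLineSegments
    .stream()
    .flatMap(holder -> StreamSupport.stream(holder.getObject().spliterator(), false)
        .map(chunk -> {
          final DataSegment segment = chunk.getObject();
          final File segmentFile = Preconditions.checkNotNull(
              segmentFileMap.get(segment), "File for segment %s", segment.getId()
          );
          try {
            return new WindowedStorageAdapter(
                new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile)),
                holder.getInterval()
            );
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
        }))
    .collect(Collectors.toList());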
Use of org.apache.druid.segment.loading.SegmentCacheManager in project druid by druid-io.
The class DruidInputSource, method fixedFormatReader:
@Override
protected InputSourceReader fixedFormatReader(InputRowSchema inputRowSchema, @Nullable File temporaryDirectory) {
  final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
  final List<TimelineObjectHolder<String, DataSegment>> timeline = createTimeline();
  final Iterator<DruidSegmentInputEntity> entityIterator = FluentIterable
      .from(timeline)
      .transformAndConcat(holder -> {
        //noinspection ConstantConditions
        final PartitionHolder<DataSegment> partitionHolder = holder.getObject();
        //noinspection ConstantConditions
        return FluentIterable
            .from(partitionHolder)
            .transform(chunk -> new DruidSegmentInputEntity(segmentCacheManager, chunk.getObject(), holder.getInterval()));
      })
      .iterator();
  final DruidSegmentInputFormat inputFormat = new DruidSegmentInputFormat(indexIO, dimFilter);
  final InputRowSchema inputRowSchemaToUse;
  if (taskConfig.isIgnoreTimestampSpecForDruidInputSource()) {
    // Legacy compatibility mode; see https://github.com/apache/druid/pull/10267.
    LOG.warn(
        "Ignoring the provided timestampSpec and reading the __time column instead. To use timestampSpecs with "
        + "the 'druid' input source, set druid.indexer.task.ignoreTimestampSpecForDruidInputSource to false."
    );
    inputRowSchemaToUse = new InputRowSchema(
        new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, STANDARD_TIME_COLUMN_FORMATS.iterator().next(), null),
        inputRowSchema.getDimensionsSpec(),
        inputRowSchema.getColumnsFilter().plus(ColumnHolder.TIME_COLUMN_NAME)
    );
  } else {
    inputRowSchemaToUse = inputRowSchema;
  }
  if (ColumnHolder.TIME_COLUMN_NAME.equals(inputRowSchemaToUse.getTimestampSpec().getTimestampColumn())
      && !STANDARD_TIME_COLUMN_FORMATS.contains(inputRowSchemaToUse.getTimestampSpec().getTimestampFormat())) {
    // Slight chance the user did this intentionally, but not likely. Log a warning.
    LOG.warn(
        "The provided timestampSpec refers to the %s column without using format %s. If you wanted to read the "
        + "column as-is, switch formats.",
        inputRowSchemaToUse.getTimestampSpec().getTimestampColumn(),
        STANDARD_TIME_COLUMN_FORMATS
    );
  }
  return new InputEntityIteratingReader(inputRowSchemaToUse, inputFormat, entityIterator, temporaryDirectory);
}
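The final branch warns when a timestampSpec targets the __time column with a non-standard format. A hedged sketch of a spec that reads __time as-is without triggering that warning, assuming "millis" is among the formats in STANDARD_TIME_COLUMN_FORMATS (that constant's contents are not shown on this page):
// Reads the __time column directly; with a standard format the warning branch above is skipped.
final TimestampSpec timeColumnSpec = new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, "millis", null);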
Use of org.apache.druid.segment.loading.SegmentCacheManager in project druid by druid-io.
The class AbstractMultiPhaseParallelIndexingTest, method loadSegment:
private Segment loadSegment(DataSegment dataSegment, File tempSegmentDir) {
  final SegmentCacheManager cacheManager = new SegmentCacheManagerFactory(getObjectMapper()).manufacturate(tempSegmentDir);
  final SegmentLoader loader = new SegmentLocalCacheLoader(cacheManager, getIndexIO(), getObjectMapper());
  try {
    return loader.getSegment(dataSegment, false, SegmentLazyLoadFailCallback.NOOP);
  } catch (SegmentLoadingException e) {
    throw new RuntimeException(e);
  }
}
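A brief sketch of what a caller might do with the returned Segment. This usage is not part of the test above; it assumes the segment is loaded eagerly (lazy loading is disabled in the call above), so asQueryableIndex() returns a non-null index:
// Hypothetical caller: load a segment and print its dimension columns.
private void printDimensions(DataSegment dataSegment, File tempSegmentDir) throws IOException {
  try (Segment segment = loadSegment(dataSegment, tempSegmentDir)) {
    for (String dimension : segment.asQueryableIndex().getAvailableDimensions()) {
      System.out.println(dimension);
    }
  }
}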