Use of org.apache.druid.segment.IndexIO in project druid by druid-io.
The class ColumnarLongsEncodeDataFromSegmentBenchmark, method initializeSegmentValueIntermediaryFile.
/**
 * Writes column values to an intermediary text file, one value per line; encoders read from this file as input
 * when writing encoded column files.
 */
private void initializeSegmentValueIntermediaryFile() throws IOException {
  File dir = getTmpDir();
  File dataFile = new File(dir, getColumnDataFileName(segmentName, columnName));
  if (!dataFile.exists()) {
    final IndexIO indexIO = new IndexIO(new DefaultObjectMapper(), () -> 0);
    try (final QueryableIndex index = indexIO.loadIndex(new File(segmentPath))) {
      final Set<String> columnNames = new LinkedHashSet<>();
      columnNames.add(ColumnHolder.TIME_COLUMN_NAME);
      Iterables.addAll(columnNames, index.getColumnNames());
      final ColumnHolder column = index.getColumnHolder(columnName);
      final ColumnCapabilities capabilities = column.getCapabilities();
      try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) {
        if (!capabilities.is(ValueType.LONG)) {
          throw new RuntimeException("Invalid column type, expected 'Long'");
        }
        LongsColumn theColumn = (LongsColumn) column.getColumn();
        for (int i = 0; i < theColumn.length(); i++) {
          long value = theColumn.getLongSingleValueRow(i);
          writer.write(value + "\n");
        }
      }
    }
  }
}
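The encoder side of the benchmark only needs to parse this file back into a long array. A minimal sketch of that read path, assuming the one-value-per-line format written above; the helper name readLongValues and the use of plain java.nio I/O are illustrative assumptions, not the benchmark's actual code:

// Illustrative sketch: read the intermediary file written above back into a long[].
// Assumes the one-value-per-line UTF-8 format produced by initializeSegmentValueIntermediaryFile().
private long[] readLongValues(File dataFile) throws IOException {
  List<Long> values = new ArrayList<>();
  try (BufferedReader reader = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8)) {
    String line;
    while ((line = reader.readLine()) != null) {
      values.add(Long.parseLong(line));
    }
  }
  return values.stream().mapToLong(Long::longValue).toArray();
}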
Use of org.apache.druid.segment.IndexIO in project druid by druid-io.
The class SegmentizerFactoryTest, method testFactory.
@Test
public void testFactory() throws IOException {
  File factoryFile = Files.createTempFile("", "factory.json").toFile();
  FileOutputStream fos = new FileOutputStream(factoryFile);
  ObjectMapper mapper = new DefaultObjectMapper();
  mapper.registerModule(new SegmentizerModule());
  IndexIO indexIO = new IndexIO(mapper, new ColumnConfig() {
    @Override
    public int columnCacheSizeBytes() {
      return 777;
    }
  });
  mapper.setInjectableValues(new InjectableValues.Std().addValue(IndexIO.class, indexIO));
  mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
  fos.close();
  SegmentizerFactory factory = mapper.readValue(factoryFile, SegmentizerFactory.class);
  Assert.assertTrue(factory instanceof MMappedQueryableSegmentizerFactory);
}
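The same round trip works without the temporary file. A minimal variant of the assertion, assuming the mapper and indexIO configured in the test above:

// Illustrative sketch: round-trip the factory through a JSON string instead of a file.
String json = mapper.writeValueAsString(new MMappedQueryableSegmentizerFactory(indexIO));
SegmentizerFactory fromString = mapper.readValue(json, SegmentizerFactory.class);
Assert.assertTrue(fromString instanceof MMappedQueryableSegmentizerFactory);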
Use of org.apache.druid.segment.IndexIO in project druid by druid-io.
The class DruidInputSource, method fixedFormatReader.
@Override
protected InputSourceReader fixedFormatReader(InputRowSchema inputRowSchema, @Nullable File temporaryDirectory) {
  final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
  final List<TimelineObjectHolder<String, DataSegment>> timeline = createTimeline();
  final Iterator<DruidSegmentInputEntity> entityIterator = FluentIterable
      .from(timeline)
      .transformAndConcat(holder -> {
        // noinspection ConstantConditions
        final PartitionHolder<DataSegment> partitionHolder = holder.getObject();
        // noinspection ConstantConditions
        return FluentIterable
            .from(partitionHolder)
            .transform(chunk -> new DruidSegmentInputEntity(segmentCacheManager, chunk.getObject(), holder.getInterval()));
      })
      .iterator();
  final DruidSegmentInputFormat inputFormat = new DruidSegmentInputFormat(indexIO, dimFilter);
  final InputRowSchema inputRowSchemaToUse;
  if (taskConfig.isIgnoreTimestampSpecForDruidInputSource()) {
    // Legacy compatibility mode; see https://github.com/apache/druid/pull/10267.
    LOG.warn(
        "Ignoring the provided timestampSpec and reading the __time column instead. To use timestampSpecs with "
        + "the 'druid' input source, set druid.indexer.task.ignoreTimestampSpecForDruidInputSource to false."
    );
    inputRowSchemaToUse = new InputRowSchema(
        new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, STANDARD_TIME_COLUMN_FORMATS.iterator().next(), null),
        inputRowSchema.getDimensionsSpec(),
        inputRowSchema.getColumnsFilter().plus(ColumnHolder.TIME_COLUMN_NAME)
    );
  } else {
    inputRowSchemaToUse = inputRowSchema;
  }
  if (ColumnHolder.TIME_COLUMN_NAME.equals(inputRowSchemaToUse.getTimestampSpec().getTimestampColumn())
      && !STANDARD_TIME_COLUMN_FORMATS.contains(inputRowSchemaToUse.getTimestampSpec().getTimestampFormat())) {
    // Slight chance the user did this intentionally, but not likely. Log a warning.
    LOG.warn(
        "The provided timestampSpec refers to the %s column without using format %s. If you wanted to read the "
        + "column as-is, switch formats.",
        inputRowSchemaToUse.getTimestampSpec().getTimestampColumn(),
        STANDARD_TIME_COLUMN_FORMATS
    );
  }
  return new InputEntityIteratingReader(inputRowSchemaToUse, inputFormat, entityIterator, temporaryDirectory);
}
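A caller consumes the returned InputSourceReader by iterating its rows. A hedged sketch of that consumption, assuming the no-argument read() signature returning CloseableIterator<InputRow> used by this Druid version:

// Illustrative sketch: drain an InputSourceReader such as the one returned by fixedFormatReader().
// Assumes InputSourceReader.read() returns CloseableIterator<InputRow> (no-argument form) in this version.
static List<InputRow> drain(InputSourceReader reader) throws IOException {
  List<InputRow> rows = new ArrayList<>();
  try (CloseableIterator<InputRow> it = reader.read()) {
    while (it.hasNext()) {
      // __time arrives via the timestampSpec selected above; dimensions follow the columnsFilter
      rows.add(it.next());
    }
  }
  return rows;
}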
Use of org.apache.druid.segment.IndexIO in project druid by druid-io.
The class SeekableStreamIndexTaskTestBase, method readSegmentColumn.
protected List<String> readSegmentColumn(final String column, final SegmentDescriptor descriptor) throws IOException {
  File indexBasePath = new File(StringUtils.format(
      "%s/%s/%s_%s/%s/%d",
      getSegmentDirectory(),
      OLD_DATA_SCHEMA.getDataSource(),
      descriptor.getInterval().getStart(),
      descriptor.getInterval().getEnd(),
      descriptor.getVersion(),
      descriptor.getPartitionNumber()
  ));
  File outputLocation = new File(directory, StringUtils.format(
      "%s_%s_%s_%s",
      descriptor.getInterval().getStart(),
      descriptor.getInterval().getEnd(),
      descriptor.getVersion(),
      descriptor.getPartitionNumber()
  ));
  outputLocation.mkdir();
  CompressionUtils.unzip(Files.asByteSource(new File(indexBasePath.listFiles()[0], "index.zip")), outputLocation, Predicates.alwaysFalse(), false);
  IndexIO indexIO = new TestUtils().getTestIndexIO();
  QueryableIndex index = indexIO.loadIndex(outputLocation);
  DictionaryEncodedColumn<String> theColumn = (DictionaryEncodedColumn<String>) index.getColumnHolder(column).getColumn();
  List<String> values = new ArrayList<>();
  for (int i = 0; i < theColumn.length(); i++) {
    int id = theColumn.getSingleValueRow(i);
    String value = theColumn.lookupName(id);
    values.add(value);
  }
  return values;
}
Use of org.apache.druid.segment.IndexIO in project druid by druid-io.
The class AggregationTestHelper, method createTopNQueryAggregationTestHelper.
public static AggregationTestHelper createTopNQueryAggregationTestHelper(
    List<? extends Module> jsonModulesToRegister,
    TemporaryFolder tempFolder
) {
  ObjectMapper mapper = TestHelper.makeJsonMapper();
  TopNQueryQueryToolChest toolchest = new TopNQueryQueryToolChest(new TopNQueryConfig());
  final CloseableStupidPool<ByteBuffer> pool = new CloseableStupidPool<>(
      "TopNQueryRunnerFactory-bufferPool",
      new Supplier<ByteBuffer>() {
        @Override
        public ByteBuffer get() {
          return ByteBuffer.allocate(10 * 1024 * 1024);
        }
      }
  );
  final Closer resourceCloser = Closer.create();
  TopNQueryRunnerFactory factory = new TopNQueryRunnerFactory(pool, toolchest, QueryRunnerTestHelper.NOOP_QUERYWATCHER);
  IndexIO indexIO = new IndexIO(mapper, new ColumnConfig() {
    @Override
    public int columnCacheSizeBytes() {
      return 0;
    }
  });
  return new AggregationTestHelper(
      mapper,
      new IndexMergerV9(mapper, indexIO, OffHeapMemorySegmentWriteOutMediumFactory.instance()),
      indexIO,
      toolchest,
      factory,
      tempFolder,
      jsonModulesToRegister,
      resourceCloser,
      Collections.emptyMap()
  );
}