Example 11 with IndexIO

Use of org.apache.druid.segment.IndexIO in the druid project by druid-io.

From the class ColumnarLongsEncodeDataFromSegmentBenchmark, method initializeSegmentValueIntermediaryFile.

/**
 * Writes column values to an intermediary text file, one value per line; encoders read this file
 * as input when writing encoded column files.
 */
private void initializeSegmentValueIntermediaryFile() throws IOException {
    File dir = getTmpDir();
    File dataFile = new File(dir, getColumnDataFileName(segmentName, columnName));
    if (!dataFile.exists()) {
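        // ColumnConfig is overridden with a single method in these examples, so the
        // lambda () -> 0 supplies a columnCacheSizeBytes of zero (no column cache).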
        final IndexIO indexIO = new IndexIO(new DefaultObjectMapper(), () -> 0);
        try (final QueryableIndex index = indexIO.loadIndex(new File(segmentPath))) {
            final Set<String> columnNames = new LinkedHashSet<>();
            columnNames.add(ColumnHolder.TIME_COLUMN_NAME);
            Iterables.addAll(columnNames, index.getColumnNames());
            final ColumnHolder column = index.getColumnHolder(columnName);
            final ColumnCapabilities capabilities = column.getCapabilities();
            try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) {
                if (!capabilities.is(ValueType.LONG)) {
                    throw new RuntimeException("Invalid column type, expected 'Long'");
                }
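                // The cast is safe here: the LONG type check above has already passed.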
                LongsColumn theColumn = (LongsColumn) column.getColumn();
                for (int i = 0; i < theColumn.length(); i++) {
                    long value = theColumn.getLongSingleValueRow(i);
                    writer.write(value + "\n");
                }
            }
        }
    }
}
Also used: LinkedHashSet (java.util.LinkedHashSet), ColumnHolder (org.apache.druid.segment.column.ColumnHolder), LongsColumn (org.apache.druid.segment.column.LongsColumn), ColumnCapabilities (org.apache.druid.segment.column.ColumnCapabilities), IndexIO (org.apache.druid.segment.IndexIO), QueryableIndex (org.apache.druid.segment.QueryableIndex), DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper), File (java.io.File), Writer (java.io.Writer)
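For context, a minimal sketch of the consuming side, assuming the encoders simply parse one long per line from this intermediary file (illustrative only, not taken from the benchmark; uses java.io.BufferedReader alongside the java.nio.file.Files class already imported above):

try (BufferedReader reader = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8)) {
    String line;
    while ((line = reader.readLine()) != null) {
        // One column value per line, as written by the method above.
        long value = Long.parseLong(line);
        // ... hand value to the encoder under test ...
    }
}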

Example 12 with IndexIO

Use of org.apache.druid.segment.IndexIO in the druid project by druid-io.

From the class SegmentizerFactoryTest, method testFactory.

@Test
public void testFactory() throws IOException {
    File factoryFile = Files.createTempFile("", "factory.json").toFile();
    ObjectMapper mapper = new DefaultObjectMapper();
    mapper.registerModule(new SegmentizerModule());
    IndexIO indexIO = new IndexIO(mapper, new ColumnConfig() {
        @Override
        public int columnCacheSizeBytes() {
            return 777;
        }
    });
    mapper.setInjectableValues(new InjectableValues.Std().addValue(IndexIO.class, indexIO));
    // Serialize the factory, then round-trip it through JSON to verify polymorphic deserialization.
    try (FileOutputStream fos = new FileOutputStream(factoryFile)) {
        mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
    }
    SegmentizerFactory factory = mapper.readValue(factoryFile, SegmentizerFactory.class);
    Assert.assertTrue(factory instanceof MMappedQueryableSegmentizerFactory);
}
Also used: IndexIO (org.apache.druid.segment.IndexIO), ColumnConfig (org.apache.druid.segment.column.ColumnConfig), FileOutputStream (java.io.FileOutputStream), SegmentizerModule (org.apache.druid.jackson.SegmentizerModule), DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper), File (java.io.File), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper), Test (org.junit.Test)
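Since only columnCacheSizeBytes() is overridden, the anonymous ColumnConfig above can be collapsed to the lambda form that Example 11 uses:

IndexIO indexIO = new IndexIO(mapper, () -> 777);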

Example 13 with IndexIO

Use of org.apache.druid.segment.IndexIO in the druid project by druid-io.

From the class DruidInputSource, method fixedFormatReader.

@Override
protected InputSourceReader fixedFormatReader(InputRowSchema inputRowSchema, @Nullable File temporaryDirectory) {
    final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
    final List<TimelineObjectHolder<String, DataSegment>> timeline = createTimeline();
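    // Flatten the timeline: each holder carries a partition holder whose chunks each become one input entity.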
    final Iterator<DruidSegmentInputEntity> entityIterator = FluentIterable.from(timeline).transformAndConcat(holder -> {
        // noinspection ConstantConditions
        final PartitionHolder<DataSegment> partitionHolder = holder.getObject();
        // noinspection ConstantConditions
        return FluentIterable.from(partitionHolder).transform(chunk -> new DruidSegmentInputEntity(segmentCacheManager, chunk.getObject(), holder.getInterval()));
    }).iterator();
    final DruidSegmentInputFormat inputFormat = new DruidSegmentInputFormat(indexIO, dimFilter);
    final InputRowSchema inputRowSchemaToUse;
    if (taskConfig.isIgnoreTimestampSpecForDruidInputSource()) {
        // Legacy compatibility mode; see https://github.com/apache/druid/pull/10267.
        LOG.warn("Ignoring the provided timestampSpec and reading the __time column instead. To use timestampSpecs with " + "the 'druid' input source, set druid.indexer.task.ignoreTimestampSpecForDruidInputSource to false.");
        inputRowSchemaToUse = new InputRowSchema(new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, STANDARD_TIME_COLUMN_FORMATS.iterator().next(), null), inputRowSchema.getDimensionsSpec(), inputRowSchema.getColumnsFilter().plus(ColumnHolder.TIME_COLUMN_NAME));
    } else {
        inputRowSchemaToUse = inputRowSchema;
    }
    if (ColumnHolder.TIME_COLUMN_NAME.equals(inputRowSchemaToUse.getTimestampSpec().getTimestampColumn()) && !STANDARD_TIME_COLUMN_FORMATS.contains(inputRowSchemaToUse.getTimestampSpec().getTimestampFormat())) {
        // Slight chance the user did this intentionally, but not likely. Log a warning.
        LOG.warn("The provided timestampSpec refers to the %s column without using format %s. If you wanted to read the " + "column as-is, switch formats.", inputRowSchemaToUse.getTimestampSpec().getTimestampColumn(), STANDARD_TIME_COLUMN_FORMATS);
    }
    return new InputEntityIteratingReader(inputRowSchemaToUse, inputFormat, entityIterator, temporaryDirectory);
}
Also used: SegmentCacheManager (org.apache.druid.segment.loading.SegmentCacheManager), JsonProperty (com.fasterxml.jackson.annotation.JsonProperty), SegmentCacheManagerFactory (org.apache.druid.indexing.common.SegmentCacheManagerFactory), TaskConfig (org.apache.druid.indexing.common.config.TaskConfig), Comparators (org.apache.druid.java.util.common.guava.Comparators), AbstractInputSource (org.apache.druid.data.input.AbstractInputSource), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), FluentIterable (com.google.common.collect.FluentIterable), Map (java.util.Map), InputSourceReader (org.apache.druid.data.input.InputSourceReader), IAE (org.apache.druid.java.util.common.IAE), JacksonInject (com.fasterxml.jackson.annotation.JacksonInject), RetryPolicyFactory (org.apache.druid.indexing.common.RetryPolicyFactory), InputFormat (org.apache.druid.data.input.InputFormat), Collection (java.util.Collection), SplitHintSpec (org.apache.druid.data.input.SplitHintSpec), SplittableInputSource (org.apache.druid.data.input.impl.SplittableInputSource), ISE (org.apache.druid.java.util.common.ISE), Objects (java.util.Objects), MaxSizeSplitHintSpec (org.apache.druid.data.input.MaxSizeSplitHintSpec), PartitionHolder (org.apache.druid.timeline.partition.PartitionHolder), List (java.util.List), Stream (java.util.stream.Stream), DimFilter (org.apache.druid.query.filter.DimFilter), DataSegment (org.apache.druid.timeline.DataSegment), SortedMap (java.util.SortedMap), Logger (org.apache.druid.java.util.common.logger.Logger), Streams (org.apache.druid.utils.Streams), InputSplit (org.apache.druid.data.input.InputSplit), Duration (org.joda.time.Duration), SegmentsSplitHintSpec (org.apache.druid.data.input.SegmentsSplitHintSpec), HashMap (java.util.HashMap), InputRowSchema (org.apache.druid.data.input.InputRowSchema), Iterators (com.google.common.collect.Iterators), ArrayList (java.util.ArrayList), PartitionChunk (org.apache.druid.timeline.partition.PartitionChunk), Interval (org.joda.time.Interval), ColumnHolder (org.apache.druid.segment.column.ColumnHolder), ImmutableList (com.google.common.collect.ImmutableList), WindowedSegmentId (org.apache.druid.indexing.firehose.WindowedSegmentId), CoordinatorClient (org.apache.druid.client.coordinator.CoordinatorClient), ThreadLocalRandom (java.util.concurrent.ThreadLocalRandom), InputFileAttribute (org.apache.druid.data.input.InputFileAttribute), Nullable (javax.annotation.Nullable), RetryPolicy (org.apache.druid.indexing.common.RetryPolicy), VersionedIntervalTimeline (org.apache.druid.timeline.VersionedIntervalTimeline), Iterator (java.util.Iterator), TimelineObjectHolder (org.apache.druid.timeline.TimelineObjectHolder), File (java.io.File), InputEntityIteratingReader (org.apache.druid.data.input.impl.InputEntityIteratingReader), TreeMap (java.util.TreeMap), JsonCreator (com.fasterxml.jackson.annotation.JsonCreator), JsonInclude (com.fasterxml.jackson.annotation.JsonInclude), Preconditions (com.google.common.base.Preconditions), Comparator (java.util.Comparator), IndexIO (org.apache.druid.segment.IndexIO), Collections (java.util.Collections)

Example 14 with IndexIO

Use of org.apache.druid.segment.IndexIO in the druid project by druid-io.

From the class SeekableStreamIndexTaskTestBase, method readSegmentColumn.

protected List<String> readSegmentColumn(final String column, final SegmentDescriptor descriptor) throws IOException {
    File indexBasePath = new File(StringUtils.format("%s/%s/%s_%s/%s/%d", getSegmentDirectory(), OLD_DATA_SCHEMA.getDataSource(), descriptor.getInterval().getStart(), descriptor.getInterval().getEnd(), descriptor.getVersion(), descriptor.getPartitionNumber()));
    File outputLocation = new File(directory, StringUtils.format("%s_%s_%s_%s", descriptor.getInterval().getStart(), descriptor.getInterval().getEnd(), descriptor.getVersion(), descriptor.getPartitionNumber()));
    outputLocation.mkdir();
    CompressionUtils.unzip(Files.asByteSource(new File(indexBasePath.listFiles()[0], "index.zip")), outputLocation, Predicates.alwaysFalse(), false);
    IndexIO indexIO = new TestUtils().getTestIndexIO();
    QueryableIndex index = indexIO.loadIndex(outputLocation);
    DictionaryEncodedColumn<String> theColumn = (DictionaryEncodedColumn<String>) index.getColumnHolder(column).getColumn();
    List<String> values = new ArrayList<>();
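    // Each row of a dictionary-encoded string column stores an int id; lookupName(id) resolves it to the string value.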
    for (int i = 0; i < theColumn.length(); i++) {
        int id = theColumn.getSingleValueRow(i);
        String value = theColumn.lookupName(id);
        values.add(value);
    }
    return values;
}
Also used: TestUtils (org.apache.druid.indexing.common.TestUtils), IndexIO (org.apache.druid.segment.IndexIO), QueryableIndex (org.apache.druid.segment.QueryableIndex), ArrayList (java.util.ArrayList), DictionaryEncodedColumn (org.apache.druid.segment.column.DictionaryEncodedColumn), File (java.io.File)

Example 15 with IndexIO

Use of org.apache.druid.segment.IndexIO in the druid project by druid-io.

From the class AggregationTestHelper, method createTopNQueryAggregationTestHelper.

public static AggregationTestHelper createTopNQueryAggregationTestHelper(List<? extends Module> jsonModulesToRegister, TemporaryFolder tempFolder) {
    ObjectMapper mapper = TestHelper.makeJsonMapper();
    TopNQueryQueryToolChest toolchest = new TopNQueryQueryToolChest(new TopNQueryConfig());
    final CloseableStupidPool<ByteBuffer> pool = new CloseableStupidPool<>("TopNQueryRunnerFactory-bufferPool", new Supplier<ByteBuffer>() {
        @Override
        public ByteBuffer get() {
            // 10 MiB processing buffer per pool entry.
            return ByteBuffer.allocate(10 * 1024 * 1024);
        }
    });
    final Closer resourceCloser = Closer.create();
    TopNQueryRunnerFactory factory = new TopNQueryRunnerFactory(pool, toolchest, QueryRunnerTestHelper.NOOP_QUERYWATCHER);
    IndexIO indexIO = new IndexIO(mapper, new ColumnConfig() {
        @Override
        public int columnCacheSizeBytes() {
            return 0;
        }
    });
    return new AggregationTestHelper(mapper, new IndexMergerV9(mapper, indexIO, OffHeapMemorySegmentWriteOutMediumFactory.instance()), indexIO, toolchest, factory, tempFolder, jsonModulesToRegister, resourceCloser, Collections.emptyMap());
}
Also used: Closer (org.apache.druid.java.util.common.io.Closer), ColumnConfig (org.apache.druid.segment.column.ColumnConfig), IndexMergerV9 (org.apache.druid.segment.IndexMergerV9), CloseableStupidPool (org.apache.druid.collections.CloseableStupidPool), ByteBuffer (java.nio.ByteBuffer), TopNQueryConfig (org.apache.druid.query.topn.TopNQueryConfig), IndexIO (org.apache.druid.segment.IndexIO), TopNQueryRunnerFactory (org.apache.druid.query.topn.TopNQueryRunnerFactory), TopNQueryQueryToolChest (org.apache.druid.query.topn.TopNQueryQueryToolChest), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)
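A minimal usage sketch, assuming a JUnit test class with a TemporaryFolder rule (the empty module list is a placeholder; the helper's query-running methods are not shown):

@Rule
public final TemporaryFolder tempFolder = new TemporaryFolder();

// Build the helper; Jackson modules would be passed in if the aggregator under test needed them.
AggregationTestHelper helper = AggregationTestHelper.createTopNQueryAggregationTestHelper(
    Collections.emptyList(),
    tempFolder
);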

Aggregations

IndexIO (org.apache.druid.segment.IndexIO): 16
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 8
File (java.io.File): 8
DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper): 6
IndexMergerV9 (org.apache.druid.segment.IndexMergerV9): 6
ColumnConfig (org.apache.druid.segment.column.ColumnConfig): 5
QueryableIndex (org.apache.druid.segment.QueryableIndex): 4
Before (org.junit.Before): 4
InjectableValues (com.fasterxml.jackson.databind.InjectableValues): 3
ArrayList (java.util.ArrayList): 3
SegmentizerModule (org.apache.druid.jackson.SegmentizerModule): 3
Closer (org.apache.druid.java.util.common.io.Closer): 3
IndexMerger (org.apache.druid.segment.IndexMerger): 3
IndexSpec (org.apache.druid.segment.IndexSpec): 3
IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex): 3
NamedType (com.fasterxml.jackson.databind.jsontype.NamedType): 2
SimpleModule (com.fasterxml.jackson.databind.module.SimpleModule): 2
Injector (com.google.inject.Injector): 2
IAE (org.apache.druid.java.util.common.IAE): 2
ExprMacroTable (org.apache.druid.math.expr.ExprMacroTable): 2