Search in sources :

Example 31 with SegmentMetadataImpl

use of com.linkedin.pinot.core.segment.index.SegmentMetadataImpl in project pinot by linkedin.

the class SegmentFormatConverterV1ToV2 method convert.

/**
 * Converts the unsorted forward indexes of the segment in {@code indexSegmentDir} from the v1
 * fixed-bit format to the v2 fixed-bit format, then marks the segment metadata as version v2.
 *
 * <p>For every column that is not sorted, the existing forward index is read through a v1 reader,
 * rewritten through a v2 writer into a temporary {@code .tmp} file in the segment directory, and the
 * index held by the segment writer is replaced with the content of that file. Sorted columns are
 * left untouched. Finally the metadata file is copied to a {@code .orig} sibling and re-saved with
 * the segment version property set to v2.
 *
 * @param indexSegmentDir directory containing the segment to convert
 * @throws Exception if reading, converting or writing any index or the metadata fails
 */
@Override
public void convert(File indexSegmentDir) throws Exception {
    SegmentMetadataImpl segmentMetadataImpl = new SegmentMetadataImpl(indexSegmentDir);
    SegmentDirectory segmentDirectory = SegmentDirectory.createFromLocalFS(indexSegmentDir, segmentMetadataImpl, ReadMode.mmap);
    Set<String> columns = segmentMetadataImpl.getAllColumns();
    // NOTE(review): the writer is never explicitly saved or closed in this method — confirm whether
    // the caller or SegmentDirectory is expected to do that.
    SegmentDirectory.Writer segmentWriter = segmentDirectory.createWriter();
    final int totalDocs = segmentMetadataImpl.getTotalDocs();
    // since we use dictionary to encode values, we wont have any negative values in forward
    // index
    final boolean signed = false;
    for (String column : columns) {
        ColumnMetadata columnMetadata = segmentMetadataImpl.getColumnMetadataFor(column);
        if (columnMetadata.isSorted()) {
            // no need to change sorted forward index
            continue;
        }
        PinotDataBuffer fwdIndexBuffer = segmentWriter.getIndexFor(column, ColumnIndexType.FORWARD_INDEX);
        final File convertedFwdIndexFile;
        if (columnMetadata.isSingleValue()) {
            convertedFwdIndexFile = new File(indexSegmentDir, column + V1Constants.Indexes.UN_SORTED_SV_FWD_IDX_FILE_EXTENTION + ".tmp");
            SingleColumnSingleValueReader v1Reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader(fwdIndexBuffer, totalDocs, columnMetadata.getBitsPerElement(), signed);
            try {
                SingleColumnSingleValueWriter v2Writer = new com.linkedin.pinot.core.io.writer.impl.v2.FixedBitSingleValueWriter(convertedFwdIndexFile, totalDocs, columnMetadata.getBitsPerElement());
                try {
                    for (int row = 0; row < totalDocs; row++) {
                        v2Writer.setInt(row, v1Reader.getInt(row));
                    }
                } finally {
                    // close even when the copy fails so the temporary file handle is not leaked
                    v2Writer.close();
                }
            } finally {
                v1Reader.close();
            }
        } else {
            convertedFwdIndexFile = new File(indexSegmentDir, column + V1Constants.Indexes.UN_SORTED_MV_FWD_IDX_FILE_EXTENTION + ".tmp");
            SingleColumnMultiValueReader v1Reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader(fwdIndexBuffer, totalDocs, columnMetadata.getTotalNumberOfEntries(), columnMetadata.getBitsPerElement(), signed);
            try {
                SingleColumnMultiValueWriter v2Writer = new com.linkedin.pinot.core.io.writer.impl.v2.FixedBitMultiValueWriter(convertedFwdIndexFile, totalDocs, columnMetadata.getTotalNumberOfEntries(), columnMetadata.getBitsPerElement());
                try {
                    // scratch buffer sized for the widest row; each row is trimmed to its own length
                    int[] values = new int[columnMetadata.getMaxNumberOfMultiValues()];
                    for (int row = 0; row < totalDocs; row++) {
                        int length = v1Reader.getIntArray(row, values);
                        int[] copy = new int[length];
                        System.arraycopy(values, 0, copy, 0, length);
                        v2Writer.setIntArray(row, copy);
                    }
                } finally {
                    v2Writer.close();
                }
            } finally {
                v1Reader.close();
            }
        }
        // Swap the old forward index for the converted one, then drop the temporary file.
        segmentWriter.removeIndex(column, ColumnIndexType.FORWARD_INDEX);
        // FIXME (marker carried over from the original code; presumably about the int cast of the
        // file length — confirm)
        PinotDataBuffer newIndexBuffer = segmentWriter.newIndexFor(column, ColumnIndexType.FORWARD_INDEX, (int) convertedFwdIndexFile.length());
        newIndexBuffer.readFrom(convertedFwdIndexFile);
        convertedFwdIndexFile.delete();
    }
    File metadataFile = new File(indexSegmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME);
    File metadataFileCopy = new File(indexSegmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME + ".orig");
    // bis/bos are declared outside this method's visible body; they are assigned here exactly as
    // before, but are now closed even if the copy throws.
    bis = new BufferedInputStream(new FileInputStream(metadataFile));
    try {
        bos = new BufferedOutputStream(new FileOutputStream(metadataFileCopy));
        try {
            IOUtils.copy(bis, bos);
        } finally {
            bos.close();
        }
    } finally {
        bis.close();
    }
    final PropertiesConfiguration properties = new PropertiesConfiguration(metadataFileCopy);
    // update the segment version
    properties.setProperty(V1Constants.MetadataKeys.Segment.SEGMENT_VERSION, SegmentVersion.v2.toString());
    metadataFile.delete();
    properties.save(metadataFile);
}
Also used : SingleColumnMultiValueWriter(com.linkedin.pinot.core.io.writer.SingleColumnMultiValueWriter) ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) SegmentDirectory(com.linkedin.pinot.core.segment.store.SegmentDirectory) PropertiesConfiguration(org.apache.commons.configuration.PropertiesConfiguration) SingleColumnSingleValueWriter(com.linkedin.pinot.core.io.writer.SingleColumnSingleValueWriter) BufferedInputStream(java.io.BufferedInputStream) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) SingleColumnMultiValueReader(com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader) BufferedOutputStream(java.io.BufferedOutputStream) SingleColumnSingleValueReader(com.linkedin.pinot.core.io.reader.SingleColumnSingleValueReader) FileInputStream(java.io.FileInputStream) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) FileOutputStream(java.io.FileOutputStream) File(java.io.File)

Example 32 with SegmentMetadataImpl

use of com.linkedin.pinot.core.segment.index.SegmentMetadataImpl in project pinot by linkedin.

the class TablesResource method getSegmentMetadata.

/**
 * Returns the metadata of one segment hosted on this server, serialised as a JSON string.
 *
 * <p>The segment is acquired from the table data manager for the duration of the call and is
 * always released again in the {@code finally} block.
 *
 * @param tableName table name including type, e.g. {@code myTable_OFFLINE}
 * @param segmentName name of the segment whose metadata is requested
 * @param columns columns to pass to the metadata serialiser; a single {@code "*"} entry is
 *     translated to a {@code null} column set, anything else is passed on as a set
 * @return the JSON representation of the segment metadata
 * @throws WebApplicationException with status 404 if the segment cannot be acquired, or with
 *     status 500 if the metadata cannot be converted to JSON
 */
@GET
@Path("/tables/{tableName}/segments/{segmentName}/metadata")
@Produces(MediaType.APPLICATION_JSON)
@ApiOperation(value = "Provide segment metadata", notes = "Provide segments metadata for the segment on server")
@ApiResponses(value = { @ApiResponse(code = 200, message = "Success"), @ApiResponse(code = 500, message = "Internal server error", response = ErrorInfo.class), @ApiResponse(code = 404, message = "Table or segment not found", response = ErrorInfo.class) })
public String getSegmentMetadata(@ApiParam(value = "Table name including type", required = true, example = "myTable_OFFLINE") @PathParam("tableName") String tableName, @ApiParam(value = "Segment Name", required = true) @PathParam("segmentName") String segmentName, @ApiParam(value = "column name", required = false, allowMultiple = true, defaultValue = "") @QueryParam("columns") @DefaultValue("") List<String> columns) {
    TableDataManager tableDataManager = checkGetTableDataManager(tableName);
    SegmentDataManager segmentDataManager = null;
    try {
        segmentDataManager = tableDataManager.acquireSegment(segmentName);
        if (segmentDataManager == null) {
            throw new WebApplicationException(String.format("Table %s segments %s does not exist", tableName, segmentName), Response.Status.NOT_FOUND);
        }
        SegmentMetadataImpl segmentMetadata = (SegmentMetadataImpl) segmentDataManager.getSegment().getSegmentMetadata();
        Set<String> columnSet;
        if (columns.size() == 1 && columns.get(0).equals("*")) {
            // "*" is forwarded as a null column set to the serialiser
            columnSet = null;
        } else {
            columnSet = new HashSet<>(columns);
        }
        try {
            return segmentMetadata.toJson(columnSet).toString();
        } catch (JSONException e) {
            // Log the segment name (not the metadata object that just failed to serialise) and
            // pass the exception as the last argument so its stack trace is recorded.
            LOGGER.error("Failed to convert table {} segment {} to json", tableName, segmentName, e);
            throw new WebApplicationException("Failed to convert segment metadata to json", Response.Status.INTERNAL_SERVER_ERROR);
        }
    } finally {
        // release only what was actually acquired
        if (segmentDataManager != null) {
            tableDataManager.releaseSegment(segmentDataManager);
        }
    }
}
Also used : SegmentDataManager(com.linkedin.pinot.core.data.manager.offline.SegmentDataManager) WebApplicationException(javax.ws.rs.WebApplicationException) TableDataManager(com.linkedin.pinot.core.data.manager.offline.TableDataManager) JSONException(org.json.JSONException) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) Path(javax.ws.rs.Path) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET) ApiOperation(io.swagger.annotations.ApiOperation) ApiResponses(io.swagger.annotations.ApiResponses)

Example 33 with SegmentMetadataImpl

use of com.linkedin.pinot.core.segment.index.SegmentMetadataImpl in project pinot by linkedin.

the class ForwardIndexReaderBenchmark method benchmarkForwardIndex.

/**
 * Runs the multi-value forward index read benchmark for the columns of the segment in
 * {@code indexDir}.
 *
 * <p>Single-value columns are skipped. The original code contained a branch that called
 * {@code singleValuedReadBenchMark} for unsorted single-value columns, but it was unreachable
 * because single-value columns were already skipped by an earlier {@code continue}; that dead
 * branch has been removed without changing what is executed.
 * NOTE(review): if single-value columns are meant to be benchmarked as well, re-introduce the
 * {@code singleValuedReadBenchMark} call instead of skipping them — confirm the intent.
 *
 * @param indexDir path of the segment directory
 * @param includeColumns columns to benchmark; {@code null} or empty means all columns
 * @throws Exception if the segment metadata cannot be loaded or a benchmark fails
 */
private static void benchmarkForwardIndex(String indexDir, List<String> includeColumns) throws Exception {
    SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(new File(indexDir));
    String segmentVersion = segmentMetadata.getVersion();
    Set<String> columns = segmentMetadata.getAllColumns();
    // an absent or empty include list selects every column
    boolean filterColumns = includeColumns != null && !includeColumns.isEmpty();
    for (String column : columns) {
        if (filterColumns && !includeColumns.contains(column)) {
            continue;
        }
        ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
        if (columnMetadata.isSingleValue()) {
            // only multi-value forward indexes are benchmarked
            continue;
        }
        String fwdIndexFileName = segmentMetadata.getForwardIndexFileName(column, segmentVersion);
        File fwdIndexFile = new File(indexDir, fwdIndexFileName);
        multiValuedReadBenchMark(segmentVersion, fwdIndexFile, segmentMetadata.getTotalDocs(), columnMetadata.getTotalNumberOfEntries(), columnMetadata.getMaxNumberOfMultiValues(), columnMetadata.getBitsPerElement());
    }
}
Also used : ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File)

Example 34 with SegmentMetadataImpl

use of com.linkedin.pinot.core.segment.index.SegmentMetadataImpl in project pinot by linkedin.

the class SegmentDumpTool method doMain.

/**
 * Parses the command line, loads the segment at {@code segmentPath} and prints its contents to
 * standard output as a tab-separated table: a header row, then one row per document with the
 * document id followed by the dictionary-decoded value of each selected column.
 *
 * <p>If no column names were supplied, all schema columns are dumped in sorted order. When
 * {@code dumpStarTree} is set, the star tree read from the segment directory is printed afterwards.
 *
 * <p>Every column's value iterator is cast to {@code BlockSingleValIterator}.
 * NOTE(review): this presumably fails for multi-value columns — confirm.
 *
 * @param args command line arguments, bound onto this object's fields by the args4j parser
 * @throws Exception if argument parsing, segment loading or reading fails
 */
public void doMain(String[] args) throws Exception {
    CmdLineParser parser = new CmdLineParser(this);
    parser.parseArgument(args);
    File segmentDir = new File(segmentPath);
    SegmentMetadata metadata = new SegmentMetadataImpl(segmentDir);
    // All columns by default
    if (columnNames == null) {
        columnNames = new ArrayList<String>(metadata.getSchema().getColumnNames());
        Collections.sort(columnNames);
    }
    IndexSegment indexSegment = Loaders.IndexSegment.load(segmentDir, ReadMode.mmap);
    Map<String, Dictionary> dictionaries = new HashMap<String, Dictionary>();
    Map<String, BlockSingleValIterator> iterators = new HashMap<String, BlockSingleValIterator>();
    // Open one value iterator and one dictionary per selected column.
    for (String columnName : columnNames) {
        DataSource dataSource = indexSegment.getDataSource(columnName);
        dataSource.open();
        Block block = dataSource.nextBlock();
        BlockValSet blockValSet = block.getBlockValueSet();
        BlockSingleValIterator itr = (BlockSingleValIterator) blockValSet.iterator();
        iterators.put(columnName, itr);
        dictionaries.put(columnName, dataSource.getDictionary());
    }
    // Header row.
    System.out.print("Doc\t");
    for (String columnName : columnNames) {
        System.out.print(columnName);
        System.out.print("\t");
    }
    System.out.println();
    // One row per document; the doc count is fetched once instead of on every iteration.
    final int totalDocs = indexSegment.getSegmentMetadata().getTotalDocs();
    for (int i = 0; i < totalDocs; i++) {
        System.out.print(i);
        System.out.print("\t");
        for (String columnName : columnNames) {
            // the forward index stores dictionary ids; decode through the column's dictionary
            int encodedValue = iterators.get(columnName).nextIntVal();
            Object value = dictionaries.get(columnName).get(encodedValue);
            System.out.print(value);
            System.out.print("\t");
        }
        System.out.println();
    }
    if (dumpStarTree) {
        System.out.println();
        File starTreeFile = new File(segmentDir, V1Constants.STAR_TREE_INDEX_FILE);
        StarTreeInterf tree = StarTreeSerDe.fromFile(starTreeFile, ReadMode.mmap);
        tree.printTree();
    }
}
Also used : Dictionary(com.linkedin.pinot.core.segment.index.readers.Dictionary) CmdLineParser(org.kohsuke.args4j.CmdLineParser) HashMap(java.util.HashMap) IndexSegment(com.linkedin.pinot.core.indexsegment.IndexSegment) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DataSource(com.linkedin.pinot.core.common.DataSource) SegmentMetadata(com.linkedin.pinot.common.segment.SegmentMetadata) BlockSingleValIterator(com.linkedin.pinot.core.common.BlockSingleValIterator) Block(com.linkedin.pinot.core.common.Block) BlockValSet(com.linkedin.pinot.core.common.BlockValSet) StarTreeInterf(com.linkedin.pinot.core.startree.StarTreeInterf) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) File(java.io.File)

Example 35 with SegmentMetadataImpl

use of com.linkedin.pinot.core.segment.index.SegmentMetadataImpl in project pinot by linkedin.

the class SegmentPreProcessor method process.

/**
 * Runs the pre-processing steps on the segment through a single segment writer.
 *
 * <p>Steps, in order: create inverted indices according to the index config; then, only when the
 * segment has at least one document, update default columns (if enabled and a schema is set) and
 * add column min/max values (unless the generator mode is {@code NONE}). The writer is saved and
 * closed in all cases once it has been created.
 *
 * @throws Exception if creating the writer or any processing step fails
 */
public void process() throws Exception {
    SegmentDirectory.Writer writer = null;
    try {
        writer = _segmentDirectory.createWriter();

        // Step 1: inverted indices as requested by the index config.
        new InvertedIndexHandler(_indexDir, _segmentMetadata, _indexConfig, writer).createInvertedIndices();

        // The remaining steps are only run for segments that contain documents.
        if (_segmentMetadata.getTotalDocs() == 0) {
            return;
        }

        // Step 2: default columns according to the schema.
        // NOTE: This step may modify the segment metadata. When adding new steps after this, reload the metadata.
        if (_enableDefaultColumns && _schema != null) {
            DefaultColumnHandlerFactory.getDefaultColumnHandler(_indexDir, _schema, _segmentMetadata, writer).updateDefaultColumns();
        }

        // Step 3: min/max value in the column metadata according to the prune mode.
        // For star-tree index, because it can only increase the range, so min/max value can still be used in pruner.
        // NOTE: This step may modify the segment metadata. When adding new steps after this, reload the metadata.
        if (_columnMinMaxValueGeneratorMode != ColumnMinMaxValueGeneratorMode.NONE) {
            // Reload the metadata, since the previous step may have changed it.
            _segmentMetadata = new SegmentMetadataImpl(_indexDir);
            new ColumnMinMaxValueGenerator(_indexDir, _segmentMetadata, writer, _columnMinMaxValueGeneratorMode).addColumnMinMaxValue();
        }
    } finally {
        if (writer != null) {
            writer.saveAndClose();
        }
    }
}
Also used : DefaultColumnHandler(com.linkedin.pinot.core.segment.index.loader.defaultcolumn.DefaultColumnHandler) InvertedIndexHandler(com.linkedin.pinot.core.segment.index.loader.invertedindex.InvertedIndexHandler) ColumnMinMaxValueGenerator(com.linkedin.pinot.core.segment.index.loader.columnminmaxvalue.ColumnMinMaxValueGenerator) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) SegmentDirectory(com.linkedin.pinot.core.segment.store.SegmentDirectory)

Aggregations

SegmentMetadataImpl (com.linkedin.pinot.core.segment.index.SegmentMetadataImpl)36 File (java.io.File)18 Test (org.testng.annotations.Test)13 ColumnMetadata (com.linkedin.pinot.core.segment.index.ColumnMetadata)10 SegmentMetadata (com.linkedin.pinot.common.segment.SegmentMetadata)8 SegmentDirectory (com.linkedin.pinot.core.segment.store.SegmentDirectory)8 IndexSegment (com.linkedin.pinot.core.indexsegment.IndexSegment)5 PinotDataBuffer (com.linkedin.pinot.core.segment.memory.PinotDataBuffer)5 OfflineSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.OfflineSegmentZKMetadata)4 FileTime (java.nio.file.attribute.FileTime)4 IndexSegmentImpl (com.linkedin.pinot.core.segment.index.IndexSegmentImpl)3 SegmentV1V2ToV3FormatConverter (com.linkedin.pinot.core.segment.index.converter.SegmentV1V2ToV3FormatConverter)3 ImmutableDictionaryReader (com.linkedin.pinot.core.segment.index.readers.ImmutableDictionaryReader)3 ArrayList (java.util.ArrayList)3 AbstractTableConfig (com.linkedin.pinot.common.config.AbstractTableConfig)2 SingleColumnMultiValueReader (com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader)2 SingleColumnSingleValueReader (com.linkedin.pinot.core.io.reader.SingleColumnSingleValueReader)2 ColumnIndexContainer (com.linkedin.pinot.core.segment.index.column.ColumnIndexContainer)2 BitmapInvertedIndexReader (com.linkedin.pinot.core.segment.index.readers.BitmapInvertedIndexReader)2 StringDictionary (com.linkedin.pinot.core.segment.index.readers.StringDictionary)2