Example 21 with SegmentMetadataImpl

Use of com.linkedin.pinot.core.segment.index.SegmentMetadataImpl in project pinot by linkedin.

From class TablesResource, method getSegmentMetadata.

@GET
@Path("/tables/{tableName}/segments/{segmentName}/metadata")
@Produces(MediaType.APPLICATION_JSON)
@ApiOperation(value = "Provide segment metadata", notes = "Provides segment metadata for the given segment on the server")
@ApiResponses(value = { @ApiResponse(code = 200, message = "Success"), @ApiResponse(code = 500, message = "Internal server error", response = ErrorInfo.class), @ApiResponse(code = 404, message = "Table or segment not found", response = ErrorInfo.class) })
public String getSegmentMetadata(
        @ApiParam(value = "Table name including type", required = true, example = "myTable_OFFLINE") @PathParam("tableName") String tableName,
        @ApiParam(value = "Segment Name", required = true) @PathParam("segmentName") String segmentName,
        @ApiParam(value = "column name", required = false, allowMultiple = true, defaultValue = "") @QueryParam("columns") @DefaultValue("") List<String> columns) {
    TableDataManager tableDataManager = checkGetTableDataManager(tableName);
    SegmentDataManager segmentDataManager = null;
    try {
        segmentDataManager = tableDataManager.acquireSegment(segmentName);
        if (segmentDataManager == null) {
            throw new WebApplicationException(String.format("Table %s segment %s does not exist", tableName, segmentName), Response.Status.NOT_FOUND);
        }
        SegmentMetadataImpl segmentMetadata = (SegmentMetadataImpl) segmentDataManager.getSegment().getSegmentMetadata();
        Set<String> columnSet;
        // A lone "*" selects all columns; a null column set tells toJson to include everything.
        if (columns.size() == 1 && columns.get(0).equals("*")) {
            columnSet = null;
        } else {
            columnSet = new HashSet<>(columns);
        }
        try {
            return segmentMetadata.toJson(columnSet).toString();
        } catch (JSONException e) {
            LOGGER.error("Failed to convert table {} segment {} to json", tableName, segmentMetadata);
            throw new WebApplicationException("Failed to convert segment metadata to json", Response.Status.INTERNAL_SERVER_ERROR);
        }
    } finally {
        if (segmentDataManager != null) {
            tableDataManager.releaseSegment(segmentDataManager);
        }
    }
}
Also used : SegmentDataManager(com.linkedin.pinot.core.data.manager.offline.SegmentDataManager) WebApplicationException(javax.ws.rs.WebApplicationException) TableDataManager(com.linkedin.pinot.core.data.manager.offline.TableDataManager) JSONException(org.json.JSONException) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) Path(javax.ws.rs.Path) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET) ApiOperation(io.swagger.annotations.ApiOperation) ApiResponses(io.swagger.annotations.ApiResponses)
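
The endpoint above can be exercised with any HTTP client. Below is a minimal sketch using the standard JAX-RS 2.0 client API; the host, port, table, and segment names are hypothetical placeholders, not values from the Pinot source. Passing columns=* requests metadata for all columns, since the resource maps a lone "*" to a null column set.

import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.core.MediaType;

public class SegmentMetadataClient {
    public static void main(String[] args) {
        Client client = ClientBuilder.newClient();
        try {
            // Hypothetical server address and names; adjust for your deployment.
            String json = client.target("http://localhost:8097")
                    .path("tables/myTable_OFFLINE/segments/mySegment/metadata")
                    .queryParam("columns", "playerID")  // or "*" for all columns
                    .request(MediaType.APPLICATION_JSON)
                    .get(String.class);
            System.out.println(json);
        } finally {
            client.close();
        }
    }
}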

Example 22 with SegmentMetadataImpl

Use of com.linkedin.pinot.core.segment.index.SegmentMetadataImpl in project pinot by linkedin.

From class ForwardIndexReaderBenchmark, method benchmarkForwardIndex.

private static void benchmarkForwardIndex(String indexDir, List<String> includeColumns) throws Exception {
    SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(new File(indexDir));
    String segmentVersion = segmentMetadata.getVersion();
    Set<String> columns = segmentMetadata.getAllColumns();
    for (String column : columns) {
        // Optional filter: when includeColumns is non-empty, benchmark only those columns.
        if (includeColumns != null && !includeColumns.isEmpty() && !includeColumns.contains(column)) {
            continue;
        }
        ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
        String fwdIndexFileName = segmentMetadata.getForwardIndexFileName(column, segmentVersion);
        File fwdIndexFile = new File(indexDir, fwdIndexFileName);
        if (!columnMetadata.isSingleValue()) {
            multiValuedReadBenchMark(segmentVersion, fwdIndexFile, segmentMetadata.getTotalDocs(), columnMetadata.getTotalNumberOfEntries(), columnMetadata.getMaxNumberOfMultiValues(), columnMetadata.getBitsPerElement());
        } else if (!columnMetadata.isSorted()) {
            // Sorted single-value columns are skipped.
            singleValuedReadBenchMark(segmentVersion, fwdIndexFile, segmentMetadata.getTotalDocs(), columnMetadata.getBitsPerElement());
        }
    }
}
Also used : ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File)
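
The singleValuedReadBenchMark and multiValuedReadBenchMark bodies are not shown on this page. As a rough illustration of what such a read benchmark boils down to, here is a self-contained timing sketch; the IntReader interface is a hypothetical stand-in for Pinot's forward-index readers, not an actual Pinot type.

public final class ScanTimer {
    /** Hypothetical single-value reader: returns the dictionary id stored for a doc. */
    public interface IntReader {
        int getInt(int docId);
    }

    /** Times a full scan over all docs; the checksum keeps the JIT from eliminating the loop. */
    public static long timeScan(IntReader reader, int totalDocs) {
        long checksum = 0;
        long start = System.nanoTime();
        for (int docId = 0; docId < totalDocs; docId++) {
            checksum += reader.getInt(docId);
        }
        long elapsedNs = System.nanoTime() - start;
        System.out.printf("scanned %d docs in %.2f ms (checksum=%d)%n", totalDocs, elapsedNs / 1e6, checksum);
        return elapsedNs;
    }
}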

Example 23 with SegmentMetadataImpl

Use of com.linkedin.pinot.core.segment.index.SegmentMetadataImpl in project pinot by linkedin.

From class SegmentDumpTool, method doMain.

public void doMain(String[] args) throws Exception {
    CmdLineParser parser = new CmdLineParser(this);
    parser.parseArgument(args);
    File segmentDir = new File(segmentPath);
    SegmentMetadata metadata = new SegmentMetadataImpl(segmentDir);
    // All columns by default
    if (columnNames == null) {
        columnNames = new ArrayList<>(metadata.getSchema().getColumnNames());
        Collections.sort(columnNames);
    }
    IndexSegment indexSegment = Loaders.IndexSegment.load(segmentDir, ReadMode.mmap);
    Map<String, Dictionary> dictionaries = new HashMap<String, Dictionary>();
    Map<String, BlockSingleValIterator> iterators = new HashMap<String, BlockSingleValIterator>();
    for (String columnName : columnNames) {
        DataSource dataSource = indexSegment.getDataSource(columnName);
        dataSource.open();
        Block block = dataSource.nextBlock();
        BlockValSet blockValSet = block.getBlockValueSet();
        BlockSingleValIterator itr = (BlockSingleValIterator) blockValSet.iterator();
        iterators.put(columnName, itr);
        dictionaries.put(columnName, dataSource.getDictionary());
    }
    System.out.print("Doc\t");
    for (String columnName : columnNames) {
        System.out.print(columnName);
        System.out.print("\t");
    }
    System.out.println();
    for (int i = 0; i < indexSegment.getSegmentMetadata().getTotalDocs(); i++) {
        System.out.print(i);
        System.out.print("\t");
        for (String columnName : columnNames) {
            BlockSingleValIterator itr = iterators.get(columnName);
            // Values are dictionary-encoded: read the dictionary id, then look up the raw value.
            int encodedValue = itr.nextIntVal();
            Object value = dictionaries.get(columnName).get(encodedValue);
            System.out.print(value);
            System.out.print("\t");
        }
        System.out.println();
    }
    if (dumpStarTree) {
        System.out.println();
        File starTreeFile = new File(segmentDir, V1Constants.STAR_TREE_INDEX_FILE);
        StarTreeInterf tree = StarTreeSerDe.fromFile(starTreeFile, ReadMode.mmap);
        tree.printTree();
    }
}
Also used : Dictionary(com.linkedin.pinot.core.segment.index.readers.Dictionary) CmdLineParser(org.kohsuke.args4j.CmdLineParser) HashMap(java.util.HashMap) IndexSegment(com.linkedin.pinot.core.indexsegment.IndexSegment) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DataSource(com.linkedin.pinot.core.common.DataSource) SegmentMetadata(com.linkedin.pinot.common.segment.SegmentMetadata) BlockSingleValIterator(com.linkedin.pinot.core.common.BlockSingleValIterator) Block(com.linkedin.pinot.core.common.Block) BlockValSet(com.linkedin.pinot.core.common.BlockValSet) StarTreeInterf(com.linkedin.pinot.core.startree.StarTreeInterf) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) File(java.io.File)
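
The inner loop of doMain decodes each dictionary id returned by itr.nextIntVal() back into a raw value through the column's Dictionary. A toy, self-contained sketch of that dictionary-decoding idea follows; the data is made up for illustration.

import java.util.Arrays;
import java.util.List;

public class DictionaryDecodeDemo {
    public static void main(String[] args) {
        // A column stored as dictionary ids, alongside its sorted dictionary.
        int[] encodedColumn = {2, 0, 1, 2};
        List<String> dictionary = Arrays.asList("delhi", "paris", "tokyo");
        // Mirrors dictionaries.get(columnName).get(encodedValue) in the tool above.
        for (int docId = 0; docId < encodedColumn.length; docId++) {
            System.out.println(docId + "\t" + dictionary.get(encodedColumn[docId]));
        }
    }
}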

Example 24 with SegmentMetadataImpl

Use of com.linkedin.pinot.core.segment.index.SegmentMetadataImpl in project pinot by linkedin.

From class PinotLLCRealtimeSegmentManager, method extractSegmentMetadata.

/**
   * Extracts the segment metadata file from the tar-gzipped segment file that is expected to be in the
   * directory for the table.
   * Segment tar-gzipped file path: DATADIR/rawTableName/segmentName
   * The metadata is extracted to a file at the same level, as in: DATADIR/rawTableName/segmentName.metadata
   * @param rawTableName Name of the table (not including the REALTIME extension)
   * @param segmentNameStr Name of the segment
   * @return SegmentMetadataImpl if the metadata file can be extracted from the tar-gzipped segment file.
   */
protected SegmentMetadataImpl extractSegmentMetadata(final String rawTableName, final String segmentNameStr) {
    final String baseDir = StringUtil.join("/", _controllerConf.getDataDir(), rawTableName);
    final String segFileName = StringUtil.join("/", baseDir, segmentNameStr);
    final File segFile = new File(segFileName);
    SegmentMetadataImpl segmentMetadata;
    Path metadataPath = null;
    try (InputStream is = TarGzCompressionUtils.unTarOneFile(new FileInputStream(segFile), V1Constants.MetadataKeys.METADATA_FILE_NAME)) {
        metadataPath = FileSystems.getDefault().getPath(baseDir, segmentNameStr + ".metadata");
        Files.copy(is, metadataPath);
        segmentMetadata = new SegmentMetadataImpl(new File(metadataPath.toString()));
    } catch (Exception e) {
        throw new RuntimeException("Exception extracting and reading segment metadata for " + segmentNameStr, e);
    } finally {
        if (metadataPath != null) {
            FileUtils.deleteQuietly(new File(metadataPath.toString()));
        }
    }
    return segmentMetadata;
}
Also used : Path(java.nio.file.Path) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) File(java.io.File) FileInputStream(java.io.FileInputStream) TimeoutException(java.util.concurrent.TimeoutException)
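
TarGzCompressionUtils.unTarOneFile is a Pinot utility whose implementation is not shown here. As a sketch of the equivalent behavior, the following streams a single entry out of a tar-gzipped file with Apache Commons Compress; the class and method names in this sketch are my own, not Pinot's.

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;

public class UnTarOneFile {
    /** Copies the first tar entry whose name ends with fileName to destination; returns true if found. */
    public static boolean extractOne(Path tarGzFile, String fileName, Path destination) throws IOException {
        try (InputStream in = new BufferedInputStream(Files.newInputStream(tarGzFile));
             TarArchiveInputStream tar = new TarArchiveInputStream(new GzipCompressorInputStream(in))) {
            TarArchiveEntry entry;
            while ((entry = tar.getNextTarEntry()) != null) {
                if (!entry.isDirectory() && entry.getName().endsWith(fileName)) {
                    Files.copy(tar, destination, StandardCopyOption.REPLACE_EXISTING);
                    return true;
                }
            }
        }
        return false;
    }
}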

Example 25 with SegmentMetadataImpl

Use of com.linkedin.pinot.core.segment.index.SegmentMetadataImpl in project pinot by linkedin.

From class PinotLLCRealtimeSegmentManager, method commitSegment.

/**
   * This method is invoked after the realtime segment is uploaded but before a response is sent to the server.
   * It updates the propertystore segment metadata from IN_PROGRESS to DONE, creates a new propertystore
   * record for the next segment, and adds that segment to the idealstate in CONSUMING state.
   *
   * @param rawTableName Raw table name
   * @param committingSegmentNameStr Committing segment name
   * @param nextOffset The offset with which the next segment should start.
   * @return true if the segment was committed successfully.
   */
public boolean commitSegment(String rawTableName, final String committingSegmentNameStr, long nextOffset) {
    final long now = System.currentTimeMillis();
    final String realtimeTableName = TableNameBuilder.REALTIME_TABLE_NAME_BUILDER.forTable(rawTableName);
    final LLCRealtimeSegmentZKMetadata oldSegMetadata = getRealtimeSegmentZKMetadata(realtimeTableName, committingSegmentNameStr);
    final LLCSegmentName oldSegmentName = new LLCSegmentName(committingSegmentNameStr);
    final int partitionId = oldSegmentName.getPartitionId();
    final int oldSeqNum = oldSegmentName.getSequenceNumber();
    oldSegMetadata.setEndOffset(nextOffset);
    oldSegMetadata.setStatus(CommonConstants.Segment.Realtime.Status.DONE);
    oldSegMetadata.setDownloadUrl(ControllerConf.constructDownloadUrl(rawTableName, committingSegmentNameStr, _controllerConf.generateVipUrl()));
    // Pull segment metadata from incoming segment and set it in zk segment metadata
    SegmentMetadataImpl segmentMetadata = extractSegmentMetadata(rawTableName, committingSegmentNameStr);
    oldSegMetadata.setCrc(Long.valueOf(segmentMetadata.getCrc()));
    oldSegMetadata.setStartTime(segmentMetadata.getTimeInterval().getStartMillis());
    oldSegMetadata.setEndTime(segmentMetadata.getTimeInterval().getEndMillis());
    oldSegMetadata.setTimeUnit(TimeUnit.MILLISECONDS);
    oldSegMetadata.setIndexVersion(segmentMetadata.getVersion());
    oldSegMetadata.setTotalRawDocs(segmentMetadata.getTotalRawDocs());
    final ZNRecord oldZnRecord = oldSegMetadata.toZNRecord();
    final String oldZnodePath = ZKMetadataProvider.constructPropertyStorePathForSegment(realtimeTableName, committingSegmentNameStr);
    final ZNRecord partitionAssignment = getKafkaPartitionAssignment(realtimeTableName);
    // The partition assignment is needed to create the new segment; abort the commit if it is missing.
    if (partitionAssignment == null) {
        LOGGER.warn("Kafka partition assignment not found for {}", realtimeTableName);
        throw new RuntimeException("Kafka partition assignment not found. Not committing segment");
    }
    List<String> newInstances = partitionAssignment.getListField(Integer.toString(partitionId));
    // Construct segment metadata and idealstate for the new segment
    final int newSeqNum = oldSeqNum + 1;
    final long newStartOffset = nextOffset;
    LLCSegmentName newHolder = new LLCSegmentName(oldSegmentName.getTableName(), partitionId, newSeqNum, now);
    final String newSegmentNameStr = newHolder.getSegmentName();
    ZNRecord newZnRecord = makeZnRecordForNewSegment(rawTableName, newInstances.size(), newStartOffset, newSegmentNameStr);
    final LLCRealtimeSegmentZKMetadata newSegmentZKMetadata = new LLCRealtimeSegmentZKMetadata(newZnRecord);
    updateFlushThresholdForSegmentMetadata(newSegmentZKMetadata, partitionAssignment, getRealtimeTableFlushSizeForTable(rawTableName));
    newZnRecord = newSegmentZKMetadata.toZNRecord();
    final String newZnodePath = ZKMetadataProvider.constructPropertyStorePathForSegment(realtimeTableName, newSegmentNameStr);
    List<String> paths = new ArrayList<>(2);
    paths.add(oldZnodePath);
    paths.add(newZnodePath);
    List<ZNRecord> records = new ArrayList<>(2);
    records.add(oldZnRecord);
    records.add(newZnRecord);
    /*
     * Update zookeeper in two steps.
     *
     * Step 1: Update PROPERTYSTORE to change the segment metadata for old segment and add a new one for new segment
     * Step 2: Update IDEALSTATES to include the new segment in the idealstate for the table in CONSUMING state, and change
     *         the old segment to ONLINE state.
     *
     * The controller may fail between these two steps, so when a new controller takes over as leader, it needs to
     * check whether there are any recent segments in PROPERTYSTORE that are not accounted for in idealState. If so,
     * it should create the new segments in idealState.
     *
     * If the controller fails after step-2, we are fine because the idealState has the new segments.
     * If the controller fails before step-1, the server will see this as an upload failure, and will re-try.
     */
    writeSegmentsToPropertyStore(paths, records, realtimeTableName);
    // TODO Introduce a controller failure here for integration testing
    // When multiple segments of the same table complete around the same time it is possible that
    // the idealstate update fails due to contention. We serialize the updates to the idealstate
    // to reduce this contention. We may still contend with RetentionManager, or other updates
    // to idealstate from other controllers, but then we have the retry mechanism to get around that.
    // hash code can be negative, so make sure we are getting a positive lock index
    int lockIndex = (realtimeTableName.hashCode() & Integer.MAX_VALUE) % NUM_LOCKS;
    Lock lock = _idealstateUpdateLocks[lockIndex];
    lock.lock();
    try {
        updateIdealState(realtimeTableName, newInstances, committingSegmentNameStr, newSegmentNameStr);
        LOGGER.info("Changed {} to ONLINE and created {} in CONSUMING", committingSegmentNameStr, newSegmentNameStr);
    } finally {
        lock.unlock();
    }
    return true;
}
Also used : ArrayList(java.util.ArrayList) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) LLCRealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata) LLCSegmentName(com.linkedin.pinot.common.utils.LLCSegmentName) ZNRecord(org.apache.helix.ZNRecord) ReentrantLock(java.util.concurrent.locks.ReentrantLock) Lock(java.util.concurrent.locks.Lock)
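
The lock-striping trick in commitSegment, (hashCode() & Integer.MAX_VALUE) % NUM_LOCKS, maps each table name onto a fixed pool of locks so that commits for the same table serialize while different tables rarely contend. A minimal standalone sketch of that pattern, assuming a pool size of 64:

import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

public final class StripedLocks {
    private static final int NUM_LOCKS = 64;
    private final Lock[] locks = new Lock[NUM_LOCKS];

    public StripedLocks() {
        for (int i = 0; i < NUM_LOCKS; i++) {
            locks[i] = new ReentrantLock();
        }
    }

    /** Masking with Integer.MAX_VALUE keeps the index non-negative even for negative hash codes. */
    public Lock lockFor(String key) {
        return locks[(key.hashCode() & Integer.MAX_VALUE) % NUM_LOCKS];
    }
}

A caller acquires lockFor(tableName).lock() before the critical section and releases it in a finally block, exactly as commitSegment does.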

Aggregations

SegmentMetadataImpl (com.linkedin.pinot.core.segment.index.SegmentMetadataImpl): 36
File (java.io.File): 18
Test (org.testng.annotations.Test): 13
ColumnMetadata (com.linkedin.pinot.core.segment.index.ColumnMetadata): 10
SegmentMetadata (com.linkedin.pinot.common.segment.SegmentMetadata): 8
SegmentDirectory (com.linkedin.pinot.core.segment.store.SegmentDirectory): 8
IndexSegment (com.linkedin.pinot.core.indexsegment.IndexSegment): 5
PinotDataBuffer (com.linkedin.pinot.core.segment.memory.PinotDataBuffer): 5
OfflineSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.OfflineSegmentZKMetadata): 4
FileTime (java.nio.file.attribute.FileTime): 4
IndexSegmentImpl (com.linkedin.pinot.core.segment.index.IndexSegmentImpl): 3
SegmentV1V2ToV3FormatConverter (com.linkedin.pinot.core.segment.index.converter.SegmentV1V2ToV3FormatConverter): 3
ImmutableDictionaryReader (com.linkedin.pinot.core.segment.index.readers.ImmutableDictionaryReader): 3
ArrayList (java.util.ArrayList): 3
AbstractTableConfig (com.linkedin.pinot.common.config.AbstractTableConfig): 2
SingleColumnMultiValueReader (com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader): 2
SingleColumnSingleValueReader (com.linkedin.pinot.core.io.reader.SingleColumnSingleValueReader): 2
ColumnIndexContainer (com.linkedin.pinot.core.segment.index.column.ColumnIndexContainer): 2
BitmapInvertedIndexReader (com.linkedin.pinot.core.segment.index.readers.BitmapInvertedIndexReader): 2
StringDictionary (com.linkedin.pinot.core.segment.index.readers.StringDictionary): 2