
Example 11 with CarbonRowSchema

use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.

the class BlockletDataMap method createSchema.

private void createSchema(SegmentProperties segmentProperties) throws MemoryException {
    List<CarbonRowSchema> indexSchemas = new ArrayList<>();
    // Index key
    indexSchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
    getMinMaxSchema(segmentProperties, indexSchemas);
    // for number of rows.
    indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.INT));
    // for table block path
    indexSchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
    // for number of pages.
    indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.SHORT));
    // for version number.
    indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.SHORT));
    // for schema updated time.
    indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.LONG));
    // for blocklet info
    indexSchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
    // for block footer offset.
    indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.LONG));
    // for locations
    indexSchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
    // for relative blocklet id i.e. blocklet id that belongs to a particular part file.
    indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.SHORT));
    // for storing block length.
    indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.LONG));
    unsafeMemoryDMStore = new UnsafeMemoryDMStore(indexSchemas.toArray(new CarbonRowSchema[indexSchemas.size()]));
}
Also used : UnsafeMemoryDMStore(org.apache.carbondata.core.indexstore.UnsafeMemoryDMStore) ArrayList(java.util.ArrayList) CarbonRowSchema(org.apache.carbondata.core.indexstore.schema.CarbonRowSchema)
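
The FixedCarbonRowSchema/VariableCarbonRowSchema split above determines whether a column's width comes from the data type itself or from a per-row length prefix. A minimal sketch of that pattern, using only the constructors and setters that appear in these examples (the two-column schema, the values, and the helper name are illustrative, not CarbonData source; StandardCharsets is assumed imported from java.nio.charset):

// Illustrative only: a fixed-width INT column followed by a variable-length
// BYTE_ARRAY column, filled in the same ordinal order the schema was declared in.
private static DataMapRow buildTwoColumnRow() {
    List<CarbonRowSchema> schemas = new ArrayList<>();
    // fixed column: width is implied by the data type (4 bytes for INT)
    schemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.INT));
    // variable column: stored with a length prefix in the unsafe store
    schemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
    DataMapRow row = new DataMapRowImpl(schemas.toArray(new CarbonRowSchema[schemas.size()]));
    row.setInt(42, 0);
    row.setByteArray("part-0-0.carbondata".getBytes(StandardCharsets.UTF_8), 1);
    return row;
}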

Example 12 with CarbonRowSchema

use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.

the class BlockletDataMap method createSummarySchema.

/**
 * Creates the schema to store summary information, i.e. the information which can be stored
 * only once per datamap. It stores the datamap-level max/min of each column and the partition
 * information of the datamap.
 * @param segmentProperties
 * @throws MemoryException
 */
private void createSummarySchema(SegmentProperties segmentProperties, byte[] schemaBinary, byte[] filePath, byte[] fileName, byte[] segmentId) throws MemoryException {
    List<CarbonRowSchema> taskMinMaxSchemas = new ArrayList<>();
    getMinMaxSchema(segmentProperties, taskMinMaxSchemas);
    // for storing column schema
    taskMinMaxSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, schemaBinary.length));
    // for storing file path
    taskMinMaxSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, filePath.length));
    // for storing file name
    taskMinMaxSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, fileName.length));
    // for storing segmentid
    taskMinMaxSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, segmentId.length));
    unsafeMemorySummaryDMStore = new UnsafeMemoryDMStore(taskMinMaxSchemas.toArray(new CarbonRowSchema[taskMinMaxSchemas.size()]));
}
Also used : UnsafeMemoryDMStore(org.apache.carbondata.core.indexstore.UnsafeMemoryDMStore) ArrayList(java.util.ArrayList) CarbonRowSchema(org.apache.carbondata.core.indexstore.schema.CarbonRowSchema)
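
Note that every column added above is a FixedCarbonRowSchema even though the payloads are byte arrays: each length is taken from the payload at schema-creation time, so the summary store needs no per-row length prefix for them. A hedged sketch of that idea (the helper name is illustrative, not part of CarbonData):

// Illustrative helper (assumption, not CarbonData source): builds one
// fixed-length BYTE_ARRAY column per payload, sized from the payload itself,
// mirroring how schemaBinary, filePath, fileName and segmentId are handled above.
private static void addFixedBinaryColumns(List<CarbonRowSchema> schemas, byte[]... payloads) {
    for (byte[] payload : payloads) {
        schemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, payload.length));
    }
}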

Example 13 with CarbonRowSchema

use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.

the class BlockletDataMap method loadToUnsafe.

private DataMapRowImpl loadToUnsafe(DataFileFooter fileFooter, SegmentProperties segmentProperties, String filePath, DataMapRowImpl summaryRow, BlockMetaInfo blockMetaInfo, int relativeBlockletId) {
    int[] minMaxLen = segmentProperties.getColumnsValueSize();
    List<BlockletInfo> blockletList = fileFooter.getBlockletList();
    CarbonRowSchema[] schema = unsafeMemoryDMStore.getSchema();
    // Add one row to maintain task level min max for segment pruning
    if (!blockletList.isEmpty() && summaryRow == null) {
        summaryRow = new DataMapRowImpl(unsafeMemorySummaryDMStore.getSchema());
    }
    for (int index = 0; index < blockletList.size(); index++) {
        DataMapRow row = new DataMapRowImpl(schema);
        int ordinal = 0;
        int taskMinMaxOrdinal = 0;
        BlockletInfo blockletInfo = blockletList.get(index);
        // add start key as index key
        row.setByteArray(blockletInfo.getBlockletIndex().getBtreeIndex().getStartKey(), ordinal++);
        BlockletMinMaxIndex minMaxIndex = blockletInfo.getBlockletIndex().getMinMaxIndex();
        byte[][] minValues = updateMinValues(minMaxIndex.getMinValues(), minMaxLen);
        row.setRow(addMinMax(minMaxLen, schema[ordinal], minValues), ordinal);
        // compute and set task level min values
        addTaskMinMaxValues(summaryRow, minMaxLen, unsafeMemorySummaryDMStore.getSchema()[taskMinMaxOrdinal], minValues, TASK_MIN_VALUES_INDEX, true);
        ordinal++;
        taskMinMaxOrdinal++;
        byte[][] maxValues = updateMaxValues(minMaxIndex.getMaxValues(), minMaxLen);
        row.setRow(addMinMax(minMaxLen, schema[ordinal], maxValues), ordinal);
        // compute and set task level max values
        addTaskMinMaxValues(summaryRow, minMaxLen, unsafeMemorySummaryDMStore.getSchema()[taskMinMaxOrdinal], maxValues, TASK_MAX_VALUES_INDEX, false);
        ordinal++;
        row.setInt(blockletInfo.getNumberOfRows(), ordinal++);
        // add file path
        byte[] filePathBytes = filePath.getBytes(CarbonCommonConstants.DEFAULT_CHARSET_CLASS);
        row.setByteArray(filePathBytes, ordinal++);
        // add pages
        row.setShort((short) blockletInfo.getNumberOfPages(), ordinal++);
        // add version number
        row.setShort(fileFooter.getVersionId().number(), ordinal++);
        // add schema updated time
        row.setLong(fileFooter.getSchemaUpdatedTimeStamp(), ordinal++);
        // add blocklet info
        byte[] serializedData;
        try {
            ByteArrayOutputStream stream = new ByteArrayOutputStream();
            DataOutput dataOutput = new DataOutputStream(stream);
            blockletInfo.write(dataOutput);
            serializedData = stream.toByteArray();
            row.setByteArray(serializedData, ordinal++);
            // Add block footer offset, it is used if we need to read footer of block
            row.setLong(fileFooter.getBlockInfo().getTableBlockInfo().getBlockOffset(), ordinal++);
            setLocations(blockMetaInfo.getLocationInfo(), row, ordinal);
            ordinal++;
            // for relative blocklet id, i.e. blocklet id that belongs to a particular part file
            row.setShort((short) relativeBlockletId++, ordinal++);
            // Store block size
            row.setLong(blockMetaInfo.getSize(), ordinal);
            unsafeMemoryDMStore.addIndexRowToUnsafe(row);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
    return summaryRow;
}
Also used : DataOutput(java.io.DataOutput) DataOutputStream(java.io.DataOutputStream) BlockletInfo(org.apache.carbondata.core.metadata.blocklet.BlockletInfo) ByteArrayOutputStream(java.io.ByteArrayOutputStream) CarbonRowSchema(org.apache.carbondata.core.indexstore.schema.CarbonRowSchema) DataMapRow(org.apache.carbondata.core.indexstore.row.DataMapRow) UnsupportedEncodingException(java.io.UnsupportedEncodingException) MemoryException(org.apache.carbondata.core.memory.MemoryException) IOException(java.io.IOException) BlockletMinMaxIndex(org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex) DataMapRowImpl(org.apache.carbondata.core.indexstore.row.DataMapRowImpl)
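
Every setter call above has to follow the column order declared in createSchema (Example 11) exactly; the ordinal counter is the only thing tying a value to its schema slot. A hedged sanity check along these lines could be added just before addIndexRowToUnsafe (purely illustrative, not CarbonData source):

// Illustrative check (assumption): after the final setLong, ordinal points at the
// last column, so ordinal + 1 must equal the number of columns declared in
// createSchema(). A mismatch is easier to diagnose here than as a corrupted
// unsafe read later.
if (ordinal + 1 != schema.length) {
    throw new IllegalStateException(
        "row filled " + (ordinal + 1) + " columns but schema declares " + schema.length);
}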

Example 14 with CarbonRowSchema

use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.

the class UnsafeDataMapRow method convertToSafeRow.

/**
 * Converts this unsafe (off-heap) row into a safe (on-heap) row.
 *
 * @return the safe row
 */
public DataMapRow convertToSafeRow() {
    DataMapRowImpl row = new DataMapRowImpl(schemas);
    int runningLength = 0;
    for (int i = 0; i < schemas.length; i++) {
        CarbonRowSchema schema = schemas[i];
        switch(schema.getSchemaType()) {
            case FIXED:
                DataType dataType = schema.getDataType();
                if (dataType == DataTypes.BYTE) {
                    row.setByte(getUnsafe().getByte(block.getBaseObject(), block.getBaseOffset() + pointer + runningLength), i);
                    runningLength += schema.getLength();
                } else if (dataType == DataTypes.SHORT) {
                    row.setShort(getUnsafe().getShort(block.getBaseObject(), block.getBaseOffset() + pointer + runningLength), i);
                    runningLength += schema.getLength();
                } else if (dataType == DataTypes.INT) {
                    row.setInt(getUnsafe().getInt(block.getBaseObject(), block.getBaseOffset() + pointer + runningLength), i);
                    runningLength += schema.getLength();
                } else if (dataType == DataTypes.LONG) {
                    row.setLong(getUnsafe().getLong(block.getBaseObject(), block.getBaseOffset() + pointer + runningLength), i);
                    runningLength += schema.getLength();
                } else if (dataType == DataTypes.FLOAT) {
                    row.setFloat(getUnsafe().getFloat(block.getBaseObject(), block.getBaseOffset() + pointer + runningLength), i);
                    runningLength += schema.getLength();
                } else if (dataType == DataTypes.DOUBLE) {
                    row.setDouble(getUnsafe().getDouble(block.getBaseObject(), block.getBaseOffset() + pointer + runningLength), i);
                    runningLength += schema.getLength();
                } else if (dataType == DataTypes.BYTE_ARRAY) {
                    byte[] data = new byte[schema.getLength()];
                    getUnsafe().copyMemory(block.getBaseObject(), block.getBaseOffset() + pointer + runningLength, data, BYTE_ARRAY_OFFSET, data.length);
                    row.setByteArray(data, i);
                    runningLength += data.length;
                } else {
                    throw new UnsupportedOperationException("unsupported data type for unsafe storage: " + schema.getDataType());
                }
                break;
            case VARIABLE:
                short length = getUnsafe().getShort(block.getBaseObject(), block.getBaseOffset() + pointer + runningLength);
                runningLength += 2;
                byte[] data = new byte[length];
                getUnsafe().copyMemory(block.getBaseObject(), block.getBaseOffset() + pointer + runningLength, data, BYTE_ARRAY_OFFSET, data.length);
                runningLength += data.length;
                row.setByteArray(data, i);
                break;
            case STRUCT:
                DataMapRow structRow = ((UnsafeDataMapRow) getRow(i)).convertToSafeRow();
                row.setRow(structRow, i);
                runningLength += structRow.getTotalSizeInBytes();
                break;
            default:
                throw new UnsupportedOperationException("unsupported data type for unsafe storage: " + schema.getDataType());
        }
    }
    row.setTotalLengthInBytes(runningLength);
    return row;
}
Also used : DataType(org.apache.carbondata.core.metadata.datatype.DataType) CarbonRowSchema(org.apache.carbondata.core.indexstore.schema.CarbonRowSchema)
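
The runningLength bookkeeping shows the practical difference between the three schema types: FIXED widths come from the schema alone, while VARIABLE and STRUCT widths are only known once the row data is read. A small sketch of that observation, reusing only getSchemaType() and getLength() from the code above (the helper itself is illustrative, not CarbonData source):

// Illustrative helper (assumption): sums the width of the FIXED columns of a
// schema array. VARIABLE and STRUCT columns are skipped because their size
// depends on the stored data (length prefix or nested row), not on the schema.
private static int fixedPortionLength(CarbonRowSchema[] schemas) {
    int length = 0;
    for (CarbonRowSchema schema : schemas) {
        switch (schema.getSchemaType()) {
            case FIXED:
                length += schema.getLength();
                break;
            default:
                // VARIABLE: 2-byte length prefix + data; STRUCT: size of the child row
                break;
        }
    }
    return length;
}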

Example 15 with CarbonRowSchema

use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.

the class BlockIndex method addMinMax.

protected IndexRow addMinMax(CarbonRowSchema carbonRowSchema, byte[][] minValues) {
    CarbonRowSchema[] minSchemas = ((CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema).getChildSchemas();
    IndexRow minRow = new IndexRowImpl(minSchemas);
    int minOrdinal = 0;
    // min value adding
    for (int i = 0; i < minValues.length; i++) {
        minRow.setByteArray(minValues[i], minOrdinal++);
    }
    return minRow;
}
Also used : IndexRow(org.apache.carbondata.core.indexstore.row.IndexRow) IndexRowImpl(org.apache.carbondata.core.indexstore.row.IndexRowImpl) CarbonRowSchema(org.apache.carbondata.core.indexstore.schema.CarbonRowSchema)
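
The row returned by addMinMax is a struct value with one child byte[] per indexed column; Example 13 shows the equivalent pattern in the older DataMapRow API, where the result of addMinMax is written back into the parent row with setRow. A hedged usage sketch (the values and variable names are illustrative, and it assumes IndexRow exposes the same setRow(...) that DataMapRow does in Example 13):

// Illustrative usage (assumption, not CarbonData source): one min value per
// indexed column, stored as a struct column of the parent index row.
byte[][] minValues = new byte[][] {
    "aaa".getBytes(StandardCharsets.UTF_8),   // min of column 0
    "001".getBytes(StandardCharsets.UTF_8)    // min of column 1
};
IndexRow minRow = addMinMax(schema[ordinal], minValues);
// assumes IndexRow has setRow, mirroring DataMapRow.setRow in Example 13
indexRow.setRow(minRow, ordinal);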

Aggregations

CarbonRowSchema (org.apache.carbondata.core.indexstore.schema.CarbonRowSchema): 18 usages
IndexRow (org.apache.carbondata.core.indexstore.row.IndexRow): 6 usages
IndexRowImpl (org.apache.carbondata.core.indexstore.row.IndexRowImpl): 6 usages
DataMapRow (org.apache.carbondata.core.indexstore.row.DataMapRow): 5 usages
ArrayList (java.util.ArrayList): 4 usages
DataMapRowImpl (org.apache.carbondata.core.indexstore.row.DataMapRowImpl): 4 usages
DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter): 3 usages
BlockletMinMaxIndex (org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex): 3 usages
IOException (java.io.IOException): 2 usages
UnsupportedEncodingException (java.io.UnsupportedEncodingException): 2 usages
TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo): 2 usages
BlockMetaInfo (org.apache.carbondata.core.indexstore.BlockMetaInfo): 2 usages
UnsafeMemoryDMStore (org.apache.carbondata.core.indexstore.UnsafeMemoryDMStore): 2 usages
MemoryException (org.apache.carbondata.core.memory.MemoryException): 2 usages
DataType (org.apache.carbondata.core.metadata.datatype.DataType): 2 usages
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1 usage
DataOutput (java.io.DataOutput): 1 usage
DataOutputStream (java.io.DataOutputStream): 1 usage
HashMap (java.util.HashMap): 1 usage
SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties): 1 usage