Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.
The class BlockletDataMap, method createSchema.
private void createSchema(SegmentProperties segmentProperties) throws MemoryException {
  List<CarbonRowSchema> indexSchemas = new ArrayList<>();
  // index key
  indexSchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
  getMinMaxSchema(segmentProperties, indexSchemas);
  // for number of rows
  indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.INT));
  // for table block path
  indexSchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
  // for number of pages
  indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.SHORT));
  // for version number
  indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.SHORT));
  // for schema updated time
  indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.LONG));
  // for blocklet info
  indexSchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
  // for block footer offset
  indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.LONG));
  // for locations
  indexSchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
  // for relative blocklet id, i.e. the blocklet id within a particular part file
  indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.SHORT));
  // for storing block length
  indexSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.LONG));
  unsafeMemoryDMStore =
      new UnsafeMemoryDMStore(indexSchemas.toArray(new CarbonRowSchema[indexSchemas.size()]));
}
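For orientation, each entry added above is one column of the per-blocklet index row: the index key, the min/max structs, and the path/location columns are variable-sized, while the counters and offsets are fixed-width. A minimal sketch (not CarbonData source) that totals the fixed-width portion of such a row, using only accessors that already appear in this listing (getSchemaType(), getLength()); variable fields cannot be sized up front because each row stores them as a 2-byte length prefix plus payload, as decoded in the convertToSafeRow() listing below:

  // Sketch only: sums the fixed-width bytes of a row described by the
  // schema built in createSchema(). VARIABLE/STRUCT contributions depend
  // on the concrete row data, so they are skipped here.
  static int fixedLengthOf(CarbonRowSchema[] schemas) {
    int total = 0;
    for (CarbonRowSchema schema : schemas) {
      switch (schema.getSchemaType()) {
        case FIXED:
          total += schema.getLength(); // e.g. 4 bytes for INT, 8 for LONG
          break;
        default:
          break; // VARIABLE and STRUCT are sized per row at write time
      }
    }
    return total;
  }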
Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.
The class BlockletDataMap, method createSummarySchema.
/**
 * Creates the schema for summary information, i.e. information stored only once per
 * datamap: the datamap-level min/max of each column and the datamap's partition
 * information.
 *
 * @param segmentProperties segment properties used to derive the min/max schema
 * @throws MemoryException if the unsafe row store cannot be allocated
 */
private void createSummarySchema(SegmentProperties segmentProperties, byte[] schemaBinary,
    byte[] filePath, byte[] fileName, byte[] segmentId) throws MemoryException {
  List<CarbonRowSchema> taskMinMaxSchemas = new ArrayList<>();
  getMinMaxSchema(segmentProperties, taskMinMaxSchemas);
  // for storing column schema
  taskMinMaxSchemas.add(
      new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, schemaBinary.length));
  // for storing file path
  taskMinMaxSchemas.add(
      new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, filePath.length));
  // for storing file name
  taskMinMaxSchemas.add(
      new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, fileName.length));
  // for storing segment id
  taskMinMaxSchemas.add(
      new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, segmentId.length));
  unsafeMemorySummaryDMStore = new UnsafeMemoryDMStore(
      taskMinMaxSchemas.toArray(new CarbonRowSchema[taskMinMaxSchemas.size()]));
}
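Unlike createSchema(), the byte-array columns here use FixedCarbonRowSchema: their lengths (schemaBinary.length, filePath.length, and so on) are known when the store is created, so no per-row length prefix is needed. A hypothetical call site, where the local variable names and the serializer helper are assumptions for illustration rather than CarbonData source; the charset constant is the one used elsewhere in this listing:

  // Hypothetical call site for createSummarySchema(); helper name is assumed.
  byte[] schemaBinary = convertSchemaToBinary(columnSchemas); // assumed serializer
  byte[] filePath = indexFilePath.getBytes(CarbonCommonConstants.DEFAULT_CHARSET_CLASS);
  byte[] fileName = indexFileName.getBytes(CarbonCommonConstants.DEFAULT_CHARSET_CLASS);
  byte[] segmentId = segmentNo.getBytes(CarbonCommonConstants.DEFAULT_CHARSET_CLASS);
  createSummarySchema(segmentProperties, schemaBinary, filePath, fileName, segmentId);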
Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.
The class BlockletDataMap, method loadToUnsafe.
private DataMapRowImpl loadToUnsafe(DataFileFooter fileFooter,
    SegmentProperties segmentProperties, String filePath, DataMapRowImpl summaryRow,
    BlockMetaInfo blockMetaInfo, int relativeBlockletId) {
  int[] minMaxLen = segmentProperties.getColumnsValueSize();
  List<BlockletInfo> blockletList = fileFooter.getBlockletList();
  CarbonRowSchema[] schema = unsafeMemoryDMStore.getSchema();
  // add one row to maintain task level min/max for segment pruning
  if (!blockletList.isEmpty() && summaryRow == null) {
    summaryRow = new DataMapRowImpl(unsafeMemorySummaryDMStore.getSchema());
  }
  for (int index = 0; index < blockletList.size(); index++) {
    DataMapRow row = new DataMapRowImpl(schema);
    int ordinal = 0;
    int taskMinMaxOrdinal = 0;
    BlockletInfo blockletInfo = blockletList.get(index);
    // add start key as index key
    row.setByteArray(blockletInfo.getBlockletIndex().getBtreeIndex().getStartKey(), ordinal++);
    BlockletMinMaxIndex minMaxIndex = blockletInfo.getBlockletIndex().getMinMaxIndex();
    byte[][] minValues = updateMinValues(minMaxIndex.getMinValues(), minMaxLen);
    row.setRow(addMinMax(minMaxLen, schema[ordinal], minValues), ordinal);
    // compute and set task level min values
    addTaskMinMaxValues(summaryRow, minMaxLen,
        unsafeMemorySummaryDMStore.getSchema()[taskMinMaxOrdinal], minValues,
        TASK_MIN_VALUES_INDEX, true);
    ordinal++;
    taskMinMaxOrdinal++;
    byte[][] maxValues = updateMaxValues(minMaxIndex.getMaxValues(), minMaxLen);
    row.setRow(addMinMax(minMaxLen, schema[ordinal], maxValues), ordinal);
    // compute and set task level max values
    addTaskMinMaxValues(summaryRow, minMaxLen,
        unsafeMemorySummaryDMStore.getSchema()[taskMinMaxOrdinal], maxValues,
        TASK_MAX_VALUES_INDEX, false);
    ordinal++;
    row.setInt(blockletInfo.getNumberOfRows(), ordinal++);
    // add file path
    byte[] filePathBytes = filePath.getBytes(CarbonCommonConstants.DEFAULT_CHARSET_CLASS);
    row.setByteArray(filePathBytes, ordinal++);
    // add number of pages
    row.setShort((short) blockletInfo.getNumberOfPages(), ordinal++);
    // add version number
    row.setShort(fileFooter.getVersionId().number(), ordinal++);
    // add schema updated time
    row.setLong(fileFooter.getSchemaUpdatedTimeStamp(), ordinal++);
    // add blocklet info
    byte[] serializedData;
    try {
      ByteArrayOutputStream stream = new ByteArrayOutputStream();
      DataOutput dataOutput = new DataOutputStream(stream);
      blockletInfo.write(dataOutput);
      serializedData = stream.toByteArray();
      row.setByteArray(serializedData, ordinal++);
      // add block footer offset; used when the block footer must be read back
      row.setLong(fileFooter.getBlockInfo().getTableBlockInfo().getBlockOffset(), ordinal++);
      setLocations(blockMetaInfo.getLocationInfo(), row, ordinal);
      ordinal++;
      // relative blocklet id, i.e. the blocklet id within a particular part file
      row.setShort((short) relativeBlockletId++, ordinal++);
      // store block size
      row.setLong(blockMetaInfo.getSize(), ordinal);
      unsafeMemoryDMStore.addIndexRowToUnsafe(row);
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
  return summaryRow;
}
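The blocklet-info column above stores whatever bytes BlockletInfo.write(DataOutput) produces. A round-trip sketch with standard java.io streams; the no-arg constructor and the Writable-style readFields(DataInput) counterpart are assumptions inferred from the write() call, not confirmed by this listing:

  // Round-trip sketch; readFields(DataInput) is assumed to mirror write(DataOutput).
  ByteArrayOutputStream bos = new ByteArrayOutputStream();
  blockletInfo.write(new DataOutputStream(bos));
  byte[] serialized = bos.toByteArray();

  BlockletInfo copy = new BlockletInfo(); // assumed no-arg constructor
  copy.readFields(new DataInputStream(new ByteArrayInputStream(serialized)));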
Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.
The class UnsafeDataMapRow, method convertToSafeRow.
/**
 * Converts this unsafe row into an on-heap (safe) row.
 *
 * @return the safe DataMapRow copy of this row
 */
public DataMapRow convertToSafeRow() {
  DataMapRowImpl row = new DataMapRowImpl(schemas);
  int runningLength = 0;
  for (int i = 0; i < schemas.length; i++) {
    CarbonRowSchema schema = schemas[i];
    switch (schema.getSchemaType()) {
      case FIXED:
        DataType dataType = schema.getDataType();
        if (dataType == DataTypes.BYTE) {
          row.setByte(getUnsafe().getByte(block.getBaseObject(),
              block.getBaseOffset() + pointer + runningLength), i);
          runningLength += schema.getLength();
        } else if (dataType == DataTypes.SHORT) {
          row.setShort(getUnsafe().getShort(block.getBaseObject(),
              block.getBaseOffset() + pointer + runningLength), i);
          runningLength += schema.getLength();
        } else if (dataType == DataTypes.INT) {
          row.setInt(getUnsafe().getInt(block.getBaseObject(),
              block.getBaseOffset() + pointer + runningLength), i);
          runningLength += schema.getLength();
        } else if (dataType == DataTypes.LONG) {
          row.setLong(getUnsafe().getLong(block.getBaseObject(),
              block.getBaseOffset() + pointer + runningLength), i);
          runningLength += schema.getLength();
        } else if (dataType == DataTypes.FLOAT) {
          row.setFloat(getUnsafe().getFloat(block.getBaseObject(),
              block.getBaseOffset() + pointer + runningLength), i);
          runningLength += schema.getLength();
        } else if (dataType == DataTypes.DOUBLE) {
          row.setDouble(getUnsafe().getDouble(block.getBaseObject(),
              block.getBaseOffset() + pointer + runningLength), i);
          runningLength += schema.getLength();
        } else if (dataType == DataTypes.BYTE_ARRAY) {
          byte[] data = new byte[schema.getLength()];
          getUnsafe().copyMemory(block.getBaseObject(),
              block.getBaseOffset() + pointer + runningLength, data, BYTE_ARRAY_OFFSET,
              data.length);
          row.setByteArray(data, i);
          runningLength += data.length;
        } else {
          throw new UnsupportedOperationException(
              "unsupported data type for unsafe storage: " + schema.getDataType());
        }
        break;
      case VARIABLE:
        // variable fields are stored as a 2-byte length prefix followed by the payload
        short length = getUnsafe().getShort(block.getBaseObject(),
            block.getBaseOffset() + pointer + runningLength);
        runningLength += 2;
        byte[] data = new byte[length];
        getUnsafe().copyMemory(block.getBaseObject(),
            block.getBaseOffset() + pointer + runningLength, data, BYTE_ARRAY_OFFSET,
            data.length);
        runningLength += data.length;
        row.setByteArray(data, i);
        break;
      case STRUCT:
        DataMapRow structRow = ((UnsafeDataMapRow) getRow(i)).convertToSafeRow();
        row.setRow(structRow, i);
        runningLength += structRow.getTotalSizeInBytes();
        break;
      default:
        throw new UnsupportedOperationException(
            "unsupported data type for unsafe storage: " + schema.getDataType());
    }
  }
  row.setTotalLengthInBytes(runningLength);
  return row;
}
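The VARIABLE branch implies that variable-length fields are laid out as a 2-byte length prefix followed by the payload. A self-contained sketch of the same layout using java.nio.ByteBuffer instead of Unsafe (illustrative only, not CarbonData source):

  // Mirrors the [short length][payload] encoding the VARIABLE case decodes;
  // as in the Unsafe code above, payload lengths must fit in a short.
  static void writeVariable(ByteBuffer buf, byte[] data) {
    buf.putShort((short) data.length); // 2-byte length prefix
    buf.put(data);                     // payload immediately follows
  }

  static byte[] readVariable(ByteBuffer buf) {
    byte[] data = new byte[buf.getShort()];
    buf.get(data);
    return data;
  }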
Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.
The class BlockIndex, method addMinMax.
protected IndexRow addMinMax(CarbonRowSchema carbonRowSchema, byte[][] minValues) {
  CarbonRowSchema[] minSchemas =
      ((CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema).getChildSchemas();
  IndexRow minRow = new IndexRowImpl(minSchemas);
  int minOrdinal = 0;
  // add each column's min value
  for (int i = 0; i < minValues.length; i++) {
    minRow.setByteArray(minValues[i], minOrdinal++);
  }
  return minRow;
}
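A hypothetical call site (variable names are illustrative): the schema handed in is the STRUCT entry at the min-values ordinal of the index row schema, holding one BYTE_ARRAY child per indexed column, and the nested row is attached with setRow() just as in the loadToUnsafe() listing above:

  // Hypothetical usage; indexRow, schema and ordinal come from the surrounding loader.
  IndexRow minRow = addMinMax(schema[ordinal], minMaxIndex.getMinValues());
  indexRow.setRow(minRow, ordinal);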