Use of org.apache.carbondata.core.indexstore.row.DataMapRowImpl in project carbondata by apache.
The class BlockletDataMap, method addMinMax.
private DataMapRow addMinMax(int[] minMaxLen, CarbonRowSchema carbonRowSchema, byte[][] minValues) {
  CarbonRowSchema[] minSchemas =
      ((CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema).getChildSchemas();
  DataMapRow minRow = new DataMapRowImpl(minSchemas);
  int minOrdinal = 0;
  // add one min value per column
  for (int i = 0; i < minMaxLen.length; i++) {
    minRow.setByteArray(minValues[i], minOrdinal++);
  }
  return minRow;
}
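For context, a minimal sketch of reading the nested min values back out of such a row. It assumes DataMapRow exposes getRow(int) and getByteArray(int) getters mirroring the setRow/setByteArray calls above; the method name readMinMax is illustrative only.

private byte[][] readMinMax(DataMapRow indexRow, int structOrdinal, int numColumns) {
  // the struct ordinal holds the nested row built by addMinMax
  DataMapRow minRow = indexRow.getRow(structOrdinal);
  byte[][] values = new byte[numColumns][];
  for (int i = 0; i < numColumns; i++) {
    // each child ordinal holds the min value bytes of one column
    values[i] = minRow.getByteArray(i);
  }
  return values;
}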
Use of org.apache.carbondata.core.indexstore.row.DataMapRowImpl in project carbondata by apache.
The class BlockletDataMap, method init.
@Override
public void init(DataMapModel dataMapModel) throws IOException, MemoryException {
  long startTime = System.currentTimeMillis();
  assert (dataMapModel instanceof BlockletDataMapModel);
  BlockletDataMapModel blockletDataMapInfo = (BlockletDataMapModel) dataMapModel;
  DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter();
  List<DataFileFooter> indexInfo = fileFooterConverter.getIndexInfo(
      blockletDataMapInfo.getFilePath(), blockletDataMapInfo.getFileData());
  Path path = new Path(blockletDataMapInfo.getFilePath());
  byte[] filePath = path.getParent().toString().getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
  byte[] fileName = path.getName().toString().getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
  byte[] segmentId =
      blockletDataMapInfo.getSegmentId().getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
  DataMapRowImpl summaryRow = null;
  byte[] schemaBinary = null;
  // the below two variables are used to compute the relative blocklet id, i.e. the id
  // assigned to a blocklet within a part file
  String tempFilePath = null;
  int relativeBlockletId = 0;
  for (DataFileFooter fileFooter : indexInfo) {
    if (segmentProperties == null) {
      List<ColumnSchema> columnInTable = fileFooter.getColumnInTable();
      schemaBinary = convertSchemaToBinary(columnInTable);
      columnCardinality = fileFooter.getSegmentInfo().getColumnCardinality();
      segmentProperties = new SegmentProperties(columnInTable, columnCardinality);
      createSchema(segmentProperties);
      createSummarySchema(segmentProperties, schemaBinary, filePath, fileName, segmentId);
    }
    TableBlockInfo blockInfo = fileFooter.getBlockInfo().getTableBlockInfo();
    BlockMetaInfo blockMetaInfo =
        blockletDataMapInfo.getBlockMetaInfoMap().get(blockInfo.getFilePath());
    // check whether the file exists physically
    if (blockMetaInfo != null) {
      if (fileFooter.getBlockletList() == null) {
        // this is the old store scenario; blocklet information is not available in the
        // index file, so load only block info
        summaryRow = loadToUnsafeBlock(fileFooter, segmentProperties, blockInfo.getFilePath(),
            summaryRow, blockMetaInfo);
      } else {
        // the blocklet id restarts from 0 only when the part file path changes
        if (null == tempFilePath || !tempFilePath.equals(blockInfo.getFilePath())) {
          tempFilePath = blockInfo.getFilePath();
          relativeBlockletId = 0;
        }
        summaryRow = loadToUnsafe(fileFooter, segmentProperties, blockInfo.getFilePath(),
            summaryRow, blockMetaInfo, relativeBlockletId);
        // increment the relative blocklet id by the total number of blocklets in this block
        relativeBlockletId += fileFooter.getBlockletList().size();
      }
    }
  }
  if (unsafeMemoryDMStore != null) {
    unsafeMemoryDMStore.finishWriting();
  }
  if (null != unsafeMemorySummaryDMStore) {
    addTaskSummaryRowToUnsafeMemoryStore(summaryRow, schemaBinary, filePath, fileName, segmentId);
    unsafeMemorySummaryDMStore.finishWriting();
  }
  LOGGER.info("Time taken to load blocklet datamap from file : " + dataMapModel.getFilePath()
      + " is " + (System.currentTimeMillis() - startTime));
}
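The relative blocklet id bookkeeping in the loop above can be shown in isolation. The sketch below mirrors it with plain strings and counts; the BlockMeta class and its fields are hypothetical stand-ins for DataFileFooter and are not part of carbondata.

import java.util.ArrayList;
import java.util.List;

class RelativeBlockletIdDemo {

  // Hypothetical stand-in for a footer: a part file path plus its blocklet count.
  static class BlockMeta {
    final String filePath;
    final int blockletCount;
    BlockMeta(String filePath, int blockletCount) {
      this.filePath = filePath;
      this.blockletCount = blockletCount;
    }
  }

  // Returns the starting relative blocklet id of each block, restarting from 0
  // whenever the part file path changes, exactly as init() does above.
  static List<Integer> assignStartIds(List<BlockMeta> blocks) {
    List<Integer> startIds = new ArrayList<>();
    String tempFilePath = null;
    int relativeBlockletId = 0;
    for (BlockMeta block : blocks) {
      if (tempFilePath == null || !tempFilePath.equals(block.filePath)) {
        tempFilePath = block.filePath;
        relativeBlockletId = 0; // new part file, ids restart from 0
      }
      startIds.add(relativeBlockletId);
      relativeBlockletId += block.blockletCount;
    }
    return startIds;
  }
}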
Use of org.apache.carbondata.core.indexstore.row.DataMapRowImpl in project carbondata by apache.
The class BlockletDataMap, method loadToUnsafeBlock.
/**
 * Load information for the block. This case can happen only for old stores
 * where blocklet information is not available in the index file, so load only
 * block information and read blocklet information in the executor.
 */
private DataMapRowImpl loadToUnsafeBlock(DataFileFooter fileFooter,
    SegmentProperties segmentProperties, String filePath, DataMapRowImpl summaryRow,
    BlockMetaInfo blockMetaInfo) {
  int[] minMaxLen = segmentProperties.getColumnsValueSize();
  BlockletIndex blockletIndex = fileFooter.getBlockletIndex();
  CarbonRowSchema[] schema = unsafeMemoryDMStore.getSchema();
  // add one row to maintain task level min max for segment pruning
  if (summaryRow == null) {
    summaryRow = new DataMapRowImpl(unsafeMemorySummaryDMStore.getSchema());
  }
  DataMapRow row = new DataMapRowImpl(schema);
  int ordinal = 0;
  int taskMinMaxOrdinal = 0;
  // add start key as index key
  row.setByteArray(blockletIndex.getBtreeIndex().getStartKey(), ordinal++);
  BlockletMinMaxIndex minMaxIndex = blockletIndex.getMinMaxIndex();
  byte[][] minValues = updateMinValues(minMaxIndex.getMinValues(), minMaxLen);
  byte[][] maxValues = updateMaxValues(minMaxIndex.getMaxValues(), minMaxLen);
  // update min max values in case of old store
  byte[][] updatedMinValues =
      CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, true);
  byte[][] updatedMaxValues =
      CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, false);
  row.setRow(addMinMax(minMaxLen, schema[ordinal], updatedMinValues), ordinal);
  // compute and set task level min values
  addTaskMinMaxValues(summaryRow, minMaxLen,
      unsafeMemorySummaryDMStore.getSchema()[taskMinMaxOrdinal], updatedMinValues,
      TASK_MIN_VALUES_INDEX, true);
  ordinal++;
  taskMinMaxOrdinal++;
  row.setRow(addMinMax(minMaxLen, schema[ordinal], updatedMaxValues), ordinal);
  // compute and set task level max values
  addTaskMinMaxValues(summaryRow, minMaxLen,
      unsafeMemorySummaryDMStore.getSchema()[taskMinMaxOrdinal], updatedMaxValues,
      TASK_MAX_VALUES_INDEX, false);
  ordinal++;
  row.setInt((int) fileFooter.getNumberOfRows(), ordinal++);
  // add file path
  byte[] filePathBytes = filePath.getBytes(CarbonCommonConstants.DEFAULT_CHARSET_CLASS);
  row.setByteArray(filePathBytes, ordinal++);
  // add pages
  row.setShort((short) 0, ordinal++);
  // add version number
  row.setShort(fileFooter.getVersionId().number(), ordinal++);
  // add schema updated time
  row.setLong(fileFooter.getSchemaUpdatedTimeStamp(), ordinal++);
  // add blocklet info (empty for old store)
  row.setByteArray(new byte[0], ordinal++);
  row.setLong(fileFooter.getBlockInfo().getTableBlockInfo().getBlockOffset(), ordinal++);
  try {
    setLocations(blockMetaInfo.getLocationInfo(), row, ordinal);
    ordinal++;
    // the relative blocklet id is -1 because in case of old store the blocklet info is
    // not present in the index file, so we will not know the total number of blocklets
    row.setShort((short) -1, ordinal++);
    // store block size
    row.setLong(blockMetaInfo.getSize(), ordinal);
    unsafeMemoryDMStore.addIndexRowToUnsafe(row);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  return summaryRow;
}
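Note that the legacy path writes recognizable placeholders: zero pages, an empty blocklet-info byte array, and a relative blocklet id of -1. A reader can branch on these to decide whether blocklet details must be re-read from the block footer in the executor. A minimal sketch, assuming DataMapRow exposes getShort/getByteArray getters; the ordinal constants are hypothetical, since the real ordinals are fixed by createSchema(...) and not shown in this listing.

// Hypothetical ordinal constants for illustration only.
static final int BLOCKLET_INFO_ORDINAL = 8;
static final int RELATIVE_BLOCKLET_ID_ORDINAL = 11;

// True when the row came from loadToUnsafeBlock, i.e. an old store where the
// blocklet details must be read from the block footer in the executor.
static boolean isLegacyBlockRow(DataMapRow row) {
  return row.getShort(RELATIVE_BLOCKLET_ID_ORDINAL) == -1
      && row.getByteArray(BLOCKLET_INFO_ORDINAL).length == 0;
}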
Use of org.apache.carbondata.core.indexstore.row.DataMapRowImpl in project carbondata by apache.
The class BlockletDataMap, method addTaskMinMaxValues.
/**
 * This method will compute min/max values at task level
 *
 * @param taskMinMaxRow        summary row holding the task level min/max values
 * @param minMaxLen            value size of each min/max column
 * @param carbonRowSchema      struct schema describing the min/max columns
 * @param minMaxValue          min/max values of the current blocklet
 * @param ordinal              ordinal of the min or max column in the summary row
 * @param isMinValueComparison true for min value comparison, false for max
 */
private void addTaskMinMaxValues(DataMapRow taskMinMaxRow, int[] minMaxLen,
    CarbonRowSchema carbonRowSchema, byte[][] minMaxValue, int ordinal,
    boolean isMinValueComparison) {
  DataMapRow row = taskMinMaxRow.getRow(ordinal);
  byte[][] updatedMinMaxValues = minMaxValue;
  if (null == row) {
    CarbonRowSchema[] minSchemas =
        ((CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema).getChildSchemas();
    row = new DataMapRowImpl(minSchemas);
  } else {
    byte[][] existingMinMaxValues = getMinMaxValue(taskMinMaxRow, ordinal);
    // compare and update min max values
    for (int i = 0; i < minMaxLen.length; i++) {
      int compare =
          ByteUtil.UnsafeComparer.INSTANCE.compareTo(existingMinMaxValues[i], minMaxValue[i]);
      if (isMinValueComparison) {
        // keep the existing value if it is smaller
        if (compare < 0) {
          updatedMinMaxValues[i] = existingMinMaxValues[i];
        }
      } else if (compare > 0) {
        // keep the existing value if it is larger
        updatedMinMaxValues[i] = existingMinMaxValues[i];
      }
    }
  }
  int minMaxOrdinal = 0;
  // write the merged min/max values into the row
  for (int i = 0; i < minMaxLen.length; i++) {
    row.setByteArray(updatedMinMaxValues[i], minMaxOrdinal++);
  }
  taskMinMaxRow.setRow(row, ordinal);
}
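The merge rule deserves a second look: for the min column the existing value wins when it compares smaller, and for the max column when it compares larger. The standalone sketch below reproduces that rule with plain arrays; compareUnsigned mimics the unsigned lexicographic ordering that ByteUtil.UnsafeComparer appears to implement here (an assumption based on its usage).

static byte[][] mergeMinMax(byte[][] existing, byte[][] incoming, boolean isMin) {
  byte[][] merged = new byte[incoming.length][];
  for (int i = 0; i < incoming.length; i++) {
    int compare = compareUnsigned(existing[i], incoming[i]);
    if (isMin) {
      // task min = smaller of the two values
      merged[i] = compare < 0 ? existing[i] : incoming[i];
    } else {
      // task max = larger of the two values
      merged[i] = compare > 0 ? existing[i] : incoming[i];
    }
  }
  return merged;
}

// Unsigned lexicographic byte array comparison.
static int compareUnsigned(byte[] a, byte[] b) {
  int len = Math.min(a.length, b.length);
  for (int i = 0; i < len; i++) {
    int diff = (a[i] & 0xFF) - (b[i] & 0xFF);
    if (diff != 0) {
      return diff;
    }
  }
  return a.length - b.length;
}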
Use of org.apache.carbondata.core.indexstore.row.DataMapRowImpl in project carbondata by apache.
The class BlockletDataMap, method loadToUnsafe.
private DataMapRowImpl loadToUnsafe(DataFileFooter fileFooter, SegmentProperties segmentProperties,
    String filePath, DataMapRowImpl summaryRow, BlockMetaInfo blockMetaInfo,
    int relativeBlockletId) {
  int[] minMaxLen = segmentProperties.getColumnsValueSize();
  List<BlockletInfo> blockletList = fileFooter.getBlockletList();
  CarbonRowSchema[] schema = unsafeMemoryDMStore.getSchema();
  // add one row to maintain task level min max for segment pruning
  if (!blockletList.isEmpty() && summaryRow == null) {
    summaryRow = new DataMapRowImpl(unsafeMemorySummaryDMStore.getSchema());
  }
  for (int index = 0; index < blockletList.size(); index++) {
    DataMapRow row = new DataMapRowImpl(schema);
    int ordinal = 0;
    int taskMinMaxOrdinal = 0;
    BlockletInfo blockletInfo = blockletList.get(index);
    // add start key as index key
    row.setByteArray(blockletInfo.getBlockletIndex().getBtreeIndex().getStartKey(), ordinal++);
    BlockletMinMaxIndex minMaxIndex = blockletInfo.getBlockletIndex().getMinMaxIndex();
    byte[][] minValues = updateMinValues(minMaxIndex.getMinValues(), minMaxLen);
    row.setRow(addMinMax(minMaxLen, schema[ordinal], minValues), ordinal);
    // compute and set task level min values
    addTaskMinMaxValues(summaryRow, minMaxLen,
        unsafeMemorySummaryDMStore.getSchema()[taskMinMaxOrdinal], minValues,
        TASK_MIN_VALUES_INDEX, true);
    ordinal++;
    taskMinMaxOrdinal++;
    byte[][] maxValues = updateMaxValues(minMaxIndex.getMaxValues(), minMaxLen);
    row.setRow(addMinMax(minMaxLen, schema[ordinal], maxValues), ordinal);
    // compute and set task level max values
    addTaskMinMaxValues(summaryRow, minMaxLen,
        unsafeMemorySummaryDMStore.getSchema()[taskMinMaxOrdinal], maxValues,
        TASK_MAX_VALUES_INDEX, false);
    ordinal++;
    row.setInt(blockletInfo.getNumberOfRows(), ordinal++);
    // add file path
    byte[] filePathBytes = filePath.getBytes(CarbonCommonConstants.DEFAULT_CHARSET_CLASS);
    row.setByteArray(filePathBytes, ordinal++);
    // add pages
    row.setShort((short) blockletInfo.getNumberOfPages(), ordinal++);
    // add version number
    row.setShort(fileFooter.getVersionId().number(), ordinal++);
    // add schema updated time
    row.setLong(fileFooter.getSchemaUpdatedTimeStamp(), ordinal++);
    // add blocklet info
    byte[] serializedData;
    try {
      ByteArrayOutputStream stream = new ByteArrayOutputStream();
      DataOutput dataOutput = new DataOutputStream(stream);
      blockletInfo.write(dataOutput);
      serializedData = stream.toByteArray();
      row.setByteArray(serializedData, ordinal++);
      // add block footer offset; it is used if we need to read the footer of the block
      row.setLong(fileFooter.getBlockInfo().getTableBlockInfo().getBlockOffset(), ordinal++);
      setLocations(blockMetaInfo.getLocationInfo(), row, ordinal);
      ordinal++;
      // relative blocklet id, i.e. the blocklet id within a particular part file
      row.setShort((short) relativeBlockletId++, ordinal++);
      // store block size
      row.setLong(blockMetaInfo.getSize(), ordinal);
      unsafeMemoryDMStore.addIndexRowToUnsafe(row);
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
  return summaryRow;
}
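The blocklet info is stored as the bytes produced by its Writable-style write(DataOutput) method. Deserializing is the mirror image; the sketch below shows the round trip, assuming BlockletInfo follows the usual Writable convention with a readFields(DataInput) counterpart and a no-arg constructor.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Serialize the blocklet info exactly as loadToUnsafe does above.
static byte[] serialize(BlockletInfo blockletInfo) throws IOException {
  ByteArrayOutputStream stream = new ByteArrayOutputStream();
  blockletInfo.write(new DataOutputStream(stream));
  return stream.toByteArray();
}

// Rebuild the blocklet info from the bytes stored in the index row.
static BlockletInfo deserialize(byte[] serializedData) throws IOException {
  BlockletInfo blockletInfo = new BlockletInfo();
  blockletInfo.readFields(new DataInputStream(new ByteArrayInputStream(serializedData)));
  return blockletInfo;
}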