use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.
the class CarbonFactDataWriterImplV2 method writeDataToFile.
/**
 * Below method will be used to write the data to file
 * Data Format
 * <DColumn1DataChunk><DColumn1DataPage><DColumn1Rle>
 * <DColumn2DataChunk><DColumn2DataPage><DColumn2RowIds><DColumn2Rle>
 * <DColumn3DataChunk><DColumn3DataPage><DColumn3RowIds>
 * <MColumn1DataChunk><MColumn1DataPage>
 * <MColumn2DataChunk><MColumn2DataPage>
 * <MColumn3DataChunk><MColumn3DataPage>
 *
 * @param nodeHolder      node holder with the blocklet data to be written
 * @param dataChunksBytes serialized data chunk metadata, one entry per column
 * @param channel         file channel of the carbon data file
 * @throws CarbonDataWriterException
 */
private void writeDataToFile(NodeHolder nodeHolder, byte[][] dataChunksBytes,
    FileChannel channel) throws CarbonDataWriterException {
  long offset = 0;
  try {
    offset = channel.size();
  } catch (IOException e) {
    throw new CarbonDataWriterException("Problem while getting the file channel size");
  }
  List<Long> currentDataChunksOffset = new ArrayList<>();
  List<Short> currentDataChunksLength = new ArrayList<>();
  dataChunksLength.add(currentDataChunksLength);
  dataChunksOffsets.add(currentDataChunksOffset);
  int bufferSize = 0;
  int rowIdIndex = 0;
  int rleIndex = 0;
  for (int i = 0; i < nodeHolder.getIsSortedKeyBlock().length; i++) {
    currentDataChunksOffset.add(offset);
    currentDataChunksLength.add((short) dataChunksBytes[i].length);
    bufferSize += dataChunksBytes[i].length + nodeHolder.getKeyLengths()[i]
        + (!nodeHolder.getIsSortedKeyBlock()[i] ?
            nodeHolder.getKeyBlockIndexLength()[rowIdIndex] : 0)
        + (dataWriterVo.getAggBlocks()[i] ?
            nodeHolder.getCompressedDataIndex()[rleIndex].length : 0);
    offset += dataChunksBytes[i].length;
    offset += nodeHolder.getKeyLengths()[i];
    if (!nodeHolder.getIsSortedKeyBlock()[i]) {
      offset += nodeHolder.getKeyBlockIndexLength()[rowIdIndex];
      rowIdIndex++;
    }
    if (dataWriterVo.getAggBlocks()[i]) {
      offset += nodeHolder.getDataIndexMapLength()[rleIndex];
      rleIndex++;
    }
  }
  ByteBuffer buffer = ByteBuffer.allocate(bufferSize);
  rleIndex = 0;
  rowIdIndex = 0;
  for (int i = 0; i < nodeHolder.getIsSortedKeyBlock().length; i++) {
    buffer.put(dataChunksBytes[i]);
    buffer.put(nodeHolder.getKeyArray()[i]);
    if (!nodeHolder.getIsSortedKeyBlock()[i]) {
      buffer.putInt(nodeHolder.getCompressedIndex()[rowIdIndex].length);
      buffer.put(nodeHolder.getCompressedIndex()[rowIdIndex]);
      if (nodeHolder.getCompressedIndexMap()[rowIdIndex].length > 0) {
        buffer.put(nodeHolder.getCompressedIndexMap()[rowIdIndex]);
      }
      rowIdIndex++;
    }
    if (dataWriterVo.getAggBlocks()[i]) {
      buffer.put(nodeHolder.getCompressedDataIndex()[rleIndex]);
      rleIndex++;
    }
  }
  try {
    buffer.flip();
    channel.write(buffer);
  } catch (IOException e) {
    throw new CarbonDataWriterException(
        "Problem while writing the dimension data in carbon data file", e);
  }
  int dataChunkIndex = nodeHolder.getKeyArray().length;
  int totalLength = 0;
  for (int i = 0; i < nodeHolder.getDataArray().length; i++) {
    currentDataChunksOffset.add(offset);
    currentDataChunksLength.add((short) dataChunksBytes[dataChunkIndex].length);
    offset += dataChunksBytes[dataChunkIndex].length;
    offset += nodeHolder.getDataArray()[i].length;
    totalLength += dataChunksBytes[dataChunkIndex].length;
    totalLength += nodeHolder.getDataArray()[i].length;
    dataChunkIndex++;
  }
  buffer = ByteBuffer.allocate(totalLength);
  dataChunkIndex = nodeHolder.getKeyArray().length;
  for (int i = 0; i < nodeHolder.getDataArray().length; i++) {
    buffer.put(dataChunksBytes[dataChunkIndex++]);
    buffer.put(nodeHolder.getDataArray()[i]);
  }
  try {
    buffer.flip();
    channel.write(buffer);
  } catch (IOException e) {
    throw new CarbonDataWriterException(
        "Problem while writing the measure data in carbon data file", e);
  }
}
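For reference, a minimal, self-contained sketch of the ByteBuffer idiom used above: size the buffer up front, put the payloads, flip, write, and keep offset bookkeeping. The class name and file name are illustrative only, not part of carbondata.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;

public final class BufferedChannelWriteSketch {
  // Appends the given column payloads to the channel in a single write and
  // returns the offset at which each payload starts, mirroring the
  // offset/length bookkeeping done in writeDataToFile above.
  static long[] appendPayloads(FileChannel channel, byte[][] payloads) throws IOException {
    long offset = channel.size();
    int bufferSize = 0;
    long[] startOffsets = new long[payloads.length];
    for (int i = 0; i < payloads.length; i++) {
      startOffsets[i] = offset + bufferSize;
      bufferSize += payloads[i].length;
    }
    ByteBuffer buffer = ByteBuffer.allocate(bufferSize);
    for (byte[] payload : payloads) {
      buffer.put(payload);
    }
    buffer.flip();
    while (buffer.hasRemaining()) {
      channel.write(buffer);
    }
    return startOffsets;
  }

  public static void main(String[] args) throws IOException {
    // "part-0.carbondata.tmp" is a made-up file name for the example
    try (FileChannel channel = FileChannel.open(Paths.get("part-0.carbondata.tmp"),
        StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.APPEND)) {
      long[] offsets = appendPayloads(channel,
          new byte[][] { "chunk-header".getBytes(), "data-page".getBytes() });
      System.out.println("first payload written at offset " + offsets[0]);
    }
  }
}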
use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.
the class CarbonFactDataWriterImplV3 method writeDataToFile.
private void writeDataToFile(FileChannel channel) {
  // get the list of node holders
  List<NodeHolder> nodeHolderList = dataWriterHolder.getNodeHolder();
  long blockletDataSize = 0;
  // get data chunks for all the columns
  byte[][] dataChunkBytes = new byte[nodeHolderList.get(0).getKeyArray().length
      + nodeHolderList.get(0).getDataArray().length][];
  int measureStartIndex = nodeHolderList.get(0).getKeyArray().length;
  // calculate the size of data chunks
  try {
    for (int i = 0; i < nodeHolderList.get(0).getKeyArray().length; i++) {
      dataChunkBytes[i] = CarbonUtil.getByteArray(CarbonMetadataUtil
          .getDataChunk3(nodeHolderList, thriftColumnSchemaList,
              dataWriterVo.getSegmentProperties(), i, true));
      blockletDataSize += dataChunkBytes[i].length;
    }
    for (int i = 0; i < nodeHolderList.get(0).getDataArray().length; i++) {
      dataChunkBytes[measureStartIndex] = CarbonUtil.getByteArray(CarbonMetadataUtil
          .getDataChunk3(nodeHolderList, thriftColumnSchemaList,
              dataWriterVo.getSegmentProperties(), i, false));
      blockletDataSize += dataChunkBytes[measureStartIndex].length;
      measureStartIndex++;
    }
  } catch (IOException e) {
    throw new CarbonDataWriterException("Problem while getting the data chunks", e);
  }
  // calculate the total size of data to be written
  blockletDataSize += dataWriterHolder.getSize();
  // if the data size would exceed the block size, roll over to a new file
  updateBlockletFileChannel(blockletDataSize);
  // write data to file
  writeDataToFile(fileChannel, dataChunkBytes);
  // clear the data holder
  dataWriterHolder.clear();
}
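The roll-over decision hidden behind updateBlockletFileChannel(blockletDataSize) can be illustrated with a hedged sketch; the field and method names below are assumptions for illustration only, not the actual AbstractFactDataWriter members.

import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;

final class BlockRollOverSketch {
  private final long blockSizeThreshold;
  private long currentFileSize;
  private int fileCount;
  private FileChannel fileChannel;

  BlockRollOverSketch(long blockSizeThreshold) {
    this.blockSizeThreshold = blockSizeThreshold;
  }

  // Mirrors the intent of updateBlockletFileChannel: if the next blocklet would
  // push the current file past the configured block size, close the current
  // channel and open a new file before writing.
  void ensureCapacity(long nextBlockletSize) throws IOException {
    if (fileChannel == null || currentFileSize + nextBlockletSize > blockSizeThreshold) {
      if (fileChannel != null) {
        fileChannel.close();
      }
      // hypothetical naming scheme for the rolled-over files
      fileChannel = FileChannel.open(Paths.get("part-" + fileCount++ + ".carbondata.tmp"),
          StandardOpenOption.CREATE, StandardOpenOption.WRITE);
      currentFileSize = 0;
    }
    currentFileSize += nextBlockletSize;
  }
}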
use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.
the class CarbonFactDataWriterImplV3 method buildDataNodeHolder.
/**
 * Below method will be used to build the node holder object.
 * This node holder object will be used to persist data which will
 * be written in the carbon data file.
 */
@Override
public NodeHolder buildDataNodeHolder(IndexStorage<short[]>[] keyStorageArray,
    byte[][] measureArray, int entryCount, byte[] startKey, byte[] endKey,
    WriterCompressModel compressionModel, byte[] noDictionaryStartKey,
    byte[] noDictionaryEndKey, BitSet[] nullValueIndexBitSet)
    throws CarbonDataWriterException {
  // set the empty byte array
  if (null == noDictionaryEndKey) {
    noDictionaryEndKey = new byte[0];
  }
  if (null == noDictionaryStartKey) {
    noDictionaryStartKey = new byte[0];
  }
  // total measure length
  int totalMsrArrySize = 0;
  // current measure length
  int currentMsrLenght = 0;
  int totalKeySize = 0;
  int keyBlockSize = 0;
  boolean[] isSortedData = new boolean[keyStorageArray.length];
  int[] keyLengths = new int[keyStorageArray.length];
  // below will calculate min and max value for each column
  // for below 2d array, first index will be for column and second will be min and max
  // value for same column
  byte[][] dimensionMinValue = new byte[keyStorageArray.length][];
  byte[][] dimensionMaxValue = new byte[keyStorageArray.length][];
  byte[][] measureMinValue = new byte[measureArray.length][];
  byte[][] measureMaxValue = new byte[measureArray.length][];
  byte[][] keyBlockData = fillAndCompressedKeyBlockData(keyStorageArray, entryCount);
  boolean[] colGrpBlock = new boolean[keyStorageArray.length];
  for (int i = 0; i < keyLengths.length; i++) {
    keyLengths[i] = keyBlockData[i].length;
    isSortedData[i] = keyStorageArray[i].isAlreadySorted();
    keyBlockSize++;
    totalKeySize += keyLengths[i];
    if (dataWriterVo.getIsComplexType()[i] || dataWriterVo.getIsDictionaryColumn()[i]) {
      dimensionMinValue[i] = keyStorageArray[i].getMin();
      dimensionMaxValue[i] = keyStorageArray[i].getMax();
    } else {
      dimensionMinValue[i] = updateMinMaxForNoDictionary(keyStorageArray[i].getMin());
      dimensionMaxValue[i] = updateMinMaxForNoDictionary(keyStorageArray[i].getMax());
    }
    // colGroup chunk
    if (keyStorageArray[i] instanceof ColGroupBlockStorage) {
      colGrpBlock[i] = true;
    }
  }
  for (int i = 0; i < measureArray.length; i++) {
    measureMaxValue[i] = CarbonMetadataUtil.getByteValueForMeasure(
        compressionModel.getMaxValue()[i],
        dataWriterVo.getSegmentProperties().getMeasures().get(i).getDataType());
    measureMinValue[i] = CarbonMetadataUtil.getByteValueForMeasure(
        compressionModel.getMinValue()[i],
        dataWriterVo.getSegmentProperties().getMeasures().get(i).getDataType());
  }
  int[] keyBlockIdxLengths = new int[keyBlockSize];
  byte[][] dataAfterCompression = new byte[keyBlockSize][];
  byte[][] indexMap = new byte[keyBlockSize][];
  for (int i = 0; i < isSortedData.length; i++) {
    if (!isSortedData[i]) {
      dataAfterCompression[i] = getByteArray(keyStorageArray[i].getDataAfterComp());
      if (null != keyStorageArray[i].getIndexMap()
          && keyStorageArray[i].getIndexMap().length > 0) {
        indexMap[i] = getByteArray(keyStorageArray[i].getIndexMap());
      } else {
        indexMap[i] = new byte[0];
      }
      keyBlockIdxLengths[i] = (dataAfterCompression[i].length + indexMap[i].length)
          + CarbonCommonConstants.INT_SIZE_IN_BYTE;
    }
  }
  byte[][] compressedDataIndex = new byte[keyBlockSize][];
  int[] dataIndexMapLength = new int[keyBlockSize];
  for (int i = 0; i < dataWriterVo.getAggBlocks().length; i++) {
    if (dataWriterVo.getAggBlocks()[i]) {
      try {
        compressedDataIndex[i] = getByteArray(keyStorageArray[i].getDataIndexMap());
        dataIndexMapLength[i] = compressedDataIndex[i].length;
      } catch (Exception e) {
        throw new CarbonDataWriterException(e.getMessage(), e);
      }
    }
  }
  int[] msrLength = new int[dataWriterVo.getMeasureCount()];
  // each measure size
  for (int i = 0; i < measureArray.length; i++) {
    currentMsrLenght = measureArray[i].length;
    totalMsrArrySize += currentMsrLenght;
    msrLength[i] = currentMsrLenght;
  }
  NodeHolder holder = new NodeHolder();
  holder.setDataArray(measureArray);
  holder.setKeyArray(keyBlockData);
  holder.setMeasureNullValueIndex(nullValueIndexBitSet);
  // end key format will be <length of dictionary key><length of no
  // dictionary key><DictionaryKey><No Dictionary key>
  byte[] updatedNoDictionaryEndKey = updateNoDictionaryStartAndEndKey(noDictionaryEndKey);
  ByteBuffer buffer = ByteBuffer.allocate(
      CarbonCommonConstants.INT_SIZE_IN_BYTE + CarbonCommonConstants.INT_SIZE_IN_BYTE
          + endKey.length + updatedNoDictionaryEndKey.length);
  buffer.putInt(endKey.length);
  buffer.putInt(updatedNoDictionaryEndKey.length);
  buffer.put(endKey);
  buffer.put(updatedNoDictionaryEndKey);
  buffer.rewind();
  holder.setEndKey(buffer.array());
  holder.setMeasureLenght(msrLength);
  byte[] updatedNoDictionaryStartKey = updateNoDictionaryStartAndEndKey(noDictionaryStartKey);
  // start key format will be <length of dictionary key><length of no
  // dictionary key><DictionaryKey><No Dictionary key>
  buffer = ByteBuffer.allocate(
      CarbonCommonConstants.INT_SIZE_IN_BYTE + CarbonCommonConstants.INT_SIZE_IN_BYTE
          + startKey.length + updatedNoDictionaryStartKey.length);
  buffer.putInt(startKey.length);
  buffer.putInt(updatedNoDictionaryStartKey.length);
  buffer.put(startKey);
  buffer.put(updatedNoDictionaryStartKey);
  buffer.rewind();
  holder.setStartKey(buffer.array());
  holder.setEntryCount(entryCount);
  holder.setKeyLengths(keyLengths);
  holder.setKeyBlockIndexLength(keyBlockIdxLengths);
  holder.setIsSortedKeyBlock(isSortedData);
  holder.setCompressedIndex(dataAfterCompression);
  holder.setCompressedIndexMap(indexMap);
  holder.setDataIndexMapLength(dataIndexMapLength);
  holder.setCompressedDataIndex(compressedDataIndex);
  holder.setCompressionModel(compressionModel);
  holder.setTotalDimensionArrayLength(totalKeySize);
  holder.setTotalMeasureArrayLength(totalMsrArrySize);
  holder.setMeasureColumnMaxData(measureMaxValue);
  holder.setMeasureColumnMinData(measureMinValue);
  // setting column min max value
  holder.setColumnMaxData(dimensionMaxValue);
  holder.setColumnMinData(dimensionMinValue);
  holder.setAggBlocks(dataWriterVo.getAggBlocks());
  holder.setColGrpBlocks(colGrpBlock);
  List<byte[]> dimensionDataChunk2 = null;
  List<byte[]> measureDataChunk2 = null;
  try {
    dimensionDataChunk2 = CarbonMetadataUtil.getDataChunk2(holder, thriftColumnSchemaList,
        dataWriterVo.getSegmentProperties(), true);
    measureDataChunk2 = CarbonMetadataUtil.getDataChunk2(holder, thriftColumnSchemaList,
        dataWriterVo.getSegmentProperties(), false);
  } catch (IOException e) {
    throw new CarbonDataWriterException(e.getMessage());
  }
  holder.setHolderSize(calculateSize(holder, dimensionDataChunk2, measureDataChunk2));
  return holder;
}
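The start/end key layout built above, <int dictionary key length><int no-dictionary key length><dictionary key><no-dictionary key> (INT_SIZE_IN_BYTE is 4), can be captured in a small encode/decode sketch; the class below is an illustration, not a carbondata API.

import java.nio.ByteBuffer;

final class StartEndKeySketch {
  // Encodes a key pair the same way buildDataNodeHolder does:
  // <int dict key length><int no-dict key length><dict key><no-dict key>
  static byte[] encode(byte[] dictionaryKey, byte[] noDictionaryKey) {
    ByteBuffer buffer =
        ByteBuffer.allocate(4 + 4 + dictionaryKey.length + noDictionaryKey.length);
    buffer.putInt(dictionaryKey.length);
    buffer.putInt(noDictionaryKey.length);
    buffer.put(dictionaryKey);
    buffer.put(noDictionaryKey);
    return buffer.array();
  }

  // Reads the pair back, useful when interpreting the start/end key stored in the holder.
  static byte[][] decode(byte[] encoded) {
    ByteBuffer buffer = ByteBuffer.wrap(encoded);
    byte[] dictionaryKey = new byte[buffer.getInt()];
    byte[] noDictionaryKey = new byte[buffer.getInt()];
    buffer.get(dictionaryKey);
    buffer.get(noDictionaryKey);
    return new byte[][] { dictionaryKey, noDictionaryKey };
  }
}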
use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.
the class SingleThreadFinalSortFilesMerger method startSorting.
/**
 * Below method will be used to start the sorting process. This method will get
 * all the temp files present in the sort temp folder, create the record holder
 * heap, and then read the first record from each file to initialize the heap.
 *
 * @throws CarbonDataWriterException
 */
private void startSorting(File[] files) throws CarbonDataWriterException {
  this.fileCounter = files.length;
  if (fileCounter == 0) {
    LOGGER.info("No files to merge sort");
    return;
  }
  this.fileBufferSize = CarbonDataProcessorUtil.getFileBufferSize(this.fileCounter,
      CarbonProperties.getInstance(), CarbonCommonConstants.CONSTANT_SIZE_TEN);
  LOGGER.info("Number of temp files: " + this.fileCounter);
  LOGGER.info("File Buffer Size: " + this.fileBufferSize);
  // create record holder heap
  createRecordHolderQueue(files);
  // iterate over the file list, create a chunk holder for each file and add it to the heap
  LOGGER.info("Started adding first record from each file");
  int maxThreadForSorting = 0;
  try {
    maxThreadForSorting = Integer.parseInt(CarbonProperties.getInstance()
        .getProperty(CarbonCommonConstants.CARBON_MERGE_SORT_READER_THREAD,
            CarbonCommonConstants.CARBON_MERGE_SORT_READER_THREAD_DEFAULTVALUE));
  } catch (NumberFormatException e) {
    maxThreadForSorting =
        Integer.parseInt(CarbonCommonConstants.CARBON_MERGE_SORT_READER_THREAD_DEFAULTVALUE);
  }
  ExecutorService service = Executors.newFixedThreadPool(maxThreadForSorting);
  for (final File tempFile : files) {
    Callable<Void> runnable = new Callable<Void>() {
      @Override
      public Void call() throws CarbonSortKeyAndGroupByException {
        // create chunk holder
        SortTempFileChunkHolder sortTempFileChunkHolder =
            new SortTempFileChunkHolder(tempFile, dimensionCount, complexDimensionCount,
                measureCount, fileBufferSize, noDictionaryCount, measureDataType,
                isNoDictionaryColumn, isNoDictionarySortColumn);
        // initialize the holder and read its first row
        sortTempFileChunkHolder.initialize();
        sortTempFileChunkHolder.readRow();
        // add to heap
        synchronized (LOCKOBJECT) {
          recordHolderHeapLocal.add(sortTempFileChunkHolder);
        }
        return null;
      }
    };
    service.submit(runnable);
  }
  service.shutdown();
  try {
    service.awaitTermination(2, TimeUnit.HOURS);
  } catch (Exception e) {
    throw new CarbonDataWriterException(e.getMessage(), e);
  }
  LOGGER.info("Heap Size: " + this.recordHolderHeapLocal.size());
}
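The merge loop that consumes the heap is not part of this snippet; assuming the usual pattern for a record holder heap like the one seeded above, it would look roughly like the sketch below, where RowSource is a hypothetical stand-in for SortTempFileChunkHolder (the real class compares holders on the current row's sort key).

import java.util.PriorityQueue;
import java.util.function.Consumer;

final class KWayMergeSketch {
  interface RowSource extends Comparable<RowSource> {
    Object[] currentRow();
    boolean hasNext();
    void readRow();   // advances the source to its next row
  }

  // Poll the holder with the smallest current row, emit that row, advance the
  // holder and re-insert it while it still has rows; repeat until the heap is empty.
  static void merge(PriorityQueue<RowSource> heap, Consumer<Object[]> writer) {
    while (!heap.isEmpty()) {
      RowSource smallest = heap.poll();
      writer.accept(smallest.currentRow());
      if (smallest.hasNext()) {
        smallest.readRow();
        heap.add(smallest);
      }
    }
  }
}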
use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.
the class AbstractFactDataWriter method renameCarbonDataFile.
/**
 * This method will rename the carbon data file from in-progress status to normal
 *
 * @throws CarbonDataWriterException
 */
protected void renameCarbonDataFile() throws CarbonDataWriterException {
  File origFile = new File(
      this.carbonDataFileTempPath.substring(0, this.carbonDataFileTempPath.lastIndexOf('.')));
  File curFile = new File(this.carbonDataFileTempPath);
  if (!curFile.renameTo(origFile)) {
    throw new CarbonDataWriterException("Problem while renaming the file");
  }
}
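File.renameTo returns only a boolean, so the exception above cannot say why the rename failed. A hedged alternative sketch using java.nio.file.Files.move, which throws a descriptive IOException on failure, is shown below; it assumes the same "strip the trailing extension" convention and is not the actual carbondata code.

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

final class RenameSketch {
  // Strips the trailing in-progress extension and moves the file, mirroring
  // renameCarbonDataFile but surfacing the underlying failure reason.
  static void renameToFinal(String tempPath) throws IOException {
    Path source = Paths.get(tempPath);
    Path target = Paths.get(tempPath.substring(0, tempPath.lastIndexOf('.')));
    Files.move(source, target);
  }
}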