use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.
the class AbstractFactDataWriter method closeWriter.
/**
 * Method will be used to close the open file channel
 *
 * @throws CarbonDataWriterException
 */
public void closeWriter() throws CarbonDataWriterException {
  CarbonUtil.closeStreams(this.fileOutputStream, this.fileChannel);
  if (this.blockletInfoList.size() > 0) {
    renameCarbonDataFile();
    copyCarbonDataFileToCarbonStorePath(
        this.carbonDataFileTempPath.substring(0, this.carbonDataFileTempPath.lastIndexOf('.')));
    try {
      writeIndexFile();
    } catch (IOException e) {
      throw new CarbonDataWriterException("Problem while writing the index file", e);
    }
  }
  closeExecutorService();
}
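The substring call above strips everything after the last '.' from the temporary file name to derive the final carbon data file path before it is copied to the store. A small self-contained sketch of that promote-temp-file pattern is shown below; the temp path, suffix and class name are invented purely for illustration and are not CarbonData's real naming scheme.

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;

public class TempFilePromotionSketch {
  public static void main(String[] args) throws IOException {
    // hypothetical temp path; the real name is produced by the writer itself
    String tempPath = "/tmp/store/part-0-0_batchno0-0-1.carbondata.tmp";
    // same idea as the substring in closeWriter: drop everything after the last '.'
    String finalPath = tempPath.substring(0, tempPath.lastIndexOf('.'));
    Path src = Paths.get(tempPath);
    Path dst = Paths.get(finalPath);
    Files.createDirectories(src.getParent());
    Files.write(src, new byte[] { 1, 2, 3 });
    // promote the temp file to its final name once writing is complete
    Files.move(src, dst, StandardCopyOption.REPLACE_EXISTING);
    System.out.println("promoted to " + dst);
  }
}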
use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.
the class SingleThreadFinalSortFilesMerger method getSortedRecordFromFile.
/**
 * This method will be used to get the sorted record from file
 *
 * @return sorted record
 * @throws CarbonDataWriterException
 */
private Object[] getSortedRecordFromFile() throws CarbonDataWriterException {
  Object[] row = null;
  // poll the top object from the heap
  // the heap maintains a binary tree that satisfies the heap condition based on
  // the comparator passed to it
  // poll always removes the root of the tree and then performs a trickle-down
  // operation, so its complexity is log(n)
  SortTempFileChunkHolder poll = this.recordHolderHeapLocal.poll();
  // get the row from the chunk
  row = poll.getRow();
  // check if no more entries are present
  if (!poll.hasNext()) {
    // if the chunk is empty then close the stream
    poll.closeStream();
    // decrement the file counter
    --this.fileCounter;
    // return the row
    return row;
  }
  // read a new row
  try {
    poll.readRow();
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataWriterException(e.getMessage(), e);
  }
  // add the holder back to the heap
  this.recordHolderHeapLocal.add(poll);
  // return the row
  return row;
}
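The poll / advance / re-add pattern above is the classic heap-based k-way merge over sorted temp files. A minimal self-contained sketch of the same pattern, using plain Iterators over in-memory lists in place of SortTempFileChunkHolder (the Holder class and the sample data are hypothetical simplifications):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;

public class KWayMergeSketch {
  // holder for the current head row of one sorted source
  static class Holder implements Comparable<Holder> {
    final Iterator<Integer> it;
    int current;
    Holder(Iterator<Integer> it) { this.it = it; this.current = it.next(); }
    @Override public int compareTo(Holder o) { return Integer.compare(current, o.current); }
  }

  public static void main(String[] args) {
    List<List<Integer>> sortedRuns = Arrays.asList(
        Arrays.asList(1, 4, 9), Arrays.asList(2, 3, 8), Arrays.asList(5, 6, 7));
    PriorityQueue<Holder> heap = new PriorityQueue<>();
    for (List<Integer> run : sortedRuns) {
      heap.add(new Holder(run.iterator()));
    }
    List<Integer> merged = new ArrayList<>();
    while (!heap.isEmpty()) {
      // poll removes the smallest head in O(log n)
      Holder poll = heap.poll();
      merged.add(poll.current);
      // if the source still has rows, advance it and put it back on the heap
      if (poll.it.hasNext()) {
        poll.current = poll.it.next();
        heap.add(poll);
      }
    }
    System.out.println(merged); // [1, 2, 3, 4, 5, 6, 7, 8, 9]
  }
}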
use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.
the class CarbonFactDataWriterImplV3 method writeBlockletInfoToFile.
@Override
protected void writeBlockletInfoToFile(FileChannel channel, String filePath)
    throws CarbonDataWriterException {
  try {
    // get the current file position
    long currentPosition = channel.size();
    // get the thrift file footer instance
    FileFooter3 convertFileMeta = CarbonMetadataUtil.convertFileFooterVersion3(blockletMetadata,
        blockletIndex, localCardinality, thriftColumnSchemaList.size(),
        dataWriterVo.getSegmentProperties());
    // fill the carbon index details
    fillBlockIndexInfoDetails(convertFileMeta.getNum_rows(), carbonDataFileName, currentPosition);
    // write the footer
    byte[] byteArray = CarbonUtil.getByteArray(convertFileMeta);
    ByteBuffer buffer =
        ByteBuffer.allocate(byteArray.length + CarbonCommonConstants.LONG_SIZE_IN_BYTE);
    buffer.put(byteArray);
    buffer.putLong(currentPosition);
    buffer.flip();
    channel.write(buffer);
  } catch (IOException e) {
    throw new CarbonDataWriterException("Problem while writing the carbon file: ", e);
  }
}
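Because the method appends the serialized footer followed by an 8-byte long holding the footer's start offset, a reader can locate the footer by reading the last 8 bytes of the file and seeking back. The sketch below illustrates that reader-side consequence; the method and class names are hypothetical and the returned bytes would still need to be deserialized into a thrift FileFooter3.

import java.io.IOException;
import java.io.RandomAccessFile;

public class FooterLocatorSketch {
  static byte[] readFooterBytes(String carbonDataFilePath) throws IOException {
    try (RandomAccessFile file = new RandomAccessFile(carbonDataFilePath, "r")) {
      long fileLength = file.length();
      // the trailing long written by buffer.putLong(currentPosition) above
      file.seek(fileLength - 8);
      long footerOffset = file.readLong();
      int footerLength = (int) (fileLength - 8 - footerOffset);
      byte[] footer = new byte[footerLength];
      file.seek(footerOffset);
      file.readFully(footer);
      // these bytes would then be deserialized into the thrift footer
      return footer;
    }
  }
}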
use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.
the class CarbonFactDataWriterImplV3 method writeDataToFile.
/**
 * Below method will be used to write data in the carbon data file
 * Data Format
 * <Column1 Data ChunkV3><Column1<Page1><Page2><Page3><Page4>>
 * <Column2 Data ChunkV3><Column2<Page1><Page2><Page3><Page4>>
 * <Column3 Data ChunkV3><Column3<Page1><Page2><Page3><Page4>>
 * <Column4 Data ChunkV3><Column4<Page1><Page2><Page3><Page4>>
 * Each page will contain the column data, inverted index and RLE index
 *
 * @param channel
 * @param dataChunkBytes
 */
private void writeDataToFile(FileChannel channel, byte[][] dataChunkBytes) {
  long offset = 0;
  // write the header
  try {
    if (fileChannel.size() == 0) {
      // below code is to write the file header
      byte[] fileHeader = CarbonUtil.getByteArray(CarbonMetadataUtil
          .getFileHeader(true, thriftColumnSchemaList, dataWriterVo.getSchemaUpdatedTimeStamp()));
      ByteBuffer buffer = ByteBuffer.allocate(fileHeader.length);
      buffer.put(fileHeader);
      buffer.flip();
      fileChannel.write(buffer);
    }
    offset = channel.size();
  } catch (IOException e) {
    throw new CarbonDataWriterException("Problem while getting the file channel size", e);
  }
  // to maintain the offset of each data chunk in the blocklet
  List<Long> currentDataChunksOffset = new ArrayList<>();
  // to maintain the length of each data chunk in the blocklet
  List<Integer> currentDataChunksLength = new ArrayList<>();
  // get the node holder list
  List<NodeHolder> nodeHolderList = dataWriterHolder.getNodeHolder();
  int numberOfDimension = nodeHolderList.get(0).getKeyArray().length;
  int numberOfMeasures = nodeHolderList.get(0).getDataArray().length;
  NodeHolder nodeHolder = null;
  ByteBuffer buffer = null;
  int bufferSize = 0;
  long dimensionOffset = 0;
  long measureOffset = 0;
  int numberOfRows = 0;
  // calculate the number of rows in each blocklet
  for (int j = 0; j < nodeHolderList.size(); j++) {
    numberOfRows += nodeHolderList.get(j).getEntryCount();
  }
  try {
    for (int i = 0; i < numberOfDimension; i++) {
      currentDataChunksOffset.add(offset);
      currentDataChunksLength.add(dataChunkBytes[i].length);
      buffer = ByteBuffer.allocate(dataChunkBytes[i].length);
      buffer.put(dataChunkBytes[i]);
      buffer.flip();
      fileChannel.write(buffer);
      offset += dataChunkBytes[i].length;
      for (int j = 0; j < nodeHolderList.size(); j++) {
        nodeHolder = nodeHolderList.get(j);
        bufferSize = nodeHolder.getKeyLengths()[i]
            + (!nodeHolder.getIsSortedKeyBlock()[i] ? nodeHolder.getKeyBlockIndexLength()[i] : 0)
            + (dataWriterVo.getAggBlocks()[i] ? nodeHolder.getCompressedDataIndex()[i].length : 0);
        buffer = ByteBuffer.allocate(bufferSize);
        buffer.put(nodeHolder.getKeyArray()[i]);
        if (!nodeHolder.getIsSortedKeyBlock()[i]) {
          buffer.putInt(nodeHolder.getCompressedIndex()[i].length);
          buffer.put(nodeHolder.getCompressedIndex()[i]);
          if (nodeHolder.getCompressedIndexMap()[i].length > 0) {
            buffer.put(nodeHolder.getCompressedIndexMap()[i]);
          }
        }
        if (nodeHolder.getAggBlocks()[i]) {
          buffer.put(nodeHolder.getCompressedDataIndex()[i]);
        }
        buffer.flip();
        fileChannel.write(buffer);
        offset += bufferSize;
      }
    }
    dimensionOffset = offset;
    int dataChunkStartIndex = nodeHolderList.get(0).getKeyArray().length;
    for (int i = 0; i < numberOfMeasures; i++) {
      nodeHolderList = dataWriterHolder.getNodeHolder();
      currentDataChunksOffset.add(offset);
      currentDataChunksLength.add(dataChunkBytes[dataChunkStartIndex].length);
      buffer = ByteBuffer.allocate(dataChunkBytes[dataChunkStartIndex].length);
      buffer.put(dataChunkBytes[dataChunkStartIndex]);
      buffer.flip();
      fileChannel.write(buffer);
      offset += dataChunkBytes[dataChunkStartIndex].length;
      dataChunkStartIndex++;
      for (int j = 0; j < nodeHolderList.size(); j++) {
        nodeHolder = nodeHolderList.get(j);
        bufferSize = nodeHolder.getDataArray()[i].length;
        buffer = ByteBuffer.allocate(bufferSize);
        buffer.put(nodeHolder.getDataArray()[i]);
        buffer.flip();
        fileChannel.write(buffer);
        offset += bufferSize;
      }
    }
    measureOffset = offset;
  } catch (IOException e) {
    throw new CarbonDataWriterException("Problem while writing the data", e);
  }
  blockletIndex.add(CarbonMetadataUtil
      .getBlockletIndex(nodeHolderList, dataWriterVo.getSegmentProperties().getMeasures()));
  BlockletInfo3 blockletInfo3 =
      new BlockletInfo3(numberOfRows, currentDataChunksOffset, currentDataChunksLength,
          dimensionOffset, measureOffset, dataWriterHolder.getNodeHolder().size());
  blockletMetadata.add(blockletInfo3);
}
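The essence of the method is the offset bookkeeping: write the file header only when the channel is empty, then append each chunk while recording its start offset and length so the footer can describe the blocklet layout later. A stripped-down, self-contained sketch of that pattern follows; the header bytes, chunk contents and file name are made up for illustration and do not reflect CarbonData's real binary layout.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.List;

public class ChunkOffsetSketch {
  public static void main(String[] args) throws IOException {
    byte[] header = "HDR".getBytes();
    byte[][] chunks = { "chunk-one".getBytes(), "chunk-two".getBytes() };
    List<Long> chunkOffsets = new ArrayList<>();
    List<Integer> chunkLengths = new ArrayList<>();
    try (FileChannel channel = FileChannel.open(Paths.get("/tmp/chunk-offset-sketch.bin"),
        StandardOpenOption.CREATE, StandardOpenOption.WRITE)) {
      if (channel.size() == 0) {
        // header goes in exactly once, at the start of the file
        channel.write(ByteBuffer.wrap(header));
      }
      long offset = channel.size();
      for (byte[] chunk : chunks) {
        // record where this chunk starts and how long it is before writing it
        chunkOffsets.add(offset);
        chunkLengths.add(chunk.length);
        channel.write(ByteBuffer.wrap(chunk));
        offset += chunk.length;
      }
    }
    // these lists play the role of currentDataChunksOffset / currentDataChunksLength above
    System.out.println(chunkOffsets + " " + chunkLengths);
  }
}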
use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.
the class CarbonFactDataWriterImplV1 method buildDataNodeHolder.
@Override
public NodeHolder buildDataNodeHolder(IndexStorage<int[]>[] keyStorageArray, byte[][] measureArray,
    int entryCount, byte[] startKey, byte[] endKey, WriterCompressModel compressionModel,
    byte[] noDictionaryStartKey, byte[] noDictionaryEndKey, BitSet[] nullValueIndexBitSet)
    throws CarbonDataWriterException {
  // set the empty byte array
  if (null == noDictionaryEndKey) {
    noDictionaryEndKey = new byte[0];
  }
  if (null == noDictionaryStartKey) {
    noDictionaryStartKey = new byte[0];
  }
  // total measure length
  int totalMsrArrySize = 0;
  // current measure length
  int currentMsrLenght = 0;
  int totalKeySize = 0;
  int keyBlockSize = 0;
  boolean[] isSortedData = new boolean[keyStorageArray.length];
  int[] keyLengths = new int[keyStorageArray.length];
  // below will calculate the min and max value for each column
  // for the below 2d array, the first index is the column and the second is the
  // min/max value for that column
  // byte[][] columnMinMaxData = new byte[keyStorageArray.length][];
  byte[][] allMinValue = new byte[keyStorageArray.length][];
  byte[][] allMaxValue = new byte[keyStorageArray.length][];
  byte[][] keyBlockData = fillAndCompressedKeyBlockData(keyStorageArray, entryCount);
  boolean[] colGrpBlock = new boolean[keyStorageArray.length];
  for (int i = 0; i < keyLengths.length; i++) {
    keyLengths[i] = keyBlockData[i].length;
    isSortedData[i] = keyStorageArray[i].isAlreadySorted();
    if (!isSortedData[i]) {
      keyBlockSize++;
    }
    totalKeySize += keyLengths[i];
    if (dataWriterVo.getIsComplexType()[i] || dataWriterVo.getIsDictionaryColumn()[i]) {
      allMinValue[i] = keyStorageArray[i].getMin();
      allMaxValue[i] = keyStorageArray[i].getMax();
    } else {
      allMinValue[i] = updateMinMaxForNoDictionary(keyStorageArray[i].getMin());
      allMaxValue[i] = updateMinMaxForNoDictionary(keyStorageArray[i].getMax());
    }
    // if keyStorageArray is an instance of ColGroupBlockStorage then it is a colGroup chunk
    if (keyStorageArray[i] instanceof ColGroupBlockStorage) {
      colGrpBlock[i] = true;
    }
  }
  int[] keyBlockIdxLengths = new int[keyBlockSize];
  byte[][] dataAfterCompression = new byte[keyBlockSize][];
  byte[][] indexMap = new byte[keyBlockSize][];
  int idx = 0;
  for (int i = 0; i < isSortedData.length; i++) {
    if (!isSortedData[i]) {
      dataAfterCompression[idx] = numberCompressor.compress(keyStorageArray[i].getDataAfterComp());
      if (null != keyStorageArray[i].getIndexMap() && keyStorageArray[i].getIndexMap().length > 0) {
        indexMap[idx] = numberCompressor.compress(keyStorageArray[i].getIndexMap());
      } else {
        indexMap[idx] = new byte[0];
      }
      keyBlockIdxLengths[idx] = (dataAfterCompression[idx].length + indexMap[idx].length)
          + CarbonCommonConstants.INT_SIZE_IN_BYTE;
      idx++;
    }
  }
  int compressDataBlockSize = 0;
  for (int i = 0; i < dataWriterVo.getAggBlocks().length; i++) {
    if (dataWriterVo.getAggBlocks()[i]) {
      compressDataBlockSize++;
    }
  }
  byte[][] compressedDataIndex = new byte[compressDataBlockSize][];
  int[] dataIndexMapLength = new int[compressDataBlockSize];
  idx = 0;
  for (int i = 0; i < dataWriterVo.getAggBlocks().length; i++) {
    if (dataWriterVo.getAggBlocks()[i]) {
      try {
        compressedDataIndex[idx] = numberCompressor.compress(keyStorageArray[i].getDataIndexMap());
        dataIndexMapLength[idx] = compressedDataIndex[idx].length;
        idx++;
      } catch (Exception e) {
        throw new CarbonDataWriterException(e.getMessage(), e);
      }
    }
  }
  int[] msrLength = new int[dataWriterVo.getMeasureCount()];
  // each measure size
  for (int i = 0; i < measureArray.length; i++) {
    currentMsrLenght = measureArray[i].length;
    totalMsrArrySize += currentMsrLenght;
    msrLength[i] = currentMsrLenght;
  }
  NodeHolder holder = new NodeHolder();
  holder.setDataArray(measureArray);
  holder.setKeyArray(keyBlockData);
  holder.setMeasureNullValueIndex(nullValueIndexBitSet);
  // end key format will be <length of dictionary key><length of no
  // dictionary key><DictionaryKey><No Dictionary key>
  byte[] updatedNoDictionaryEndKey = updateNoDictionaryStartAndEndKey(noDictionaryEndKey);
  ByteBuffer buffer = ByteBuffer.allocate(CarbonCommonConstants.INT_SIZE_IN_BYTE
      + CarbonCommonConstants.INT_SIZE_IN_BYTE + endKey.length + updatedNoDictionaryEndKey.length);
  buffer.putInt(endKey.length);
  buffer.putInt(updatedNoDictionaryEndKey.length);
  buffer.put(endKey);
  buffer.put(updatedNoDictionaryEndKey);
  buffer.rewind();
  holder.setEndKey(buffer.array());
  holder.setMeasureLenght(msrLength);
  byte[] updatedNoDictionaryStartKey = updateNoDictionaryStartAndEndKey(noDictionaryStartKey);
  // start key format will be <length of dictionary key><length of no
  // dictionary key><DictionaryKey><No Dictionary key>
  buffer = ByteBuffer.allocate(CarbonCommonConstants.INT_SIZE_IN_BYTE
      + CarbonCommonConstants.INT_SIZE_IN_BYTE + startKey.length
      + updatedNoDictionaryStartKey.length);
  buffer.putInt(startKey.length);
  buffer.putInt(updatedNoDictionaryStartKey.length);
  buffer.put(startKey);
  buffer.put(updatedNoDictionaryStartKey);
  buffer.rewind();
  holder.setStartKey(buffer.array());
  holder.setEntryCount(entryCount);
  holder.setKeyLengths(keyLengths);
  holder.setKeyBlockIndexLength(keyBlockIdxLengths);
  holder.setIsSortedKeyBlock(isSortedData);
  holder.setCompressedIndex(dataAfterCompression);
  holder.setCompressedIndexMap(indexMap);
  holder.setDataIndexMapLength(dataIndexMapLength);
  holder.setCompressedDataIndex(compressedDataIndex);
  holder.setCompressionModel(compressionModel);
  holder.setTotalDimensionArrayLength(totalKeySize);
  holder.setTotalMeasureArrayLength(totalMsrArrySize);
  // setting column min max value
  holder.setColumnMaxData(allMaxValue);
  holder.setColumnMinData(allMinValue);
  holder.setAggBlocks(dataWriterVo.getAggBlocks());
  holder.setColGrpBlocks(colGrpBlock);
  return holder;
}
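The start and end keys built above use the layout <length of dictionary key><length of no dictionary key><DictionaryKey><No Dictionary key>. The self-contained sketch below encodes and decodes that layout with a ByteBuffer; the key contents and class name are invented, only the buffer layout mirrors the code.

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class StartEndKeyLayoutSketch {
  public static void main(String[] args) {
    byte[] dictionaryKey = new byte[] { 0, 1, 0, 5 };
    byte[] noDictionaryKey = "abc".getBytes(StandardCharsets.UTF_8);
    // encode: two int lengths followed by the two key byte arrays
    ByteBuffer buffer = ByteBuffer.allocate(4 + 4 + dictionaryKey.length + noDictionaryKey.length);
    buffer.putInt(dictionaryKey.length);
    buffer.putInt(noDictionaryKey.length);
    buffer.put(dictionaryKey);
    buffer.put(noDictionaryKey);
    buffer.rewind();
    byte[] encoded = buffer.array();
    // decode: read the two lengths back, then slice out each key
    ByteBuffer reader = ByteBuffer.wrap(encoded);
    byte[] decodedDict = new byte[reader.getInt()];
    byte[] decodedNoDict = new byte[reader.getInt()];
    reader.get(decodedDict);
    reader.get(decodedNoDict);
    System.out.println(decodedDict.length + " " + new String(decodedNoDict, StandardCharsets.UTF_8));
  }
}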