use of org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException in project carbondata by apache.
the class SingleThreadFinalSortFilesMerger method getSortedRecordFromFile.
/**
 * This method will be used to get the sorted record from file
 *
 * @return sorted record
 * @throws CarbonDataWriterException
 */
private Object[] getSortedRecordFromFile() throws CarbonDataWriterException {
  Object[] row = null;
  // poll the top object from the heap; the heap maintains a binary tree
  // ordered by the comparator passed to it, so poll always removes the
  // root of the tree and then performs a trickle-down operation with
  // log(n) complexity
  SortTempFileChunkHolder poll = this.recordHolderHeapLocal.poll();
  // get the row from the chunk
  row = poll.getRow();
  // check whether any entry is still present in this chunk
  if (!poll.hasNext()) {
    // if the chunk is empty then close the stream
    poll.closeStream();
    // decrement the file counter
    --this.fileCounter;
    // return row
    return row;
  }
  // read the next row
  try {
    poll.readRow();
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataWriterException(e.getMessage(), e);
  }
  // add the holder back to the heap
  this.recordHolderHeapLocal.add(poll);
  // return row
  return row;
}
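For context, a minimal sketch of the merge loop that would drive this method; the loop shape and the writeRow consumer are assumptions for illustration, not code from SingleThreadFinalSortFilesMerger:

// fileCounter tracks how many temp-file chunks still hold rows; each call
// to getSortedRecordFromFile() yields the globally smallest remaining row
while (this.fileCounter > 0) {
  Object[] sortedRow = getSortedRecordFromFile();
  writeRow(sortedRow); // hypothetical consumer that forwards the sorted row
}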
use of org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException in project carbondata by apache.
the class SortTempFileChunkHolder method initialise.
private void initialise() throws CarbonSortKeyAndGroupByException {
  try {
    if (isSortTempFileCompressionEnabled) {
      this.bufferSize = sortTempFileNoOFRecordsInCompression;
    }
    stream = new DataInputStream(
        new BufferedInputStream(new FileInputStream(tempFile), this.fileBufferSize));
    this.entryCount = stream.readInt();
    if (prefetch) {
      new DataFetcher(false).call();
      totalRecordFetch += currentBuffer.length;
      if (totalRecordFetch < this.entryCount) {
        submit = executorService.submit(new DataFetcher(true));
      }
    } else {
      if (isSortTempFileCompressionEnabled) {
        new DataFetcher(false).call();
      }
    }
  } catch (FileNotFoundException e) {
    LOGGER.error(e);
    throw new CarbonSortKeyAndGroupByException(tempFile + " not found", e);
  } catch (IOException e) {
    LOGGER.error(e);
    throw new CarbonSortKeyAndGroupByException("Problem while reading " + tempFile, e);
  } catch (Exception e) {
    LOGGER.error(e);
    throw new CarbonSortKeyAndGroupByException("Problem while reading " + tempFile, e);
  }
}
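As a hedged illustration of how a holder like this is typically driven (the constructor signature and the public initialize() wrapper shown here are assumptions for illustration; the real class is configured from the sort parameters):

// assumed flow: open the chunk, then iterate rows until the stream drains
SortTempFileChunkHolder holder =
    new SortTempFileChunkHolder(tempFile, parameters); // hypothetical signature
holder.initialize();            // opens the stream and, if enabled, prefetches
while (holder.hasNext()) {
  holder.readRow();             // decodes the next row from the stream
  Object[] row = holder.getRow();
  // ... hand the row to the merger
}
holder.closeStream();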
use of org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException in project carbondata by apache.
the class SortTempFileChunkHolder method getRowFromStream.
/**
 * Reads a row from the sort temp file stream
 * @return Object[]
 * @throws CarbonSortKeyAndGroupByException
 */
private Object[] getRowFromStream() throws CarbonSortKeyAndGroupByException {
  // create a new row of size 3 (1 for dims, 1 for high-cardinality/no-dictionary
  // dims, 1 for measures)
  Object[] holder = new Object[3];
  int index = 0;
  int nonDicIndex = 0;
  int[] dim = new int[this.dimensionCount];
  byte[][] nonDicArray = new byte[this.noDictionaryCount + this.complexDimensionCount][];
  Object[] measures = new Object[this.measureCount];
  try {
    // read dimension values: no-dictionary columns are length-prefixed byte
    // arrays, dictionary columns are int surrogate keys
    for (int i = 0; i < isNoDictionaryDimensionColumn.length; i++) {
      if (isNoDictionaryDimensionColumn[i]) {
        short len = stream.readShort();
        byte[] array = new byte[len];
        stream.readFully(array);
        nonDicArray[nonDicIndex++] = array;
      } else {
        dim[index++] = stream.readInt();
      }
    }
    // complex dimensions are also stored as length-prefixed byte arrays
    for (int i = 0; i < complexDimensionCount; i++) {
      short len = stream.readShort();
      byte[] array = new byte[len];
      stream.readFully(array);
      nonDicArray[nonDicIndex++] = array;
    }
    index = 0;
    // read measure values: a 1-byte marker indicates non-null, followed by
    // the value encoded according to its aggregation type
    for (int i = 0; i < this.measureCount; i++) {
      if (stream.readByte() == 1) {
        switch (aggType[i]) {
          case SHORT:
            measures[index++] = stream.readShort();
            break;
          case INT:
            measures[index++] = stream.readInt();
            break;
          case LONG:
            measures[index++] = stream.readLong();
            break;
          case DOUBLE:
            measures[index++] = stream.readDouble();
            break;
          case DECIMAL:
            int len = stream.readInt();
            byte[] buff = new byte[len];
            stream.readFully(buff);
            measures[index++] = buff;
            break;
        }
      } else {
        measures[index++] = null;
      }
    }
    NonDictionaryUtil.prepareOutObj(holder, dim, nonDicArray, measures);
    // increment the number of records read
    this.numberOfObjectRead++;
  } catch (IOException e) {
    LOGGER.error("Problem while reading the mdkey from the sort temp file");
    throw new CarbonSortKeyAndGroupByException("Problem while reading the sort temp file ", e);
  }
  // return the assembled row
  return holder;
}
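The read order above implies a matching on-disk row layout. A minimal sketch of the corresponding writer side (simplified and assumed for illustration; this is not the project's actual temp-file writer):

// per row: dictionary dims as int surrogate keys, no-dictionary and complex
// dims as (short length + bytes), then per measure a 1-byte null marker
// followed by the typed value
for (int i = 0; i < isNoDictionaryDimensionColumn.length; i++) {
  if (isNoDictionaryDimensionColumn[i]) {
    out.writeShort(nonDicArray[nonDicIndex].length);
    out.write(nonDicArray[nonDicIndex++]);
  } else {
    out.writeInt(dim[dimIndex++]);
  }
}
out.writeByte(measure == null ? 0 : 1); // null marker before each measure value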
use of org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException in project carbondata by apache.
the class UnsafeParallelReadMergeSorterImpl method sort.
@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators)
    throws CarbonDataLoadingException {
  int inMemoryChunkSizeInMB = CarbonProperties.getInstance().getSortMemoryChunkSizeInMB();
  UnsafeSortDataRows sortDataRow =
      new UnsafeSortDataRows(sortParameters, unsafeIntermediateFileMerger, inMemoryChunkSizeInMB);
  final int batchSize = CarbonProperties.getInstance().getBatchSize();
  try {
    sortDataRow.initialize();
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataLoadingException(e);
  }
  this.executorService = Executors.newFixedThreadPool(iterators.length);
  this.threadStatusObserver = new ThreadStatusObserver(this.executorService);
  try {
    for (int i = 0; i < iterators.length; i++) {
      executorService.submit(new SortIteratorThread(iterators[i], sortDataRow, batchSize,
          rowCounter, this.threadStatusObserver));
    }
    executorService.shutdown();
    executorService.awaitTermination(2, TimeUnit.DAYS);
    processRowToNextStep(sortDataRow, sortParameters);
  } catch (Exception e) {
    checkError();
    throw new CarbonDataLoadingException("Problem while shutting down the executor service ", e);
  }
  checkError();
  try {
    unsafeIntermediateFileMerger.finish();
    List<UnsafeCarbonRowPage> rowPages = unsafeIntermediateFileMerger.getRowPages();
    finalMerger.startFinalMerge(rowPages.toArray(new UnsafeCarbonRowPage[rowPages.size()]),
        unsafeIntermediateFileMerger.getMergedPages());
  } catch (CarbonDataWriterException e) {
    throw new CarbonDataLoadingException(e);
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataLoadingException(e);
  }
  // Creates the iterator to read from the merge sorter.
  Iterator<CarbonRowBatch> batchIterator = new CarbonIterator<CarbonRowBatch>() {

    @Override
    public boolean hasNext() {
      return finalMerger.hasNext();
    }

    @Override
    public CarbonRowBatch next() {
      int counter = 0;
      CarbonRowBatch rowBatch = new CarbonRowBatch(batchSize);
      while (finalMerger.hasNext() && counter < batchSize) {
        rowBatch.addRow(new CarbonRow(finalMerger.next()));
        counter++;
      }
      return rowBatch;
    }
  };
  return new Iterator[] { batchIterator };
}
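A hedged usage sketch of draining the iterators this method returns (the sorter variable and the consuming step are assumptions for illustration):

// each returned iterator yields sorted CarbonRowBatch instances
Iterator<CarbonRowBatch>[] outputs = sorter.sort(inputIterators);
for (Iterator<CarbonRowBatch> it : outputs) {
  while (it.hasNext()) {
    CarbonRowBatch batch = it.next();
    // ... forward each sorted batch to the next load step, e.g. the data writer
  }
}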
use of org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException in project carbondata by apache.
the class UnsafeParallelReadMergeSorterWithBucketingImpl method sort.
@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators)
    throws CarbonDataLoadingException {
  UnsafeSortDataRows[] sortDataRows = new UnsafeSortDataRows[bucketingInfo.getNumberOfBuckets()];
  UnsafeIntermediateMerger[] intermediateFileMergers =
      new UnsafeIntermediateMerger[sortDataRows.length];
  int inMemoryChunkSizeInMB = CarbonProperties.getInstance().getSortMemoryChunkSizeInMB();
  // share the configured sort memory evenly across the buckets, with a 5 MB floor
  inMemoryChunkSizeInMB = inMemoryChunkSizeInMB / bucketingInfo.getNumberOfBuckets();
  if (inMemoryChunkSizeInMB < 5) {
    inMemoryChunkSizeInMB = 5;
  }
  try {
    for (int i = 0; i < bucketingInfo.getNumberOfBuckets(); i++) {
      SortParameters parameters = sortParameters.getCopy();
      parameters.setPartitionID(i + "");
      setTempLocation(parameters);
      intermediateFileMergers[i] = new UnsafeIntermediateMerger(parameters);
      sortDataRows[i] =
          new UnsafeSortDataRows(parameters, intermediateFileMergers[i], inMemoryChunkSizeInMB);
      sortDataRows[i].initialize();
    }
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataLoadingException(e);
  }
  this.executorService = Executors.newFixedThreadPool(iterators.length);
  final int batchSize = CarbonProperties.getInstance().getBatchSize();
  try {
    for (int i = 0; i < iterators.length; i++) {
      executorService.submit(new SortIteratorThread(iterators[i], sortDataRows));
    }
    executorService.shutdown();
    executorService.awaitTermination(2, TimeUnit.DAYS);
    processRowToNextStep(sortDataRows, sortParameters);
  } catch (Exception e) {
    throw new CarbonDataLoadingException("Problem while shutting down the executor service ", e);
  }
  try {
    for (int i = 0; i < intermediateFileMergers.length; i++) {
      intermediateFileMergers[i].finish();
    }
  } catch (Exception e) {
    throw new CarbonDataLoadingException(e);
  }
  Iterator<CarbonRowBatch>[] batchIterator = new Iterator[bucketingInfo.getNumberOfBuckets()];
  for (int i = 0; i < sortDataRows.length; i++) {
    batchIterator[i] = new MergedDataIterator(String.valueOf(i), batchSize,
        intermediateFileMergers[i]);
  }
  return batchIterator;
}
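The chunk sizing at the top of this method simply splits the configured sort memory evenly across buckets, flooring at 5 MB. A worked example with assumed values:

// e.g. a 64 MB sort chunk shared across 16 buckets
int inMemoryChunkSizeInMB = 64 / 16;   // = 4 MB per bucket
if (inMemoryChunkSizeInMB < 5) {
  inMemoryChunkSizeInMB = 5;           // floored to the 5 MB minimum
}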