Use of org.apache.carbondata.core.datastore.block.TableBlockInfo in the Apache CarbonData project.
Class CarbonLoaderUtilTest, method nodeBlockMappingTestWith6Blocks4nodes.
/**
 * Test case with 6 blocks and 4 nodes, where every block lists only nodes "1", "2" and "3"
 * as its locations, so the 4th node does not have any local data.
 *
 * @throws Exception if any step of the test fails unexpectedly
 */
@Test
public void nodeBlockMappingTestWith6Blocks4nodes() throws Exception {
  final int numberOfBlocks = 6;
  final int numberOfNodes = 4;

  Map<TableBlockInfo, List<String>> inputMap = new HashMap<>(numberOfBlocks);
  List<TableBlockInfo> inputBlocks = new ArrayList<>(numberOfBlocks);
  for (int i = 0; i < numberOfBlocks; i++) {
    // File names run part-0 .. part-5 while segment ids run "1" .. "6".
    // A fresh locations array is created per block so no two blocks share one instance.
    TableBlockInfo block = new TableBlockInfo("part-" + i + "-0-1462341987000", 123,
        String.valueOf(i + 1), new String[] { "1", "2", "3" }, 111);
    inputMap.put(block, Arrays.asList("1", "2", "3"));
    inputBlocks.add(block);
  }

  Map<String, List<TableBlockInfo>> outputMap =
      CarbonLoaderUtil.nodeBlockMapping(inputBlocks, numberOfNodes);
  Assert.assertTrue(calculateBlockDistribution(inputMap, outputMap, numberOfBlocks, numberOfNodes));
  Assert.assertTrue(calculateBlockLocality(inputMap, outputMap, numberOfBlocks, numberOfNodes));
}
Use of org.apache.carbondata.core.datastore.block.TableBlockInfo in the Apache CarbonData project.
Class CarbonLoaderUtilTest, method calculateBlockLocality.
/**
 * Measures how many assigned blocks ended up on a node that is not listed among the
 * block's own locations, prints that share as a percentage and checks it against 30.
 *
 * @param inputMap       block to the list of nodes the block is local to
 * @param outputMap      node to the blocks that were assigned to it
 * @param numberOfBlocks denominator used for the mismatch percentage
 * @param numberOfNodes  not used by this check
 * @return {@code false} when the mismatch percentage is greater than 30, {@code true} otherwise
 */
private boolean calculateBlockLocality(Map<TableBlockInfo, List<String>> inputMap, Map<String, List<TableBlockInfo>> outputMap, int numberOfBlocks, int numberOfNodes) {
  double mismatches = 0;
  for (Map.Entry<String, List<TableBlockInfo>> nodeToBlocks : outputMap.entrySet()) {
    String node = nodeToBlocks.getKey();
    for (TableBlockInfo assignedBlock : nodeToBlocks.getValue()) {
      // a block is a mismatch when the node it was given to is not one of its locations
      if (!inputMap.get(assignedBlock).contains(node)) {
        mismatches++;
      }
    }
  }
  double mismatchPercent = (mismatches / numberOfBlocks) * 100;
  System.out.println(mismatchPercent + " " + "is the node locality mismatch");
  // written as a negated "greater than" so that a NaN percentage still yields true
  return !(mismatchPercent > 30);
}
Use of org.apache.carbondata.core.datastore.block.TableBlockInfo in the Apache CarbonData project.
Class CarbonCompactionUtil, method createDataFileFooterMappingForSegments.
/**
 * Groups data file footers by segment id: for every block, the footer obtained from
 * {@code CarbonUtil.readMetadatFile} is appended to the list kept for that block's segment.
 *
 * @param tableBlockInfoList blocks whose footers are to be grouped
 * @return map from segment id to the footers of that segment's blocks, in input order
 * @throws IOException declared by this method; presumably raised while reading a footer
 */
public static Map<String, List<DataFileFooter>> createDataFileFooterMappingForSegments(List<TableBlockInfo> tableBlockInfoList) throws IOException {
  Map<String, List<DataFileFooter>> footersBySegment = new HashMap<>();
  for (TableBlockInfo block : tableBlockInfoList) {
    String segmentId = block.getSegmentId();
    List<DataFileFooter> segmentFooters = footersBySegment.get(segmentId);
    DataFileFooter footer = CarbonUtil.readMetadatFile(block);
    if (segmentFooters == null) {
      // first block seen for this segment: register a list for it
      segmentFooters = new ArrayList<>();
      footersBySegment.put(segmentId, segmentFooters);
    }
    segmentFooters.add(footer);
  }
  return footersBySegment;
}
Use of org.apache.carbondata.core.datastore.block.TableBlockInfo in the Apache CarbonData project.
Class SegmentTaskIndexStoreTest, method setUp.
/**
 * Class-level fixture setup: creates the DRIVER_BTREE cache used as the task index store,
 * a sample table block info and the absolute table identifier shared by the tests.
 */
@BeforeClass
public static void setUp() {
  taskIndexStore = (SegmentTaskIndexStore) CacheProvider.getInstance()
      .<TableSegmentUniqueIdentifier, SegmentTaskIndexWrapper>createCache(CacheType.DRIVER_BTREE, "");

  ColumnarFormatVersion formatVersion = ColumnarFormatVersion.valueOf(version);
  tableBlockInfo = new TableBlockInfo("file", 0L, "SG100", locations, 10L, formatVersion);

  CarbonTableIdentifier tableIdentifier =
      new CarbonTableIdentifier("testdatabase", "testtable", "TB100");
  absoluteTableIdentifier = new AbsoluteTableIdentifier("/tmp", tableIdentifier);
}
Use of org.apache.carbondata.core.datastore.block.TableBlockInfo in the Apache CarbonData project.
Class CarbonInputFormat, method getTableBlockInfo.
/**
 * Builds the list of table block infos for one segment. The splits are computed on a copy
 * of the job whose input segment numbers are set to the segment id of
 * {@code tableSegmentUniqueIdentifier}; only splits accepted by
 * {@code isValidBlockBasedOnUpdateDetails} are converted.
 *
 * @param job                          job context whose configuration and job id are copied
 * @param tableSegmentUniqueIdentifier supplies the segment id used for the split lookup and
 *                                     for each created block info
 * @param taskKeys                     passed through to the validity check
 * @param updateDetails                passed through to the validity check
 * @param updateStatusManager          passed through to the validity check
 * @param segmentId                    passed through to the validity check
 * @param validTaskKeys                passed through to the validity check
 * @return list of table block infos, one per accepted split
 * @throws IOException declared by this method; presumably raised while computing or reading splits
 */
private List<TableBlockInfo> getTableBlockInfo(JobContext job, TableSegmentUniqueIdentifier tableSegmentUniqueIdentifier, Set<SegmentTaskIndexStore.TaskBucketHolder> taskKeys, UpdateVO updateDetails, SegmentUpdateStatusManager updateStatusManager, String segmentId, Set<SegmentTaskIndexStore.TaskBucketHolder> validTaskKeys) throws IOException {
  List<TableBlockInfo> blocks = new ArrayList<TableBlockInfo>();

  // work on a copy of the job so the segment restriction does not touch the caller's configuration
  JobContext segmentJob = new JobContextImpl(new Configuration(job.getConfiguration()), job.getJobID());
  segmentJob.getConfiguration().set(
      CarbonInputFormat.INPUT_SEGMENT_NUMBERS,
      tableSegmentUniqueIdentifier.getSegmentId() + "");

  for (InputSplit rawSplit : getSplitsInternal(segmentJob)) {
    CarbonInputSplit split = (CarbonInputSplit) rawSplit;
    if (!isValidBlockBasedOnUpdateDetails(taskKeys, split, updateDetails, updateStatusManager, segmentId, validTaskKeys)) {
      // splits rejected by the update details are left out
      continue;
    }
    BlockletInfos blockletInfos =
        new BlockletInfos(split.getNumberOfBlocklets(), 0, split.getNumberOfBlocklets());
    blocks.add(new TableBlockInfo(
        split.getPath().toString(),
        split.getStart(),
        tableSegmentUniqueIdentifier.getSegmentId(),
        split.getLocations(),
        split.getLength(),
        blockletInfos,
        split.getVersion(),
        split.getBlockStorageIdMap()));
  }
  return blocks;
}
Aggregations