Search in sources :

Example 11 with TableBlockInfo

use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

the class CarbonLoaderUtilTest method nodeBlockMappingTestWith6Blocks4nodes.

/**
   * Test case with 6 blocks and 4 nodes where the 4th node doesn't have any local data.
   * Verifies that the block distribution is balanced across nodes and that the
   * node-locality mismatch stays within the accepted tolerance.
   *
   * @throws Exception if block mapping fails
   */
@Test
public void nodeBlockMappingTestWith6Blocks4nodes() throws Exception {
    // Expected locality of each block: every block is local to nodes "1", "2", "3"
    // only, so node "4" has no local data at all.
    Map<TableBlockInfo, List<String>> inputMap = new HashMap<TableBlockInfo, List<String>>(6);
    TableBlockInfo block1 = new TableBlockInfo("part-0-0-1462341987000", 123, "1", new String[] { "1", "2", "3" }, 111);
    TableBlockInfo block2 = new TableBlockInfo("part-1-0-1462341987000", 123, "2", new String[] { "1", "2", "3" }, 111);
    TableBlockInfo block3 = new TableBlockInfo("part-2-0-1462341987000", 123, "3", new String[] { "1", "2", "3" }, 111);
    TableBlockInfo block4 = new TableBlockInfo("part-3-0-1462341987000", 123, "4", new String[] { "1", "2", "3" }, 111);
    TableBlockInfo block5 = new TableBlockInfo("part-4-0-1462341987000", 123, "5", new String[] { "1", "2", "3" }, 111);
    TableBlockInfo block6 = new TableBlockInfo("part-5-0-1462341987000", 123, "6", new String[] { "1", "2", "3" }, 111);
    // Arrays.asList varargs form instead of wrapping an explicit String[].
    inputMap.put(block1, Arrays.asList("1", "2", "3"));
    inputMap.put(block2, Arrays.asList("1", "2", "3"));
    inputMap.put(block3, Arrays.asList("1", "2", "3"));
    inputMap.put(block4, Arrays.asList("1", "2", "3"));
    inputMap.put(block5, Arrays.asList("1", "2", "3"));
    inputMap.put(block6, Arrays.asList("1", "2", "3"));
    // Parameterized list (was a raw ArrayList), presized for the 6 blocks.
    List<TableBlockInfo> inputBlocks = new ArrayList<>(6);
    inputBlocks.add(block1);
    inputBlocks.add(block2);
    inputBlocks.add(block3);
    inputBlocks.add(block4);
    inputBlocks.add(block5);
    inputBlocks.add(block6);
    // Map the 6 blocks onto 4 nodes and validate distribution + locality.
    Map<String, List<TableBlockInfo>> outputMap = CarbonLoaderUtil.nodeBlockMapping(inputBlocks, 4);
    Assert.assertTrue(calculateBlockDistribution(inputMap, outputMap, 6, 4));
    Assert.assertTrue(calculateBlockLocality(inputMap, outputMap, 6, 4));
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) LinkedList(java.util.LinkedList) Test(org.junit.Test)

Example 12 with TableBlockInfo

use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

the class CarbonLoaderUtilTest method calculateBlockLocality.

/**
 * Checks the node locality of a block-to-node mapping: for every block assigned
 * to a node, the block is "local" when that node appears in the block's
 * preferred-location list from {@code inputMap}. Returns {@code true} when the
 * mismatch ratio is within the 30% tolerance, {@code false} otherwise.
 * Also prints the mismatch percentage for diagnostics.
 *
 * Note: {@code numberOfNodes} is currently unused but kept for signature
 * symmetry with {@code calculateBlockDistribution}.
 */
private boolean calculateBlockLocality(Map<TableBlockInfo, List<String>> inputMap, Map<String, List<TableBlockInfo>> outputMap, int numberOfBlocks, int numberOfNodes) {
    // Double so the percentage below is computed with floating-point division.
    double mismatchCount = 0;
    for (Map.Entry<String, List<TableBlockInfo>> nodeAssignment : outputMap.entrySet()) {
        String node = nodeAssignment.getKey();
        for (TableBlockInfo assignedBlock : nodeAssignment.getValue()) {
            // A block assigned to a node outside its preferred locations is a mismatch.
            List<String> preferredNodes = inputMap.get(assignedBlock);
            if (!preferredNodes.contains(node)) {
                mismatchCount++;
            }
        }
    }
    double mismatchPercent = (mismatchCount / numberOfBlocks) * 100;
    System.out.println(mismatchPercent + " " + "is the node locality mismatch");
    // Tolerate up to 30% of blocks landing on non-local nodes.
    return !(mismatchPercent > 30);
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) ArrayList(java.util.ArrayList) List(java.util.List) LinkedList(java.util.LinkedList) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 13 with TableBlockInfo

use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

the class CarbonCompactionUtil method createDataFileFooterMappingForSegments.

/**
   * Creates a mapping of segment id to the {@link DataFileFooter}s read from
   * each block that belongs to that segment.
   *
   * @param tableBlockInfoList blocks whose footer metadata should be read and grouped
   * @return map from segment id to the footers of that segment's blocks
   * @throws IOException if reading a block's footer metadata fails
   */
public static Map<String, List<DataFileFooter>> createDataFileFooterMappingForSegments(List<TableBlockInfo> tableBlockInfoList) throws IOException {
    Map<String, List<DataFileFooter>> segmentBlockInfoMapping = new HashMap<>();
    for (TableBlockInfo blockInfo : tableBlockInfoList) {
        String segId = blockInfo.getSegmentId();
        // read the footer metadata of this block
        DataFileFooter dataFileMetadata = CarbonUtil.readMetadatFile(blockInfo);
        List<DataFileFooter> metadataList = segmentBlockInfoMapping.get(segId);
        if (null == metadataList) {
            // first block seen for this segment: create its footer list now
            // (the original allocated a throwaway list on every iteration)
            metadataList = new ArrayList<>();
            segmentBlockInfoMapping.put(segId, metadataList);
        }
        metadataList.add(dataFileMetadata);
    }
    return segmentBlockInfoMapping;
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) HashMap(java.util.HashMap) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List)

Example 14 with TableBlockInfo

use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

the class SegmentTaskIndexStoreTest method setUp.

@BeforeClass
public static void setUp() {
    // Build the driver-side B-tree cache under test via the singleton cache
    // provider; "" is the carbon store path (empty for this unit test).
    CacheProvider cacheProvider = CacheProvider.getInstance();
    taskIndexStore = (SegmentTaskIndexStore) cacheProvider.<TableSegmentUniqueIdentifier, SegmentTaskIndexWrapper>createCache(CacheType.DRIVER_BTREE, "");
    // Sample block in segment "SG100"; 'locations' and 'version' are static
    // fixtures declared elsewhere in this test class — not visible here.
    tableBlockInfo = new TableBlockInfo("file", 0L, "SG100", locations, 10L, ColumnarFormatVersion.valueOf(version));
    // Identifier for the test table: store path "/tmp", db "testdatabase",
    // table "testtable", table id "TB100".
    absoluteTableIdentifier = new AbsoluteTableIdentifier("/tmp", new CarbonTableIdentifier("testdatabase", "testtable", "TB100"));
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) SegmentTaskIndexWrapper(org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) CacheProvider(org.apache.carbondata.core.cache.CacheProvider) BeforeClass(org.junit.BeforeClass)

Example 15 with TableBlockInfo

use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

the class CarbonInputFormat method getTableBlockInfo.

/**
   * Builds the list of {@link TableBlockInfo} for one segment by re-running the
   * split computation restricted to that segment and converting each valid
   * {@link CarbonInputSplit} into a {@code TableBlockInfo}.
   *
   * @param job       job context whose configuration is copied for the restricted run
   * @param tableSegmentUniqueIdentifier identifies the segment whose blocks are collected
   * @param taskKeys  task/bucket keys used by the update-details validity check
   * @param updateDetails update/delete timestamp info for the segment
   *                      (NOTE(review): exact semantics owned by
   *                      isValidBlockBasedOnUpdateDetails — confirm there)
   * @param updateStatusManager manager consulted for segment update status
   * @param segmentId number of segment id
   * @param validTaskKeys task/bucket keys considered valid for this segment
   * @return list of table block
   * @throws IOException if computing the splits fails
   */
private List<TableBlockInfo> getTableBlockInfo(JobContext job, TableSegmentUniqueIdentifier tableSegmentUniqueIdentifier, Set<SegmentTaskIndexStore.TaskBucketHolder> taskKeys, UpdateVO updateDetails, SegmentUpdateStatusManager updateStatusManager, String segmentId, Set<SegmentTaskIndexStore.TaskBucketHolder> validTaskKeys) throws IOException {
    List<TableBlockInfo> tableBlockInfoList = new ArrayList<TableBlockInfo>();
    // get file location of all files of given segment
    // (clone the configuration so the original job is not mutated)
    JobContext newJob = new JobContextImpl(new Configuration(job.getConfiguration()), job.getJobID());
    newJob.getConfiguration().set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, tableSegmentUniqueIdentifier.getSegmentId() + "");
    // identify table blocks
    for (InputSplit inputSplit : getSplitsInternal(newJob)) {
        CarbonInputSplit carbonInputSplit = (CarbonInputSplit) inputSplit;
        // then add as TableInfo object.
        // Skip splits invalidated by pending updates/deletes for this segment.
        if (isValidBlockBasedOnUpdateDetails(taskKeys, carbonInputSplit, updateDetails, updateStatusManager, segmentId, validTaskKeys)) {
            // All blocklets of the split are covered: range [0, numberOfBlocklets).
            BlockletInfos blockletInfos = new BlockletInfos(carbonInputSplit.getNumberOfBlocklets(), 0, carbonInputSplit.getNumberOfBlocklets());
            tableBlockInfoList.add(new TableBlockInfo(carbonInputSplit.getPath().toString(), carbonInputSplit.getStart(), tableSegmentUniqueIdentifier.getSegmentId(), carbonInputSplit.getLocations(), carbonInputSplit.getLength(), blockletInfos, carbonInputSplit.getVersion(), carbonInputSplit.getBlockStorageIdMap()));
        }
    }
    return tableBlockInfoList;
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) Configuration(org.apache.hadoop.conf.Configuration) BlockletInfos(org.apache.carbondata.core.datastore.block.BlockletInfos) JobContext(org.apache.hadoop.mapreduce.JobContext) InputSplit(org.apache.hadoop.mapreduce.InputSplit)

Aggregations

TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo)33 ArrayList (java.util.ArrayList)19 Test (org.junit.Test)11 HashMap (java.util.HashMap)10 List (java.util.List)9 LinkedList (java.util.LinkedList)7 AbstractIndex (org.apache.carbondata.core.datastore.block.AbstractIndex)7 DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter)7 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)6 LinkedHashMap (java.util.LinkedHashMap)5 IOException (java.io.IOException)4 SegmentTaskIndexWrapper (org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper)4 CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier)4 File (java.io.File)3 Map (java.util.Map)3 MockUp (mockit.MockUp)3 BlockInfo (org.apache.carbondata.core.datastore.block.BlockInfo)3 TableBlockUniqueIdentifier (org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier)3 SegmentInfo (org.apache.carbondata.core.metadata.blocklet.SegmentInfo)3 QueryExecutionException (org.apache.carbondata.core.scan.executor.exception.QueryExecutionException)3