
Example 1 with CarbonTableInputFormat

Use of org.apache.carbondata.hadoop.api.CarbonTableInputFormat in project carbondata by apache.

From class CarbonInputFormatUtil, method createCarbonTableInputFormat:

public static <V> CarbonTableInputFormat<V> createCarbonTableInputFormat(AbsoluteTableIdentifier identifier, List<String> partitionId, Job job) throws IOException {
    CarbonTableInputFormat<V> carbonTableInputFormat = new CarbonTableInputFormat<>();
    // Propagate the partition id list and the table identity into the job configuration.
    CarbonTableInputFormat.setPartitionIdList(job.getConfiguration(), partitionId);
    CarbonTableInputFormat.setDatabaseName(job.getConfiguration(), identifier.getCarbonTableIdentifier().getDatabaseName());
    CarbonTableInputFormat.setTableName(job.getConfiguration(), identifier.getCarbonTableIdentifier().getTableName());
    // Register the table path as the job's input directory.
    FileInputFormat.addInputPath(job, new Path(identifier.getTablePath()));
    return carbonTableInputFormat;
}
Also used: Path (org.apache.hadoop.fs.Path), CarbonTableInputFormat (org.apache.carbondata.hadoop.api.CarbonTableInputFormat)
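
A minimal usage sketch of this helper, assuming the test-store utility StoreCreator shown in Example 5 below; the Job construction and the partition id "0" are illustrative placeholders, not values from the source:

// Sketch only, not code from the project: obtain a configured input
// format for a single partition. StoreCreator.getAbsoluteTableIdentifier()
// is borrowed from the test utilities in Example 5.
Job job = Job.getInstance(new Configuration());
CarbonTableInputFormat<Object> format = CarbonInputFormatUtil.createCarbonTableInputFormat(
        StoreCreator.getAbsoluteTableIdentifier(), Arrays.asList("0"), job);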

Example 2 with CarbonTableInputFormat

Use of org.apache.carbondata.hadoop.api.CarbonTableInputFormat in project carbondata by apache.

From class CarbonInputFormatUtil, method createCarbonInputFormat:

public static <V> CarbonTableInputFormat<V> createCarbonInputFormat(AbsoluteTableIdentifier identifier, Job job) throws IOException {
    CarbonTableInputFormat<V> carbonInputFormat = new CarbonTableInputFormat<>();
    CarbonTableInputFormat.setDatabaseName(job.getConfiguration(), identifier.getCarbonTableIdentifier().getDatabaseName());
    CarbonTableInputFormat.setTableName(job.getConfiguration(), identifier.getCarbonTableIdentifier().getTableName());
    FileInputFormat.addInputPath(job, new Path(identifier.getTablePath()));
    return carbonInputFormat;
}
Also used: Path (org.apache.hadoop.fs.Path), CarbonTableInputFormat (org.apache.carbondata.hadoop.api.CarbonTableInputFormat)
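
The same sketch applies to this no-partition variant, again assuming the Example 5 test-store identifier:

// Sketch only: identical wiring, minus the partition id list.
Job job = Job.getInstance(new Configuration());
CarbonTableInputFormat<Object> format = CarbonInputFormatUtil.createCarbonInputFormat(
        StoreCreator.getAbsoluteTableIdentifier(), job);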

Example 3 with CarbonTableInputFormat

Use of org.apache.carbondata.hadoop.api.CarbonTableInputFormat in project carbondata by apache.

From class CarbondataRecordSetProvider, method getRecordSet:

@Override
public RecordSet getRecordSet(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorSplit split, List<? extends ColumnHandle> columns) {
    CarbondataSplit carbondataSplit = checkType(split, CarbondataSplit.class, "split is not class CarbondataSplit");
    checkArgument(carbondataSplit.getConnectorId().equals(connectorId), "split is not for this connector");
    CarbonProjection carbonProjection = new CarbonProjection();
    // Convert all column handles
    ImmutableList.Builder<CarbondataColumnHandle> handles = ImmutableList.builder();
    for (ColumnHandle handle : columns) {
        handles.add(checkType(handle, CarbondataColumnHandle.class, "handle"));
        carbonProjection.addColumn(((CarbondataColumnHandle) handle).getColumnName());
    }
    CarbonTableCacheModel tableCacheModel = carbonTableReader.getCarbonCache(carbondataSplit.getSchemaTableName());
    checkNotNull(tableCacheModel, "tableCacheModel should not be null");
    checkNotNull(tableCacheModel.carbonTable, "tableCacheModel.carbonTable should not be null");
    checkNotNull(tableCacheModel.carbonTable.getTableInfo(), "tableCacheModel.tableInfo should not be null");
    // Build Query Model
    CarbonTable targetTable = tableCacheModel.carbonTable;
    QueryModel queryModel;
    TaskAttemptContextImpl hadoopAttemptContext;
    try {
        Configuration conf = new Configuration();
        conf.set(CarbonTableInputFormat.INPUT_SEGMENT_NUMBERS, "");
        String carbonTablePath = targetTable.getAbsoluteTableIdentifier().getTablePath();
        conf.set(CarbonTableInputFormat.INPUT_DIR, carbonTablePath);
        JobConf jobConf = new JobConf(conf);
        CarbonTableInputFormat<Object> carbonTableInputFormat = createInputFormat(jobConf, tableCacheModel.carbonTable, PrestoFilterUtil.parseFilterExpression(carbondataSplit.getConstraints()), carbonProjection);
        hadoopAttemptContext = new TaskAttemptContextImpl(jobConf, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
        CarbonInputSplit carbonInputSplit = CarbonLocalInputSplit.convertSplit(carbondataSplit.getLocalInputSplit());
        queryModel = carbonTableInputFormat.createQueryModel(carbonInputSplit, hadoopAttemptContext);
        queryModel.setVectorReader(true);
    } catch (IOException e) {
        throw new RuntimeException("Unable to get the Query Model", e);
    }
    return new CarbondataRecordSet(targetTable, session, carbondataSplit, handles.build(), queryModel, hadoopAttemptContext);
}
Also used: ColumnHandle (com.facebook.presto.spi.ColumnHandle), Configuration (org.apache.hadoop.conf.Configuration), ImmutableList (com.google.common.collect.ImmutableList), TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID), CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit), IOException (java.io.IOException), QueryModel (org.apache.carbondata.core.scan.model.QueryModel), CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable), CarbonProjection (org.apache.carbondata.hadoop.CarbonProjection), TaskAttemptContextImpl (org.apache.hadoop.mapred.TaskAttemptContextImpl), CarbonTableCacheModel (org.apache.carbondata.presto.impl.CarbonTableCacheModel), CarbonTableInputFormat (org.apache.carbondata.hadoop.api.CarbonTableInputFormat), JobConf (org.apache.hadoop.mapred.JobConf)
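
The private createInputFormat helper called above is not part of this excerpt. A minimal sketch of its plausible shape, using only public static configuration methods of CarbonTableInputFormat; the body is an assumption, not the connector's exact code:

// Assumed shape of the private helper: push table identity, filter
// push-down and column projection into the job configuration.
// Expression is org.apache.carbondata.core.scan.expression.Expression.
private static CarbonTableInputFormat<Object> createInputFormat(Configuration conf,
        CarbonTable table, Expression filter, CarbonProjection projection) {
    AbsoluteTableIdentifier identifier = table.getAbsoluteTableIdentifier();
    CarbonTableInputFormat.setDatabaseName(conf, identifier.getCarbonTableIdentifier().getDatabaseName());
    CarbonTableInputFormat.setTableName(conf, identifier.getCarbonTableIdentifier().getTableName());
    CarbonTableInputFormat.setFilterPredicates(conf, filter);
    CarbonTableInputFormat.setColumnProjection(conf, projection);
    return new CarbonTableInputFormat<>();
}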

Example 4 with CarbonTableInputFormat

Use of org.apache.carbondata.hadoop.api.CarbonTableInputFormat in project carbondata by apache.

From class CarbonStreamRecordReader, method initialize:

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    // input
    if (split instanceof CarbonInputSplit) {
        fileSplit = (CarbonInputSplit) split;
    } else if (split instanceof CarbonMultiBlockSplit) {
        fileSplit = ((CarbonMultiBlockSplit) split).getAllSplits().get(0);
    } else {
        fileSplit = (FileSplit) split;
    }
    // metadata
    hadoopConf = context.getConfiguration();
    if (model == null) {
        CarbonTableInputFormat<Object> format = new CarbonTableInputFormat<>();
        model = format.createQueryModel(split, context);
    }
    carbonTable = model.getTable();
    List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(carbonTable.getTableName());
    dimensionCount = dimensions.size();
    List<CarbonMeasure> measures = carbonTable.getMeasureByTableName(carbonTable.getTableName());
    measureCount = measures.size();
    List<CarbonColumn> carbonColumnList = carbonTable.getStreamStorageOrderColumn(carbonTable.getTableName());
    storageColumns = carbonColumnList.toArray(new CarbonColumn[carbonColumnList.size()]);
    isNoDictColumn = CarbonDataProcessorUtil.getNoDictionaryMapping(storageColumns);
    directDictionaryGenerators = new DirectDictionaryGenerator[storageColumns.length];
    for (int i = 0; i < storageColumns.length; i++) {
        if (storageColumns[i].hasEncoding(Encoding.DIRECT_DICTIONARY)) {
            directDictionaryGenerators[i] = DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(storageColumns[i].getDataType());
        }
    }
    measureDataTypes = new DataType[measureCount];
    for (int i = 0; i < measureCount; i++) {
        measureDataTypes[i] = storageColumns[dimensionCount + i].getDataType();
    }
    // decode data
    allNonNull = new BitSet(storageColumns.length);
    projection = model.getProjectionColumns();
    isRequired = new boolean[storageColumns.length];
    boolean[] isFilterDimensions = model.getIsFilterDimensions();
    boolean[] isFilterMeasures = model.getIsFilterMeasures();
    isFilterRequired = new boolean[storageColumns.length];
    filterMap = new int[storageColumns.length];
    for (int i = 0; i < storageColumns.length; i++) {
        if (storageColumns[i].isDimension()) {
            if (isFilterDimensions[storageColumns[i].getOrdinal()]) {
                isRequired[i] = true;
                isFilterRequired[i] = true;
                filterMap[i] = storageColumns[i].getOrdinal();
            }
        } else {
            if (isFilterMeasures[storageColumns[i].getOrdinal()]) {
                isRequired[i] = true;
                isFilterRequired[i] = true;
                filterMap[i] = carbonTable.getDimensionOrdinalMax() + storageColumns[i].getOrdinal();
            }
        }
    }
    isProjectionRequired = new boolean[storageColumns.length];
    projectionMap = new int[storageColumns.length];
    for (int i = 0; i < storageColumns.length; i++) {
        for (int j = 0; j < projection.length; j++) {
            if (storageColumns[i].getColName().equals(projection[j].getColName())) {
                isRequired[i] = true;
                isProjectionRequired[i] = true;
                projectionMap[i] = j;
                break;
            }
        }
    }
    // initialize filter
    if (null != model.getFilterExpressionResolverTree()) {
        initializeFilter();
    } else if (projection.length == 0) {
        skipScanData = true;
    }
}
Also used: CarbonColumn (org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn), BitSet (java.util.BitSet), CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit), FileSplit (org.apache.hadoop.mapreduce.lib.input.FileSplit), CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension), CarbonMeasure (org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure), CarbonMultiBlockSplit (org.apache.carbondata.hadoop.CarbonMultiBlockSplit), CarbonTableInputFormat (org.apache.carbondata.hadoop.api.CarbonTableInputFormat)
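
A hedged sketch of how such a reader is typically driven through the standard mapreduce InputFormat contract; the wiring below is an assumption (which concrete reader createRecordReader returns depends on the split type), not code from the project:

// Sketch only: obtain and initialize a reader for one split.
// `split` and `context` stand for the same parameters initialize
// receives above; RecordReader is org.apache.hadoop.mapreduce.RecordReader.
CarbonTableInputFormat<Object> format = new CarbonTableInputFormat<>();
RecordReader<Void, Object> reader = format.createRecordReader(split, context);
reader.initialize(split, context);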

Example 5 with CarbonTableInputFormat

Use of org.apache.carbondata.hadoop.api.CarbonTableInputFormat in project carbondata by apache.

From class CarbonTableInputFormatTest, method testGetSplits:

@Test
public void testGetSplits() throws Exception {
    CarbonTableInputFormat<Object> carbonInputFormat = new CarbonTableInputFormat<>();
    JobConf jobConf = new JobConf(new Configuration());
    Job job = Job.getInstance(jobConf);
    job.getConfiguration().set("query.id", UUID.randomUUID().toString());
    String tblPath = StoreCreator.getAbsoluteTableIdentifier().getTablePath();
    FileInputFormat.addInputPath(job, new Path(tblPath));
    CarbonTableInputFormat.setDatabaseName(job.getConfiguration(), StoreCreator.getAbsoluteTableIdentifier().getDatabaseName());
    CarbonTableInputFormat.setTableName(job.getConfiguration(), StoreCreator.getAbsoluteTableIdentifier().getTableName());
    // list files to get the carbondata file
    String segmentPath = CarbonTablePath.getSegmentPath(StoreCreator.getAbsoluteTableIdentifier().getTablePath(), "0");
    File segmentDir = new File(segmentPath);
    if (segmentDir.exists() && segmentDir.isDirectory()) {
        File[] files = segmentDir.listFiles(new FileFilter() {

            @Override
            public boolean accept(File pathname) {
                return pathname.getName().endsWith("carbondata");
            }
        });
        if (files != null && files.length > 0) {
            job.getConfiguration().set(CarbonTableInputFormat.INPUT_FILES, files[0].getName());
        }
    }
    List<InputSplit> splits = carbonInputFormat.getSplits(job);
    Assert.assertTrue(splits != null && splits.size() == 1);
}
Also used: Path (org.apache.hadoop.fs.Path), CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath), Configuration (org.apache.hadoop.conf.Configuration), CarbonTableInputFormat (org.apache.carbondata.hadoop.api.CarbonTableInputFormat), List (java.util.List), InputSplit (org.apache.hadoop.mapreduce.InputSplit), Job (org.apache.hadoop.mapreduce.Job), FileFilter (java.io.FileFilter), JobConf (org.apache.hadoop.mapred.JobConf), File (java.io.File), Test (org.junit.Test)
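
Since java.io.FileFilter is a functional interface, the anonymous class in the test above can be written as a lambda with identical behavior:

// Equivalent lambda form of the FileFilter used above.
File[] files = segmentDir.listFiles(pathname -> pathname.getName().endsWith("carbondata"));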

Aggregations

CarbonTableInputFormat (org.apache.carbondata.hadoop.api.CarbonTableInputFormat): 9 uses
Configuration (org.apache.hadoop.conf.Configuration): 4 uses
Path (org.apache.hadoop.fs.Path): 4 uses
JobConf (org.apache.hadoop.mapred.JobConf): 4 uses
CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit): 3 uses
Job (org.apache.hadoop.mapreduce.Job): 3 uses
IOException (java.io.IOException): 2 uses
List (java.util.List): 2 uses
CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 2 uses
CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath): 2 uses
Test (org.junit.Test): 2 uses
Gson (com.facebook.presto.hadoop.$internal.com.google.gson.Gson): 1 use
ColumnHandle (com.facebook.presto.spi.ColumnHandle): 1 use
ImmutableList (com.google.common.collect.ImmutableList): 1 use
File (java.io.File): 1 use
FileFilter (java.io.FileFilter): 1 use
ArrayList (java.util.ArrayList): 1 use
BitSet (java.util.BitSet): 1 use
AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier): 1 use
TableInfo (org.apache.carbondata.core.metadata.schema.table.TableInfo): 1 use