Search in sources :

Example 1 with DataLoadExecutor

Use of org.apache.carbondata.processing.loading.DataLoadExecutor in project carbondata by apache.

The class StoreCreator, method loadData.

/**
 * Executes the load graph, which in turn loads the fact file into the store.
 *
 * @param loadModel     load model describing the target table and input CSV
 * @param storeLocation local directory used as the store and temp location
 * @throws Exception if the data load fails
 */
public static void loadData(CarbonLoadModel loadModel, String storeLocation) throws Exception {
    new File(storeLocation).mkdirs();
    String outPutLoc = storeLocation + "/etl";
    String databaseName = loadModel.getDatabaseName();
    String tableName = loadModel.getTableName();
    String tempLocationKey = databaseName + '_' + tableName + "_1";
    CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
    CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
    CarbonProperties.getInstance().addProperty("send.signal.load", "false");
    CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
    CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
    CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
    CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
    CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
    CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");
    CarbonProperties.getInstance().addProperty("carbon.leaf.node.size", "120000");
    String graphPath = outPutLoc + File.separator + loadModel.getDatabaseName() + File.separator + tableName + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
    File path = new File(graphPath);
    if (path.exists()) {
        path.delete();
    }
    BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0, new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
    Configuration configuration = new Configuration();
    CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
    CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
    CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
    CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
    CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
    CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE, CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
    CSVInputFormat.setMaxColumns(configuration, "10");
    CSVInputFormat.setNumberOfColumns(configuration, "7");
    TaskAttemptContextImpl hadoopAttemptContext = new TaskAttemptContextImpl(configuration, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
    CSVInputFormat format = new CSVInputFormat();
    RecordReader<NullWritable, StringArrayWritable> recordReader = format.createRecordReader(blockDetails, hadoopAttemptContext);
    CSVRecordReaderIterator readerIterator = new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
    String[] storeLocationArray = new String[] { storeLocation + "/" + databaseName + "/" + tableName };
    new DataLoadExecutor().execute(loadModel, storeLocationArray, new CarbonIterator[] { readerIterator });
    writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), loadModel.getTableName(), loadModel.getTableName(), new ArrayList<LoadMetadataDetails>());
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) CSVRecordReaderIterator(org.apache.carbondata.processing.loading.csvinput.CSVRecordReaderIterator) StringArrayWritable(org.apache.carbondata.processing.loading.csvinput.StringArrayWritable) NullWritable(org.apache.hadoop.io.NullWritable) CSVInputFormat(org.apache.carbondata.processing.loading.csvinput.CSVInputFormat) BlockDetails(org.apache.carbondata.processing.loading.csvinput.BlockDetails) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) File(java.io.File) DataLoadExecutor(org.apache.carbondata.processing.loading.DataLoadExecutor)
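
A hedged usage sketch for this helper: the createTableAndLoadModel() call below is an assumption standing in for whatever populates the CarbonLoadModel (database/table names, fact file path, CSV delimiters, schema, and so on); a fully populated model is required before loadData() will succeed.

// Minimal sketch, not the project's test harness: createTableAndLoadModel() is a
// hypothetical builder; replace it with however your code constructs the load model.
public static void main(String[] args) throws Exception {
    String storeLocation = new File("target/store").getAbsolutePath();
    CarbonLoadModel loadModel = StoreCreator.createTableAndLoadModel(); // hypothetical helper
    StoreCreator.loadData(loadModel, storeLocation);
}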

Example 2 with DataLoadExecutor

Use of org.apache.carbondata.processing.loading.DataLoadExecutor in project carbondata by apache.

The class CarbonTableOutputFormat, method getRecordWriter.

@Override
public RecordWriter<NullWritable, ObjectArrayWritable> getRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException {
    final CarbonLoadModel loadModel = getLoadModel(taskAttemptContext.getConfiguration());
    loadModel.setTaskNo(taskAttemptContext.getConfiguration().get("carbon.outputformat.taskno", String.valueOf(System.nanoTime())));
    loadModel.setDataWritePath(taskAttemptContext.getConfiguration().get("carbon.outputformat.writepath"));
    final String[] tempStoreLocations = getTempStoreLocations(taskAttemptContext);
    final CarbonOutputIteratorWrapper iteratorWrapper = new CarbonOutputIteratorWrapper();
    final DataLoadExecutor dataLoadExecutor = new DataLoadExecutor();
    ExecutorService executorService = Executors.newFixedThreadPool(1, new CarbonThreadFactory("CarbonRecordWriter:" + loadModel.getTableName()));
    // The load must run in a separate thread because the underlying iterator uses a blocking queue.
    Future future = executorService.submit(new Runnable() {

        @Override
        public void run() {
            try {
                dataLoadExecutor.execute(loadModel, tempStoreLocations, new CarbonIterator[] { iteratorWrapper });
            } catch (Exception e) {
                dataLoadExecutor.close();
                // clean up the folders and files created locally for data load operation
                TableProcessingOperations.deleteLocalDataLoadFolderLocation(loadModel, false, false);
                throw new RuntimeException(e);
            }
        }
    });
    return new CarbonRecordWriter(iteratorWrapper, dataLoadExecutor, loadModel, future, executorService);
}
Also used : IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) CarbonIterator(org.apache.carbondata.common.CarbonIterator) CarbonOutputIteratorWrapper(org.apache.carbondata.processing.loading.iterator.CarbonOutputIteratorWrapper) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) ExecutorService(java.util.concurrent.ExecutorService) CarbonThreadFactory(org.apache.carbondata.core.util.CarbonThreadFactory) Future(java.util.concurrent.Future) DataLoadExecutor(org.apache.carbondata.processing.loading.DataLoadExecutor)
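
Because the returned CarbonRecordWriter merely feeds the blocking CarbonOutputIteratorWrapper, the caller's side looks like any Hadoop RecordWriter. A minimal sketch, assuming ObjectArrayWritable exposes a set(Object[]) mutator and that the CarbonLoadModel was attached to the task configuration beforehand (the exact setter is not shown in this example):

// Hedged sketch of the consumer side of getRecordWriter().
void writeOneRow(TaskAttemptContext context) throws IOException, InterruptedException {
    RecordWriter<NullWritable, ObjectArrayWritable> writer =
            new CarbonTableOutputFormat().getRecordWriter(context);
    try {
        ObjectArrayWritable row = new ObjectArrayWritable();
        row.set(new Object[] { "name1", 21, "city1" }); // one row, column order per table schema (assumed setter)
        writer.write(NullWritable.get(), row);          // enqueues into the blocking iterator
    } finally {
        writer.close(context);                          // drains the queue and waits on the load future
    }
}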

Example 3 with DataLoadExecutor

Use of org.apache.carbondata.processing.loading.DataLoadExecutor in project carbondata by apache.

The class StoreCreator, method loadData.

/**
 * Executes the load graph, which in turn loads the fact file into the store.
 *
 * @param loadModel     load model describing the target table and input CSV
 * @param storeLocation local directory used as the store and temp location
 * @throws Exception if the data load fails
 */
public static void loadData(CarbonLoadModel loadModel, String storeLocation) throws Exception {
    if (!new File(storeLocation).mkdirs()) {
        LOG.warn("mkdirs failed for " + storeLocation);
    }
    String outPutLoc = storeLocation + "/etl";
    String databaseName = loadModel.getDatabaseName();
    String tableName = loadModel.getTableName();
    String tempLocationKey = databaseName + '_' + tableName + "_1";
    CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation + "/" + databaseName + "/" + tableName);
    CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
    CarbonProperties.getInstance().addProperty("send.signal.load", "false");
    CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
    CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
    CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
    CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
    CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
    CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");
    CarbonProperties.getInstance().addProperty("carbon.leaf.node.size", "120000");
    String graphPath = outPutLoc + File.separator + loadModel.getDatabaseName() + File.separator + tableName + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
    File path = new File(graphPath);
    if (path.exists()) {
        if (!path.delete()) {
            LOG.warn("delete " + path + " failed");
        }
    }
    BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0, new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
    Configuration configuration = new Configuration();
    CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
    CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
    CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
    CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
    CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
    CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE, CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
    CSVInputFormat.setNumberOfColumns(configuration, String.valueOf(loadModel.getCsvHeaderColumns().length));
    CSVInputFormat.setMaxColumns(configuration, "10");
    TaskAttemptContextImpl hadoopAttemptContext = new TaskAttemptContextImpl(configuration, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
    CSVInputFormat format = new CSVInputFormat();
    RecordReader<NullWritable, StringArrayWritable> recordReader = format.createRecordReader(blockDetails, hadoopAttemptContext);
    CSVRecordReaderIterator readerIterator = new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
    new DataLoadExecutor().execute(loadModel, new String[] { storeLocation + "/" + databaseName + "/" + tableName }, new CarbonIterator[] { readerIterator });
    writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), loadModel.getTableName(), loadModel.getTableName(), new ArrayList<LoadMetadataDetails>());
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) CSVRecordReaderIterator(org.apache.carbondata.processing.loading.csvinput.CSVRecordReaderIterator) StringArrayWritable(org.apache.carbondata.processing.loading.csvinput.StringArrayWritable) NullWritable(org.apache.hadoop.io.NullWritable) CSVInputFormat(org.apache.carbondata.processing.loading.csvinput.CSVInputFormat) BlockDetails(org.apache.carbondata.processing.loading.csvinput.BlockDetails) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) File(java.io.File) DataLoadExecutor(org.apache.carbondata.processing.loading.DataLoadExecutor)
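
Both StoreCreator variants leave the DataLoadExecutor open if execute() throws; only Example 2 calls close() on failure. A hedged sketch of the safer pattern, using only the execute() and close() calls shown above:

DataLoadExecutor executor = new DataLoadExecutor();
try {
    executor.execute(loadModel,
        new String[] { storeLocation + "/" + databaseName + "/" + tableName },
        new CarbonIterator[] { readerIterator });
} finally {
    // Release loader resources even when execute() fails; Example 2 additionally
    // deletes the local load folders via TableProcessingOperations.
    executor.close();
}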

Aggregations

DataLoadExecutor (org.apache.carbondata.processing.loading.DataLoadExecutor)3 File (java.io.File)2 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)2 CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath)2 BlockDetails (org.apache.carbondata.processing.loading.csvinput.BlockDetails)2 CSVInputFormat (org.apache.carbondata.processing.loading.csvinput.CSVInputFormat)2 CSVRecordReaderIterator (org.apache.carbondata.processing.loading.csvinput.CSVRecordReaderIterator)2 StringArrayWritable (org.apache.carbondata.processing.loading.csvinput.StringArrayWritable)2 Configuration (org.apache.hadoop.conf.Configuration)2 Path (org.apache.hadoop.fs.Path)2 NullWritable (org.apache.hadoop.io.NullWritable)2 TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID)2 TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl)2 IOException (java.io.IOException)1 ExecutionException (java.util.concurrent.ExecutionException)1 ExecutorService (java.util.concurrent.ExecutorService)1 Future (java.util.concurrent.Future)1 CarbonIterator (org.apache.carbondata.common.CarbonIterator)1 CarbonThreadFactory (org.apache.carbondata.core.util.CarbonThreadFactory)1 CarbonOutputIteratorWrapper (org.apache.carbondata.processing.loading.iterator.CarbonOutputIteratorWrapper)1