Use of org.apache.carbondata.processing.loading.DataLoadExecutor in project carbondata by apache.
The class StoreCreator, method loadData.
/**
 * Executes the load graph, which in turn loads the data into the store.
 *
 * @param loadModel     load model describing the target table and the input CSV
 * @param storeLocation local file system path of the store to load into
 * @throws Exception if any step of the load fails
 */
public static void loadData(CarbonLoadModel loadModel, String storeLocation) throws Exception {
  new File(storeLocation).mkdirs();
  String outPutLoc = storeLocation + "/etl";
  String databaseName = loadModel.getDatabaseName();
  String tableName = loadModel.getTableName();
  String tempLocationKey = databaseName + '_' + tableName + "_1";
  // Load-time properties; these override any values picked up from carbon.properties.
  CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
  CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
  CarbonProperties.getInstance().addProperty("send.signal.load", "false");
  CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
  CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
  CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
  CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
  CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
  CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");
  CarbonProperties.getInstance().addProperty("carbon.leaf.node.size", "120000");
  // Remove any graph file left behind by a previous load of the same table.
  String graphPath = outPutLoc + File.separator + loadModel.getDatabaseName() + File.separator
      + tableName + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
  File path = new File(graphPath);
  if (path.exists()) {
    path.delete();
  }
  // Treat the whole fact file as a single local block.
  BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0,
      new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
  // Configure the CSV reader from the load model.
  Configuration configuration = new Configuration();
  CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
  CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
  CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
  CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
  CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
  CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance()
      .getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE,
          CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
  CSVInputFormat.setMaxColumns(configuration, "10");
  CSVInputFormat.setNumberOfColumns(configuration, "7");
  TaskAttemptContextImpl hadoopAttemptContext =
      new TaskAttemptContextImpl(configuration, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
  CSVInputFormat format = new CSVInputFormat();
  RecordReader<NullWritable, StringArrayWritable> recordReader =
      format.createRecordReader(blockDetails, hadoopAttemptContext);
  CSVRecordReaderIterator readerIterator =
      new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
  String[] storeLocationArray = new String[] { storeLocation + "/" + databaseName + "/" + tableName };
  // Run the load: DataLoadExecutor drains the CSV iterator and writes the segment.
  new DataLoadExecutor().execute(loadModel, storeLocationArray, new CarbonIterator[] { readerIterator });
  writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), loadModel.getTableName(),
      loadModel.getTableName(), new ArrayList<LoadMetadataDetails>());
}
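For orientation, a minimal driver sketch for the method above (not part of the source). The CarbonLoadModel setters mirror the getters that loadData calls and are assumed to exist; the database, table, and file names are placeholders, and a real model also needs the table schema, CSV header, and delimiter populated before the load can succeed.

import java.io.File;

import org.apache.carbondata.processing.loading.model.CarbonLoadModel;

public class LoadDataDriver {
  public static void main(String[] args) throws Exception {
    CarbonLoadModel loadModel = new CarbonLoadModel();
    // Assumed setters, mirroring the getters used inside loadData().
    loadModel.setDatabaseName("testdb");
    loadModel.setTableName("testtable");
    loadModel.setFactFilePath(new File("data/data.csv").getAbsolutePath());
    // A real model additionally needs the CarbonDataLoadSchema, CSV header,
    // delimiter, and quote/escape/comment characters set before loading.
    StoreCreator.loadData(loadModel, new File("target/store").getAbsolutePath());
  }
}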
Use of org.apache.carbondata.processing.loading.DataLoadExecutor in project carbondata by apache.
The class CarbonTableOutputFormat, method getRecordWriter.
@Override
public RecordWriter<NullWritable, ObjectArrayWritable> getRecordWriter(TaskAttemptContext taskAttemptContext)
    throws IOException {
  final CarbonLoadModel loadModel = getLoadModel(taskAttemptContext.getConfiguration());
  loadModel.setTaskNo(taskAttemptContext.getConfiguration()
      .get("carbon.outputformat.taskno", String.valueOf(System.nanoTime())));
  loadModel.setDataWritePath(taskAttemptContext.getConfiguration().get("carbon.outputformat.writepath"));
  final String[] tempStoreLocations = getTempStoreLocations(taskAttemptContext);
  final CarbonOutputIteratorWrapper iteratorWrapper = new CarbonOutputIteratorWrapper();
  final DataLoadExecutor dataLoadExecutor = new DataLoadExecutor();
  ExecutorService executorService = Executors.newFixedThreadPool(1,
      new CarbonThreadFactory("CarbonRecordWriter:" + loadModel.getTableName()));
  // The load must run in a separate thread because the underlying iterator uses a
  // blocking queue: the record writer fills it while the executor drains it.
  Future future = executorService.submit(new Runnable() {
    @Override
    public void run() {
      try {
        dataLoadExecutor.execute(loadModel, tempStoreLocations, new CarbonIterator[] { iteratorWrapper });
      } catch (Exception e) {
        dataLoadExecutor.close();
        // Clean up the folders and files created locally for the data load operation.
        TableProcessingOperations.deleteLocalDataLoadFolderLocation(loadModel, false, false);
        throw new RuntimeException(e);
      }
    }
  });
  return new CarbonRecordWriter(iteratorWrapper, dataLoadExecutor, loadModel, future, executorService);
}
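A hedged usage sketch for the writer returned above (not from the source). It assumes the load model has already been serialized into the job configuration, for example via CarbonTableOutputFormat.setLoadModel, and that the caller holds the task's TaskAttemptContext; the row values are placeholders.

// Sketch only: one task pushing rows through the record writer. write() hands each
// row to the CarbonOutputIteratorWrapper's blocking queue; close() flushes the last
// batch and waits for the background load thread started in getRecordWriter().
public static void writeRows(TaskAttemptContext context) throws IOException, InterruptedException {
  RecordWriter<NullWritable, ObjectArrayWritable> writer =
      new CarbonTableOutputFormat().getRecordWriter(context);
  ObjectArrayWritable writable = new ObjectArrayWritable();
  writable.set(new Object[] { "1", "bob", "2015-01-01" });  // placeholder row; must match the table schema order
  writer.write(NullWritable.get(), writable);
  writer.close(context);
}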
Use of org.apache.carbondata.processing.loading.DataLoadExecutor in project carbondata by apache.
The class StoreCreator, method loadData.
/**
 * Executes the load graph, which in turn loads the data into the store.
 *
 * @param loadModel     load model describing the target table and the input CSV
 * @param storeLocation local file system path of the store to load into
 * @throws Exception if any step of the load fails
 */
public static void loadData(CarbonLoadModel loadModel, String storeLocation) throws Exception {
  // Note the negation: mkdirs() returns false when the directories could not be created.
  if (!new File(storeLocation).mkdirs()) {
    LOG.warn("mkdirs failed for " + storeLocation);
  }
  String outPutLoc = storeLocation + "/etl";
  String databaseName = loadModel.getDatabaseName();
  String tableName = loadModel.getTableName();
  String tempLocationKey = databaseName + '_' + tableName + "_1";
  CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation + "/" + databaseName + "/" + tableName);
  CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
  CarbonProperties.getInstance().addProperty("send.signal.load", "false");
  CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
  CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
  CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
  CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
  CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
  CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");
  CarbonProperties.getInstance().addProperty("carbon.leaf.node.size", "120000");
  String graphPath = outPutLoc + File.separator + loadModel.getDatabaseName() + File.separator
      + tableName + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
  File path = new File(graphPath);
  if (path.exists()) {
    if (!path.delete()) {
      LOG.warn("delete " + path + " failed");
    }
  }
  BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0,
      new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
  Configuration configuration = new Configuration();
  CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
  CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
  CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
  CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
  CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
  CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance()
      .getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE,
          CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
  // Unlike the variant above, the column count is derived from the CSV header.
  CSVInputFormat.setNumberOfColumns(configuration, String.valueOf(loadModel.getCsvHeaderColumns().length));
  CSVInputFormat.setMaxColumns(configuration, "10");
  TaskAttemptContextImpl hadoopAttemptContext =
      new TaskAttemptContextImpl(configuration, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
  CSVInputFormat format = new CSVInputFormat();
  RecordReader<NullWritable, StringArrayWritable> recordReader =
      format.createRecordReader(blockDetails, hadoopAttemptContext);
  CSVRecordReaderIterator readerIterator =
      new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
  new DataLoadExecutor().execute(loadModel, new String[] { storeLocation + "/" + databaseName + "/" + tableName },
      new CarbonIterator[] { readerIterator });
  writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), loadModel.getTableName(),
      loadModel.getTableName(), new ArrayList<LoadMetadataDetails>());
}
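If the parsed rows need to be inspected before the load, a small debugging sketch (an assumption, not source code) can drain the same iterator; note that any row consumed this way never reaches DataLoadExecutor.

// Debugging sketch: print every row the CSV reader produces, using the same
// recordReader/blockDetails/hadoopAttemptContext objects built in loadData above.
private static void dumpRows(RecordReader<NullWritable, StringArrayWritable> recordReader,
    BlockDetails blockDetails, TaskAttemptContextImpl hadoopAttemptContext) throws Exception {
  CSVRecordReaderIterator rows =
      new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
  while (rows.hasNext()) {
    System.out.println(java.util.Arrays.toString(rows.next()));
  }
}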