
Example 26 with LoadMetadataDetails

Use of org.apache.carbondata.core.statusmanager.LoadMetadataDetails in project carbondata by apache.

From the class StoreCreator, method createCarbonStore:

/**
   * Create a store without any restructuring.
   */
public static void createCarbonStore() {
    try {
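        // resolve the sample CSV from the hadoop module's test resources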
        String factFilePath = new File("../hadoop/src/test/resources/data.csv").getCanonicalPath();
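        // start from a clean store directory and register it with CarbonProperties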
        File storeDir = new File(absoluteTableIdentifier.getStorePath());
        CarbonUtil.deleteFoldersAndFiles(storeDir);
        CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION_HDFS, absoluteTableIdentifier.getStorePath());
        CarbonTable table = createTable();
        writeDictionary(factFilePath, table);
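        // configure the load model: schema, table identity, CSV format and bad-record handling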
        CarbonDataLoadSchema schema = new CarbonDataLoadSchema(table);
        CarbonLoadModel loadModel = new CarbonLoadModel();
        String partitionId = "0";
        loadModel.setCarbonDataLoadSchema(schema);
        loadModel.setDatabaseName(absoluteTableIdentifier.getCarbonTableIdentifier().getDatabaseName());
        loadModel.setTableName(absoluteTableIdentifier.getCarbonTableIdentifier().getTableName());
        loadModel.setFactFilePath(factFilePath);
        loadModel.setLoadMetadataDetails(new ArrayList<LoadMetadataDetails>());
        loadModel.setStorePath(absoluteTableIdentifier.getStorePath());
        loadModel.setDateFormat(null);
        loadModel.setDefaultTimestampFormat(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT));
        loadModel.setDefaultDateFormat(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT));
        loadModel.setSerializationNullFormat(TableOptionConstant.SERIALIZATION_NULL_FORMAT.getName() + "," + "\\N");
        loadModel.setBadRecordsLoggerEnable(TableOptionConstant.BAD_RECORDS_LOGGER_ENABLE.getName() + "," + "false");
        loadModel.setBadRecordsAction(TableOptionConstant.BAD_RECORDS_ACTION.getName() + "," + "FORCE");
        loadModel.setIsEmptyDataBadRecord(DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD + "," + "false");
        loadModel.setCsvHeader("ID,date,country,name,phonetype,serialname,salary");
        loadModel.setCsvHeaderColumns(loadModel.getCsvHeader().split(","));
        loadModel.setTaskNo("0");
        loadModel.setSegmentId("0");
        loadModel.setPartitionId(partitionId);
        loadModel.setFactTimeStamp(System.currentTimeMillis());
        loadModel.setMaxColumns("10");
        executeGraph(loadModel, absoluteTableIdentifier.getStorePath());
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Also used: CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) CarbonLoadModel(org.apache.carbondata.processing.model.CarbonLoadModel) CarbonDataLoadSchema(org.apache.carbondata.processing.model.CarbonDataLoadSchema) File(java.io.File) IOException(java.io.IOException)
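
Since every example here centers on LoadMetadataDetails, it helps to see what those entries represent once a load finishes: the tablestatus file under the table's Metadata folder is a JSON array with one LoadMetadataDetails entry per segment. Below is a minimal sketch of reading it back; the metadata path is illustrative (the examples derive it from the store path rather than hard-coding it), and the exact file layout should be treated as an assumption.

import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.statusmanager.LoadMetadataDetails;
import org.apache.carbondata.core.statusmanager.SegmentStatusManager;

public class TableStatusCheck {
    public static void main(String[] args) {
        // Illustrative path; in practice it comes from CarbonTablePath for the table.
        String metadataFolder = "/tmp/carbon.store/testdb/testtable/Metadata";
        // readLoadMetadata parses the tablestatus file into one entry per segment.
        LoadMetadataDetails[] details = SegmentStatusManager.readLoadMetadata(metadataFolder);
        for (LoadMetadataDetails detail : details) {
            // A committed load carries the "Success" status constant.
            boolean success = CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS.equals(detail.getLoadStatus());
            System.out.println("Segment_" + detail.getLoadName() + " success=" + success);
        }
    }
}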

Example 27 with LoadMetadataDetails

Use of org.apache.carbondata.core.statusmanager.LoadMetadataDetails in project carbondata by apache.

From the class StoreCreator, method executeGraph:

/**
   * Execute the load graph, which reads the fact file and loads it into the store.
   *
   * @param loadModel     the populated load model describing the CSV load
   * @param storeLocation the store path to load the segment into
   * @throws Exception
   */
public static void executeGraph(CarbonLoadModel loadModel, String storeLocation) throws Exception {
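    // stage the store directory and register temp/output locations plus legacy tuning properties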
    new File(storeLocation).mkdirs();
    String outPutLoc = storeLocation + "/etl";
    String databaseName = loadModel.getDatabaseName();
    String tableName = loadModel.getTableName();
    String tempLocationKey = databaseName + '_' + tableName + "_1";
    CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
    CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
    CarbonProperties.getInstance().addProperty("send.signal.load", "false");
    CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
    CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
    CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
    CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
    CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
    CarbonProperties.getInstance().addProperty("high.cardinality.value", "100000");
    CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");
    CarbonProperties.getInstance().addProperty("carbon.leaf.node.size", "120000");
    String graphPath = outPutLoc + File.separator + loadModel.getDatabaseName() + File.separator + tableName + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
    File path = new File(graphPath);
    if (path.exists()) {
        path.delete();
    }
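    // treat the whole fact file as a single local input split and configure the Hadoop CSV reader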
    SchemaInfo info = new SchemaInfo();
    BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0, new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
    Configuration configuration = new Configuration();
    CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
    CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
    CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
    CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
    CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
    CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE, CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
    CSVInputFormat.setNumberOfColumns(configuration, String.valueOf(loadModel.getCsvHeaderColumns().length));
    CSVInputFormat.setMaxColumns(configuration, "10");
    TaskAttemptContextImpl hadoopAttemptContext = new TaskAttemptContextImpl(configuration, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
    CSVInputFormat format = new CSVInputFormat();
    RecordReader<NullWritable, StringArrayWritable> recordReader = format.createRecordReader(blockDetails, hadoopAttemptContext);
    CSVRecordReaderIterator readerIterator = new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
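    // run the data load flow over the CSV record iterator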
    new DataLoadExecutor().execute(loadModel, storeLocation, new CarbonIterator[] { readerIterator });
    info.setDatabaseName(databaseName);
    info.setTableName(tableName);
    writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), databaseName, tableName, new ArrayList<LoadMetadataDetails>());
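    // promote the generated fact file from its task folder up to the segment folder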
    String segLocation = storeLocation + "/" + databaseName + "/" + tableName + "/Fact/Part0/Segment_0";
    File file = new File(segLocation);
    File factFile = null;
    File[] folderList = file.listFiles();
    File folder = null;
    if (folderList != null) {
        for (int i = 0; i < folderList.length; i++) {
            if (folderList[i].isDirectory()) {
                folder = folderList[i];
            }
        }
    }
    if (folder != null && folder.isDirectory()) {
        File[] files = folder.listFiles();
        for (int i = 0; i < files.length; i++) {
            if (!files[i].isDirectory() && files[i].getName().startsWith("part")) {
                factFile = files[i];
                break;
            }
        }
        if (factFile != null) {
            factFile.renameTo(new File(segLocation + "/" + factFile.getName()));
        }
        CarbonUtil.deleteFoldersAndFiles(folder);
    }
}
Also used: Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) CarbonStorePath(org.apache.carbondata.core.util.path.CarbonStorePath) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) CSVRecordReaderIterator(org.apache.carbondata.processing.csvload.CSVRecordReaderIterator) StringArrayWritable(org.apache.carbondata.processing.csvload.StringArrayWritable) NullWritable(org.apache.hadoop.io.NullWritable) CSVInputFormat(org.apache.carbondata.processing.csvload.CSVInputFormat) BlockDetails(org.apache.carbondata.processing.csvload.BlockDetails) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) File(java.io.File) DataLoadExecutor(org.apache.carbondata.processing.newflow.DataLoadExecutor) SchemaInfo(org.apache.carbondata.processing.api.dataloader.SchemaInfo)
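
Wiring the two methods together is all a test needs to stand up a local store with one loaded segment; a minimal sketch, with the test class name being illustrative (createCarbonStore already calls executeGraph internally):

import org.junit.Test;

public class StoreCreatorUsageTest {

    @Test
    public void createsLocalStoreWithOneSegment() {
        // Creates the table and dictionary files, then runs the load,
        // leaving the fact file under .../Fact/Part0/Segment_0 of the store path.
        StoreCreator.createCarbonStore();
    }
}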

Aggregations

LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails): 27 usages
ArrayList (java.util.ArrayList): 11 usages
CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath): 10 usages
IOException (java.io.IOException): 9 usages
AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier): 7 usages
SegmentStatusManager (org.apache.carbondata.core.statusmanager.SegmentStatusManager): 7 usages
ICarbonLock (org.apache.carbondata.core.locks.ICarbonLock): 6 usages
File (java.io.File): 4 usages
Test (org.junit.Test): 4 usages
ParseException (java.text.ParseException): 3 usages
CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 3 usages
Gson (com.google.gson.Gson): 2 usages
BufferedWriter (java.io.BufferedWriter): 2 usages
DataOutputStream (java.io.DataOutputStream): 2 usages
OutputStreamWriter (java.io.OutputStreamWriter): 2 usages
CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile): 2 usages
CarbonFileFilter (org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter): 2 usages
AtomicFileOperations (org.apache.carbondata.core.fileoperations.AtomicFileOperations): 2 usages
AtomicFileOperationsImpl (org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl): 2 usages
SegmentUpdateDetails (org.apache.carbondata.core.mutate.SegmentUpdateDetails): 2 usages