Search in sources :

Example 1 with CarbonDataLoadSchema

use of org.apache.carbondata.processing.loading.model.CarbonDataLoadSchema in project carbondata by apache.

the class StoreCreator method createCarbonStore.

/**
 * Create store without any restructure.
 *
 * <p>Deletes whatever exists at {@code storePath}, registers that path under
 * {@code CarbonCommonConstants.STORE_LOCATION_HDFS}, creates the table, calls
 * {@code writeDictionary} for the fact CSV file and finally hands a fully populated
 * {@link CarbonLoadModel} to {@code loadData}.
 *
 * <p>Any exception raised along the way is caught here and only its stack trace is
 * printed; nothing is propagated to the caller.
 */
public static void createCarbonStore() {
    try {
        String factFilePath = new File("../hadoop/src/test/resources/data.csv").getCanonicalPath();
        File storeDir = new File(storePath);
        // Start from a clean store directory.
        CarbonUtil.deleteFoldersAndFiles(storeDir);
        CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION_HDFS, storePath);
        CarbonTable table = createTable();
        writeDictionary(factFilePath, table);
        CarbonDataLoadSchema schema = new CarbonDataLoadSchema(table);
        CarbonLoadModel loadModel = new CarbonLoadModel();
        loadModel.setCarbonDataLoadSchema(schema);
        // Table identity and locations.
        loadModel.setDatabaseName(identifier.getCarbonTableIdentifier().getDatabaseName());
        loadModel.setTableName(identifier.getCarbonTableIdentifier().getTableName());
        loadModel.setFactFilePath(factFilePath);
        loadModel.setLoadMetadataDetails(new ArrayList<LoadMetadataDetails>());
        loadModel.setTablePath(identifier.getTablePath());
        // Date/timestamp handling: no explicit date format, defaults come from CarbonProperties.
        loadModel.setDateFormat(null);
        loadModel.setDefaultTimestampFormat(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT));
        loadModel.setDefaultDateFormat(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT));
        // These options are passed as "<option name>,<value>" strings.
        loadModel.setSerializationNullFormat(TableOptionConstant.SERIALIZATION_NULL_FORMAT.getName() + "," + "\\N");
        loadModel.setBadRecordsLoggerEnable(TableOptionConstant.BAD_RECORDS_LOGGER_ENABLE.getName() + "," + "false");
        loadModel.setBadRecordsAction(TableOptionConstant.BAD_RECORDS_ACTION.getName() + "," + "FORCE");
        loadModel.setIsEmptyDataBadRecord(DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD + "," + "false");
        // CSV layout of the fact file.
        loadModel.setCsvHeader("ID,date,country,name,phonetype,serialname,salary");
        loadModel.setCsvHeaderColumns(loadModel.getCsvHeader().split(","));
        loadModel.setTaskNo("0");
        loadModel.setSegmentId("0");
        loadModel.setFactTimeStamp(System.currentTimeMillis());
        loadModel.setMaxColumns("10");
        loadData(loadModel, storePath);
    } catch (Exception e) {
        // NOTE(review): failures are only printed, so callers cannot tell that store creation
        // failed. Kept as-is because the method signature declares no exceptions.
        e.printStackTrace();
    }
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) CarbonDataLoadSchema(org.apache.carbondata.processing.loading.model.CarbonDataLoadSchema) File(java.io.File) IOException(java.io.IOException)

Example 2 with CarbonDataLoadSchema

use of org.apache.carbondata.processing.loading.model.CarbonDataLoadSchema in project carbondata by apache.

the class StoreCreator method buildCarbonLoadModel.

/**
 * Builds a {@link CarbonLoadModel} for loading the given fact file into the given table.
 *
 * @param table table from which the {@link CarbonDataLoadSchema} of the model is created
 * @param factFilePath path that is set as the fact file path of the model
 * @param absoluteTableIdentifier source of the database name, table name and table path
 * @return a newly created load model; task number and segment id are both {@code "0"} and
 *     the fact timestamp is the current system time
 */
public static CarbonLoadModel buildCarbonLoadModel(CarbonTable table, String factFilePath, AbsoluteTableIdentifier absoluteTableIdentifier) {
    CarbonDataLoadSchema schema = new CarbonDataLoadSchema(table);
    CarbonLoadModel loadModel = new CarbonLoadModel();
    loadModel.setCarbonDataLoadSchema(schema);
    // Table identity and locations.
    loadModel.setDatabaseName(absoluteTableIdentifier.getCarbonTableIdentifier().getDatabaseName());
    loadModel.setTableName(absoluteTableIdentifier.getCarbonTableIdentifier().getTableName());
    loadModel.setFactFilePath(factFilePath);
    loadModel.setLoadMetadataDetails(new ArrayList<LoadMetadataDetails>());
    loadModel.setTablePath(absoluteTableIdentifier.getTablePath());
    // Date/timestamp handling: no explicit date format, defaults come from CarbonProperties.
    loadModel.setDateFormat(null);
    loadModel.setDefaultTimestampFormat(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_MILLIS));
    loadModel.setDefaultDateFormat(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT));
    // These options are passed as "<option name>,<value>" strings.
    loadModel.setSerializationNullFormat(TableOptionConstant.SERIALIZATION_NULL_FORMAT.getName() + "," + "\\N");
    loadModel.setBadRecordsLoggerEnable(TableOptionConstant.BAD_RECORDS_LOGGER_ENABLE.getName() + "," + "false");
    loadModel.setBadRecordsAction(TableOptionConstant.BAD_RECORDS_ACTION.getName() + "," + "FORCE");
    loadModel.setIsEmptyDataBadRecord(DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD + "," + "false");
    // CSV layout of the fact file.
    loadModel.setCsvHeader("ID,date,country,name,phonetype,serialname,salary");
    loadModel.setCsvHeaderColumns(loadModel.getCsvHeader().split(","));
    loadModel.setTaskNo("0");
    loadModel.setSegmentId("0");
    loadModel.setFactTimeStamp(System.currentTimeMillis());
    loadModel.setMaxColumns("10");
    return loadModel;
}
Also used : LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) CarbonDataLoadSchema(org.apache.carbondata.processing.loading.model.CarbonDataLoadSchema)

Example 3 with CarbonDataLoadSchema

use of org.apache.carbondata.processing.loading.model.CarbonDataLoadSchema in project carbondata by apache.

the class CarbonTableOutputFormat method getLoadModel.

/**
 * Returns the load model for the given configuration.
 *
 * <p>If the configuration holds a value under {@code LOAD_MODEL}, that value is converted back
 * into a {@link CarbonLoadModel} and returned as-is. Otherwise a new model is created and each
 * option is taken from the configuration, falling back to the corresponding
 * {@link CarbonProperties} value when the configuration has no entry for it.
 *
 * @param conf configuration to read the load options from
 * @return the stored load model, or a newly populated one
 * @throws IOException declared by this method; which of the calls made here raise it is not
 *     visible in this block
 */
public static CarbonLoadModel getLoadModel(Configuration conf) throws IOException {
    // Fast path: a model was already stored in the configuration.
    final String serializedModel = conf.get(LOAD_MODEL);
    if (serializedModel != null) {
        return (CarbonLoadModel) ObjectSerializationUtil.convertStringToObject(serializedModel);
    }

    final CarbonLoadModel loadModel = new CarbonLoadModel();
    final CarbonProperties props = CarbonProperties.getInstance();

    // Table identity.
    loadModel.setDatabaseName(CarbonTableOutputFormat.getDatabaseName(conf));
    loadModel.setTableName(CarbonTableOutputFormat.getTableName(conf));
    loadModel.setCarbonDataLoadSchema(new CarbonDataLoadSchema(getCarbonTable(conf)));
    loadModel.setTablePath(getTablePath(conf));
    setFileHeader(conf, loadModel);

    // Null format and bad-record handling.
    loadModel.setSerializationNullFormat(conf.get(SERIALIZATION_NULL_FORMAT, "\\N"));

    final String loggerEnableFallback = props.getProperty(
        CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_LOGGER_ENABLE,
        CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_LOGGER_ENABLE_DEFAULT);
    loadModel.setBadRecordsLoggerEnable(conf.get(BAD_RECORDS_LOGGER_ENABLE, loggerEnableFallback));

    final String badRecordsActionFallback = props.getProperty(
        CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
        CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION_DEFAULT);
    loadModel.setBadRecordsAction(conf.get(BAD_RECORDS_LOGGER_ACTION, badRecordsActionFallback));

    final String emptyDataFallback = props.getProperty(
        CarbonLoadOptionConstants.CARBON_OPTIONS_IS_EMPTY_DATA_BAD_RECORD,
        CarbonLoadOptionConstants.CARBON_OPTIONS_IS_EMPTY_DATA_BAD_RECORD_DEFAULT);
    loadModel.setIsEmptyDataBadRecord(conf.get(IS_EMPTY_DATA_BAD_RECORD, emptyDataFallback));

    final String skipEmptyLineFallback =
        props.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_SKIP_EMPTY_LINE);
    loadModel.setSkipEmptyLine(conf.get(SKIP_EMPTY_LINE, skipEmptyLineFallback));

    // Complex delimiters arrive as one comma-separated value; level 2 is optional.
    final String[] delimiterLevels = conf.get(COMPLEX_DELIMITERS, "$" + "," + ":").split(",");
    loadModel.setComplexDelimiterLevel1(delimiterLevels[0]);
    if (delimiterLevels.length > 1) {
        loadModel.setComplexDelimiterLevel2(delimiterLevels[1]);
    }

    // Date and timestamp formats.
    final String dateFormatFallback = props.getProperty(
        CarbonLoadOptionConstants.CARBON_OPTIONS_DATEFORMAT,
        CarbonLoadOptionConstants.CARBON_OPTIONS_DATEFORMAT_DEFAULT);
    loadModel.setDateFormat(conf.get(DATE_FORMAT, dateFormatFallback));

    final String timestampFormatFallback = props.getProperty(
        CarbonLoadOptionConstants.CARBON_OPTIONS_TIMESTAMPFORMAT,
        CarbonLoadOptionConstants.CARBON_OPTIONS_TIMESTAMPFORMAT_DEFAULT);
    loadModel.setTimestampformat(conf.get(TIMESTAMP_FORMAT, timestampFormatFallback));

    // Sort settings.
    final String globalSortPartitionsFallback = props.getProperty(
        CarbonLoadOptionConstants.CARBON_OPTIONS_GLOBAL_SORT_PARTITIONS, null);
    loadModel.setGlobalSortPartitions(conf.get(GLOBAL_SORT_PARTITIONS, globalSortPartitionsFallback));

    // Two-level fallback: load option property first, then the common property with its default.
    final String commonBatchSortSize = props.getProperty(
        CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB,
        CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB_DEFAULT);
    final String batchSortSizeFallback = props.getProperty(
        CarbonLoadOptionConstants.CARBON_OPTIONS_BATCH_SORT_SIZE_INMB, commonBatchSortSize);
    loadModel.setBatchSortSizeInMb(conf.get(BATCH_SORT_SIZE_INMB, batchSortSizeFallback));

    // Same two-level fallback for the bad-record location.
    final String commonBadRecordsLocation = props.getProperty(
        CarbonCommonConstants.CARBON_BADRECORDS_LOC,
        CarbonCommonConstants.CARBON_BADRECORDS_LOC_DEFAULT_VAL);
    final String badRecordsLocationFallback = props.getProperty(
        CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORD_PATH, commonBadRecordsLocation);
    loadModel.setBadRecordsLocation(conf.get(BAD_RECORD_PATH, badRecordsLocationFallback));

    final boolean onePassFallback = Boolean.parseBoolean(props.getProperty(
        CarbonLoadOptionConstants.CARBON_OPTIONS_SINGLE_PASS,
        CarbonLoadOptionConstants.CARBON_OPTIONS_SINGLE_PASS_DEFAULT));
    loadModel.setUseOnePass(conf.getBoolean(IS_ONE_PASS_LOAD, onePassFallback));

    return loadModel;
}
Also used : CarbonProperties(org.apache.carbondata.core.util.CarbonProperties) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) CarbonDataLoadSchema(org.apache.carbondata.processing.loading.model.CarbonDataLoadSchema)

Aggregations

CarbonDataLoadSchema (org.apache.carbondata.processing.loading.model.CarbonDataLoadSchema): 3; CarbonLoadModel (org.apache.carbondata.processing.loading.model.CarbonLoadModel): 3; LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails): 2; File (java.io.File): 1; IOException (java.io.IOException): 1; CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 1; CarbonProperties (org.apache.carbondata.core.util.CarbonProperties): 1