
Example 6 with CarbonLoadModel

Use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.

In class StoreCreator, method createCarbonStore:

/**
 * Create store without any restructure
 */
public CarbonLoadModel createCarbonStore() throws Exception {
    CarbonLoadModel loadModel = createTableAndLoadModel();
    loadData(loadModel, storePath);
    return loadModel;
}
Also used : CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel)
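
For orientation, a minimal driver sketch for the test utility above. Only createCarbonStore() and getTablePath() come from the examples on this page; the no-arg constructor is an assumption for illustration.

import org.apache.carbondata.processing.loading.model.CarbonLoadModel;

public class StoreCreatorDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical setup: the real StoreCreator constructor may take a store
        // path and an input path; the no-arg form here is an assumption.
        StoreCreator creator = new StoreCreator();
        // One call creates the table, builds the load model, and loads the data.
        CarbonLoadModel loadModel = creator.createCarbonStore();
        System.out.println("table path: " + loadModel.getTablePath());
    }
}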

Example 7 with CarbonLoadModel

Use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.

In class MapredCarbonOutputFormat, method getHiveRecordWriter:

@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    ThreadLocalSessionInfo.setConfigurationToCurrentThread(jc);
    CarbonLoadModel carbonLoadModel = null;
    // Try to get the load model from the JobConf first.
    String encodedString = jc.get(LOAD_MODEL);
    if (encodedString != null) {
        carbonLoadModel = (CarbonLoadModel) ObjectSerializationUtil.convertStringToObject(encodedString);
    } else {
        // Fall back to the load model serialized in the container environment.
        encodedString = System.getenv("carbon");
        if (encodedString != null) {
            carbonLoadModel = (CarbonLoadModel) ObjectSerializationUtil.convertStringToObject(encodedString);
        } else {
            carbonLoadModel = HiveCarbonUtil.getCarbonLoadModel(tableProperties, jc);
        }
    }
    // Copy every table property onto the fact table, lowercasing keys and values.
    Map<String, String> factTableProperties = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().getTableInfo().getFactTable().getTableProperties();
    for (Map.Entry<Object, Object> entry : tableProperties.entrySet()) {
        factTableProperties.put(entry.getKey().toString().toLowerCase(), entry.getValue().toString().toLowerCase());
    }
    String tablePath = FileFactory.getCarbonFile(carbonLoadModel.getTablePath()).getAbsolutePath();
    TaskAttemptID taskAttemptID = TaskAttemptID.forName(jc.get("mapred.task.id"));
    // No task attempt ID in the JobConf (the Presto write path); prepare a new one.
    if (taskAttemptID == null) {
        SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmm");
        String jobTrackerId = formatter.format(new Date());
        taskAttemptID = new TaskAttemptID(jobTrackerId, 0, TaskType.MAP, 0, 0);
        // Update the written-by app name; this class would otherwise record it as Hive.
        CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, "presto");
    } else {
        carbonLoadModel.setTaskNo("" + taskAttemptID.getTaskID().getId());
    }
    TaskAttemptContextImpl context = new TaskAttemptContextImpl(jc, taskAttemptID);
    final boolean isHivePartitionedTable = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().isHivePartitionTable();
    PartitionInfo partitionInfo = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().getPartitionInfo();
    final int partitionColumn = partitionInfo != null ? partitionInfo.getColumnSchemaList().size() : 0;
    if (isHivePartitionedTable) {
        carbonLoadModel.getMetrics().addToPartitionPath(finalOutPath.toString());
        context.getConfiguration().set("carbon.outputformat.writepath", finalOutPath.toString());
    }
    CarbonTableOutputFormat.setLoadModel(jc, carbonLoadModel);
    org.apache.hadoop.mapreduce.RecordWriter<NullWritable, ObjectArrayWritable> re = super.getRecordWriter(context);
    return new FileSinkOperator.RecordWriter() {

        @Override
        public void write(Writable writable) throws IOException {
            try {
                ObjectArrayWritable objectArrayWritable = new ObjectArrayWritable();
                if (isHivePartitionedTable) {
                    Object[] actualRow = ((CarbonHiveRow) writable).getData();
                    Object[] newData = Arrays.copyOf(actualRow, actualRow.length + partitionColumn);
                    // Recover partition values from the "key=value" directory names under the table path.
                    String[] partitionValues = finalOutPath.toString().substring(tablePath.length()).split("/");
                    for (int j = 0, i = actualRow.length; j < partitionValues.length; j++) {
                        if (partitionValues[j].contains("=")) {
                            newData[i++] = partitionValues[j].split("=")[1];
                        }
                    }
                    objectArrayWritable.set(newData);
                } else {
                    objectArrayWritable.set(((CarbonHiveRow) writable).getData());
                }
                re.write(NullWritable.get(), objectArrayWritable);
            } catch (InterruptedException e) {
                // Wrap the interruption itself so its stack trace is preserved.
                throw new IOException(e);
            }
        }

        @Override
        public void close(boolean b) throws IOException {
            try {
                re.close(context);
                ThreadLocalSessionInfo.setConfigurationToCurrentThread(context.getConfiguration());
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
        }
    };
}
Also used : TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) NullWritable(org.apache.hadoop.io.NullWritable) Writable(org.apache.hadoop.io.Writable) ObjectArrayWritable(org.apache.carbondata.hadoop.internal.ObjectArrayWritable) IOException(java.io.IOException) Date(java.util.Date) RecordWriter(org.apache.hadoop.mapred.RecordWriter) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) PartitionInfo(org.apache.carbondata.core.metadata.schema.PartitionInfo) Map(java.util.Map) SimpleDateFormat(java.text.SimpleDateFormat)
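
The writer above depends on a CarbonLoadModel having been serialized into the JobConf earlier in the job. A minimal sketch of that round-trip follows; the key string is an assumption for illustration (the real value is whatever the LOAD_MODEL constant in MapredCarbonOutputFormat resolves to).

import java.io.IOException;
import org.apache.carbondata.core.util.ObjectSerializationUtil;
import org.apache.carbondata.processing.loading.model.CarbonLoadModel;
import org.apache.hadoop.mapred.JobConf;

public class LoadModelRoundTrip {
    // Assumed key; the real one is the LOAD_MODEL constant used above.
    private static final String LOAD_MODEL_KEY = "mapreduce.carbontable.load.model";

    static void store(JobConf jc, CarbonLoadModel model) throws IOException {
        // CarbonLoadModel is Serializable, so it can ride through the JobConf
        // as an encoded string.
        jc.set(LOAD_MODEL_KEY, ObjectSerializationUtil.convertObjectToString(model));
    }

    static CarbonLoadModel load(JobConf jc) throws IOException {
        String encoded = jc.get(LOAD_MODEL_KEY);
        return encoded == null ? null
                : (CarbonLoadModel) ObjectSerializationUtil.convertStringToObject(encoded);
    }
}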

Example 8 with CarbonLoadModel

Use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.

In class HiveCarbonUtil, method getCarbonLoadModel:

public static CarbonLoadModel getCarbonLoadModel(String tableName, String databaseName, String location, String sortColumnsString, String[] columns, String[] columnTypes, Configuration configuration) {
    CarbonLoadModel loadModel;
    CarbonTable carbonTable;
    try {
        String schemaFilePath = CarbonTablePath.getSchemaFilePath(location, configuration);
        AbsoluteTableIdentifier absoluteTableIdentifier = AbsoluteTableIdentifier.from(location, databaseName, tableName, "");
        if (FileFactory.getCarbonFile(schemaFilePath).exists()) {
            // A schema file on disk marks a transactional table: read its schema from the store.
            carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier);
            carbonTable.setTransactionalTable(true);
        } else {
            // No schema file: infer the schema from an existing carbondata file when one
            // is present, otherwise build it from the Hive column definitions.
            String carbonDataFile = CarbonUtil.getFilePathExternalFilePath(location, configuration);
            if (carbonDataFile == null) {
                carbonTable = CarbonTable.buildFromTableInfo(getTableInfo(tableName, databaseName, location, sortColumnsString, columns, columnTypes, new ArrayList<>()));
            } else {
                carbonTable = CarbonTable.buildFromTableInfo(SchemaReader.inferSchema(absoluteTableIdentifier, false, configuration));
            }
            carbonTable.setTransactionalTable(false);
        }
    } catch (SQLException | IOException e) {
        throw new RuntimeException("Unable to fetch schema for the table: " + tableName, e);
    }
    CarbonLoadModelBuilder carbonLoadModelBuilder = new CarbonLoadModelBuilder(carbonTable);
    Map<String, String> options = new HashMap<>();
    options.put("fileheader", Strings.mkString(columns, ","));
    try {
        loadModel = carbonLoadModelBuilder.build(options, System.currentTimeMillis(), "");
    } catch (InvalidLoadOptionException | IOException e) {
        throw new RuntimeException(e);
    }
    loadModel.setSkipParsers();
    loadModel.setMetrics(new DataLoadMetrics());
    return loadModel;
}
Also used : InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) DataLoadMetrics(org.apache.carbondata.core.util.DataLoadMetrics) SQLException(java.sql.SQLException) HashMap(java.util.HashMap) IOException(java.io.IOException) CarbonLoadModelBuilder(org.apache.carbondata.processing.loading.model.CarbonLoadModelBuilder) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel)
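
The builder call at the heart of this method generalizes to any already-constructed CarbonTable. A minimal sketch using only the APIs visible in the example above; the "0" task number is a placeholder, and the exception type is widened to Exception for brevity.

import java.util.HashMap;
import java.util.Map;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
import org.apache.carbondata.processing.loading.model.CarbonLoadModel;
import org.apache.carbondata.processing.loading.model.CarbonLoadModelBuilder;

public class LoadModelFromTable {
    static CarbonLoadModel build(CarbonTable table, String[] columns) throws Exception {
        CarbonLoadModelBuilder builder = new CarbonLoadModelBuilder(table);
        Map<String, String> options = new HashMap<>();
        // "fileheader" names the incoming columns in load order, as in the example above.
        options.put("fileheader", String.join(",", columns));
        // build(options, loadStartTime, taskNo)
        return builder.build(options, System.currentTimeMillis(), "0");
    }
}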

Example 9 with CarbonLoadModel

Use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.

In class HiveCarbonUtil, method getCarbonLoadModel (Configuration overload):

public static CarbonLoadModel getCarbonLoadModel(Configuration tableProperties) {
    String[] tableUniqueName = tableProperties.get("name").split("\\.");
    String databaseName = tableUniqueName[0];
    String tableName = tableUniqueName[1];
    String tablePath = tableProperties.get(hive_metastoreConstants.META_TABLE_LOCATION);
    String columns = tableProperties.get(hive_metastoreConstants.META_TABLE_COLUMNS);
    String sortColumns = tableProperties.get("sort_columns");
    String columnTypes = tableProperties.get(hive_metastoreConstants.META_TABLE_COLUMN_TYPES);
    String partitionColumns = tableProperties.get(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
    String partitionColumnTypes = tableProperties.get(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
    // Partition columns are kept in separate metastore properties; fold them back
    // into the regular column and type lists.
    if (partitionColumns != null) {
        columns = columns + "," + partitionColumns;
        columnTypes = columnTypes + ":" + partitionColumnTypes;
    }
    String[] columnTypeArray = splitSchemaStringToArray(columnTypes);
    String complexDelim = tableProperties.get("complex_delimiter", "");
    CarbonLoadModel carbonLoadModel = getCarbonLoadModel(tableName, databaseName, tablePath, sortColumns, columns.split(","), columnTypeArray, tableProperties);
    carbonLoadModel.setCarbonTransactionalTable(true);
    carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().setTransactionalTable(true);
    for (String delim : complexDelim.split(",")) {
        carbonLoadModel.setComplexDelimiter(delim);
    }
    return carbonLoadModel;
}
Also used : CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel)
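
A hedged sketch of wiring a Configuration for this overload. The table name, path, columns, and delimiters below are placeholders, not values from this page, and the HiveCarbonUtil package is assumed to be org.apache.carbondata.hive.

import org.apache.carbondata.hive.HiveCarbonUtil;
import org.apache.carbondata.processing.loading.model.CarbonLoadModel;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;

public class ConfBasedLoadModelDemo {
    static CarbonLoadModel fromConf() {
        Configuration conf = new Configuration();
        conf.set("name", "default.demo_table");  // "database.table", split on the dot above
        conf.set(hive_metastoreConstants.META_TABLE_LOCATION, "/tmp/demo_table");  // placeholder path
        conf.set(hive_metastoreConstants.META_TABLE_COLUMNS, "id,name");
        conf.set(hive_metastoreConstants.META_TABLE_COLUMN_TYPES, "int:string");
        conf.set("sort_columns", "id");
        conf.set("complex_delimiter", "$,:");  // two delimiters for nested complex types
        return HiveCarbonUtil.getCarbonLoadModel(conf);
    }
}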

Example 10 with CarbonLoadModel

Use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.

In class HiveCarbonUtil, method getCarbonLoadModel (Properties overload):

public static CarbonLoadModel getCarbonLoadModel(Properties tableProperties, Configuration configuration) {
    String[] tableUniqueName = tableProperties.getProperty("name").split("\\.");
    String databaseName = tableUniqueName[0];
    String tableName = tableUniqueName[1];
    String tablePath = tableProperties.getProperty(hive_metastoreConstants.META_TABLE_LOCATION);
    String columns = tableProperties.getProperty(hive_metastoreConstants.META_TABLE_COLUMNS);
    String sortColumns = tableProperties.getProperty("sort_columns");
    String[] columnTypes = splitSchemaStringToArray(tableProperties.getProperty("columns.types"));
    String complexDelim = tableProperties.getProperty("complex_delimiter", "");
    CarbonLoadModel carbonLoadModel = getCarbonLoadModel(tableName, databaseName, tablePath, sortColumns, columns.split(","), columnTypes, configuration);
    for (String delim : complexDelim.split(",")) {
        carbonLoadModel.setComplexDelimiter(delim);
    }
    return carbonLoadModel;
}
Also used : CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel)
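
Both overloads end by splitting the comma-separated complex_delimiter property and feeding each piece to setComplexDelimiter. A small sketch of just that step; the one-nesting-level-per-call semantics noted in the comments are inferred from these examples, not from the CarbonLoadModel source.

import org.apache.carbondata.processing.loading.model.CarbonLoadModel;

public class ComplexDelimiterDemo {
    public static void main(String[] args) {
        CarbonLoadModel model = new CarbonLoadModel();
        // As read from the "complex_delimiter" table property: "$" then ":".
        String complexDelim = "$,:";
        for (String delim : complexDelim.split(",")) {
            // Each call registers the delimiter for the next nesting level (inferred).
            model.setComplexDelimiter(delim);
        }
    }
}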

Aggregations

CarbonLoadModel (org.apache.carbondata.processing.loading.model.CarbonLoadModel): 21
IOException (java.io.IOException): 9
CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 6
LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails): 4
SegmentFileStore (org.apache.carbondata.core.metadata.SegmentFileStore): 3
DataLoadMetrics (org.apache.carbondata.core.util.DataLoadMetrics): 3
CarbonDataLoadSchema (org.apache.carbondata.processing.loading.model.CarbonDataLoadSchema): 3
ArrayList (java.util.ArrayList): 2
List (java.util.List): 2
Map (java.util.Map): 2
CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile): 2
PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec): 2
CarbonProperties (org.apache.carbondata.core.util.CarbonProperties): 2
OperationContext (org.apache.carbondata.events.OperationContext): 2
Configuration (org.apache.hadoop.conf.Configuration): 2
Path (org.apache.hadoop.fs.Path): 2
HiveInsertTableHandle (io.prestosql.plugin.hive.HiveInsertTableHandle): 1
HiveIdentity (io.prestosql.plugin.hive.authentication.HiveIdentity): 1
Table (io.prestosql.plugin.hive.metastore.Table): 1
SchemaTableName (io.prestosql.spi.connector.SchemaTableName): 1