
Example 11 with SkewedInfo

use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.

the class MetaStoreDirectSql method getPartitionsFromPartitionIds.

/** Should be called with the list short enough to not trip up Oracle/etc. */
private List<Partition> getPartitionsFromPartitionIds(String dbName, String tblName, Boolean isView, List<Object> partIdList) throws MetaException {
    boolean doTrace = LOG.isDebugEnabled();
    // 1 for comma
    int idStringWidth = (int) Math.ceil(Math.log10(partIdList.size())) + 1;
    int sbCapacity = partIdList.size() * idStringWidth;
    // Prepare StringBuilder for "PART_ID in (...)" to use in future queries.
    StringBuilder partSb = new StringBuilder(sbCapacity);
    for (Object partitionId : partIdList) {
        partSb.append(extractSqlLong(partitionId)).append(",");
    }
    String partIds = trimCommaList(partSb);
    // Get most of the fields for the IDs provided.
    // Assume db and table names are the same for all partitions, as provided in arguments.
    String queryText = "select \"PARTITIONS\".\"PART_ID\", \"SDS\".\"SD_ID\", \"SDS\".\"CD_ID\"," + " \"SERDES\".\"SERDE_ID\", \"PARTITIONS\".\"CREATE_TIME\"," + " \"PARTITIONS\".\"LAST_ACCESS_TIME\", \"SDS\".\"INPUT_FORMAT\", \"SDS\".\"IS_COMPRESSED\"," + " \"SDS\".\"IS_STOREDASSUBDIRECTORIES\", \"SDS\".\"LOCATION\", \"SDS\".\"NUM_BUCKETS\"," + " \"SDS\".\"OUTPUT_FORMAT\", \"SERDES\".\"NAME\", \"SERDES\".\"SLIB\" " + "from \"PARTITIONS\"" + "  left outer join \"SDS\" on \"PARTITIONS\".\"SD_ID\" = \"SDS\".\"SD_ID\" " + "  left outer join \"SERDES\" on \"SDS\".\"SERDE_ID\" = \"SERDES\".\"SERDE_ID\" " + "where \"PART_ID\" in (" + partIds + ") order by \"PART_NAME\" asc";
    long start = doTrace ? System.nanoTime() : 0;
    Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
    List<Object[]> sqlResult = executeWithArray(query, null, queryText);
    long queryTime = doTrace ? System.nanoTime() : 0;
    Deadline.checkTimeout();
    // Read all the fields and create partitions, SDs and serdes.
    TreeMap<Long, Partition> partitions = new TreeMap<Long, Partition>();
    TreeMap<Long, StorageDescriptor> sds = new TreeMap<Long, StorageDescriptor>();
    TreeMap<Long, SerDeInfo> serdes = new TreeMap<Long, SerDeInfo>();
    TreeMap<Long, List<FieldSchema>> colss = new TreeMap<Long, List<FieldSchema>>();
    // Keep order by name, consistent with JDO.
    ArrayList<Partition> orderedResult = new ArrayList<Partition>(partIdList.size());
    // Prepare StringBuilder-s for "in (...)" lists to use in one-to-many queries.
    StringBuilder sdSb = new StringBuilder(sbCapacity), serdeSb = new StringBuilder(sbCapacity);
    // We expect that there's only one field schema.
    StringBuilder colsSb = new StringBuilder(7);
    tblName = tblName.toLowerCase();
    dbName = dbName.toLowerCase();
    for (Object[] fields : sqlResult) {
        // Here comes the ugly part...
        long partitionId = extractSqlLong(fields[0]);
        Long sdId = extractSqlLong(fields[1]);
        Long colId = extractSqlLong(fields[2]);
        Long serdeId = extractSqlLong(fields[3]);
        // A partition must have at least sdId and serdeId set, or nothing set if it's a view.
        if (sdId == null || serdeId == null) {
            if (isView == null) {
                isView = isViewTable(dbName, tblName);
            }
            if ((sdId != null || colId != null || serdeId != null) || !isView) {
                throw new MetaException("Unexpected null for one of the IDs, SD " + sdId + ", serde " + serdeId + " for a " + (isView ? "" : "non-") + " view");
            }
        }
        Partition part = new Partition();
        orderedResult.add(part);
        // Set the collection fields; some code might not check presence before accessing them.
        part.setParameters(new HashMap<String, String>());
        part.setValues(new ArrayList<String>());
        part.setDbName(dbName);
        part.setTableName(tblName);
        if (fields[4] != null)
            part.setCreateTime(extractSqlInt(fields[4]));
        if (fields[5] != null)
            part.setLastAccessTime(extractSqlInt(fields[5]));
        partitions.put(partitionId, part);
        // Probably a view.
        if (sdId == null)
            continue;
        assert serdeId != null;
        // We assume each partition has a unique SD.
        StorageDescriptor sd = new StorageDescriptor();
        StorageDescriptor oldSd = sds.put(sdId, sd);
        if (oldSd != null) {
            throw new MetaException("Partitions reuse SDs; we don't expect that");
        }
        // Set the collection fields; some code might not check presence before accessing them.
        sd.setSortCols(new ArrayList<Order>());
        sd.setBucketCols(new ArrayList<String>());
        sd.setParameters(new HashMap<String, String>());
        sd.setSkewedInfo(new SkewedInfo(new ArrayList<String>(), new ArrayList<List<String>>(), new HashMap<List<String>, String>()));
        sd.setInputFormat((String) fields[6]);
        Boolean tmpBoolean = extractSqlBoolean(fields[7]);
        if (tmpBoolean != null)
            sd.setCompressed(tmpBoolean);
        tmpBoolean = extractSqlBoolean(fields[8]);
        if (tmpBoolean != null)
            sd.setStoredAsSubDirectories(tmpBoolean);
        sd.setLocation((String) fields[9]);
        if (fields[10] != null)
            sd.setNumBuckets(extractSqlInt(fields[10]));
        sd.setOutputFormat((String) fields[11]);
        sdSb.append(sdId).append(",");
        part.setSd(sd);
        if (colId != null) {
            List<FieldSchema> cols = colss.get(colId);
            // We expect that colId will be the same for all (or many) SDs.
            if (cols == null) {
                cols = new ArrayList<FieldSchema>();
                colss.put(colId, cols);
                colsSb.append(colId).append(",");
            }
            sd.setCols(cols);
        }
        // We assume each SD has a unique serde.
        SerDeInfo serde = new SerDeInfo();
        SerDeInfo oldSerde = serdes.put(serdeId, serde);
        if (oldSerde != null) {
            throw new MetaException("SDs reuse serdes; we don't expect that");
        }
        serde.setParameters(new HashMap<String, String>());
        serde.setName((String) fields[12]);
        serde.setSerializationLib((String) fields[13]);
        serdeSb.append(serdeId).append(",");
        sd.setSerdeInfo(serde);
        Deadline.checkTimeout();
    }
    query.closeAll();
    timingTrace(doTrace, queryText, start, queryTime);
    // Now get all the one-to-many things. Start with partitions.
    queryText = "select \"PART_ID\", \"PARAM_KEY\", \"PARAM_VALUE\" from \"PARTITION_PARAMS\"" + " where \"PART_ID\" in (" + partIds + ") and \"PARAM_KEY\" is not null" + " order by \"PART_ID\" asc";
    loopJoinOrderedResult(partitions, queryText, 0, new ApplyFunc<Partition>() {

        @Override
        public void apply(Partition t, Object[] fields) {
            t.putToParameters((String) fields[1], (String) fields[2]);
        }
    });
    // Perform conversion of null map values
    for (Partition t : partitions.values()) {
        t.setParameters(MetaStoreUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings));
    }
    queryText = "select \"PART_ID\", \"PART_KEY_VAL\" from \"PARTITION_KEY_VALS\"" + " where \"PART_ID\" in (" + partIds + ") and \"INTEGER_IDX\" >= 0" + " order by \"PART_ID\" asc, \"INTEGER_IDX\" asc";
    loopJoinOrderedResult(partitions, queryText, 0, new ApplyFunc<Partition>() {

        @Override
        public void apply(Partition t, Object[] fields) {
            t.addToValues((String) fields[1]);
        }
    });
    // Prepare IN (blah) lists for the following queries. Cut off the final ','s.
    if (sdSb.length() == 0) {
        assert serdeSb.length() == 0 && colsSb.length() == 0;
        // No SDs, probably a view.
        return orderedResult;
    }
    String sdIds = trimCommaList(sdSb);
    String serdeIds = trimCommaList(serdeSb);
    String colIds = trimCommaList(colsSb);
    // Get all the stuff for SD. Don't do empty-list check - we expect partitions to have SDs.
    queryText = "select \"SD_ID\", \"PARAM_KEY\", \"PARAM_VALUE\" from \"SD_PARAMS\"" + " where \"SD_ID\" in (" + sdIds + ") and \"PARAM_KEY\" is not null" + " order by \"SD_ID\" asc";
    loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {

        @Override
        public void apply(StorageDescriptor t, Object[] fields) {
            t.putToParameters((String) fields[1], (String) fields[2]);
        }
    });
    // Perform conversion of null map values
    for (StorageDescriptor t : sds.values()) {
        t.setParameters(MetaStoreUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings));
    }
    queryText = "select \"SD_ID\", \"COLUMN_NAME\", \"SORT_COLS\".\"ORDER\"" + " from \"SORT_COLS\"" + " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0" + " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
    loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {

        @Override
        public void apply(StorageDescriptor t, Object[] fields) {
            if (fields[2] == null)
                return;
            t.addToSortCols(new Order((String) fields[1], extractSqlInt(fields[2])));
        }
    });
    queryText = "select \"SD_ID\", \"BUCKET_COL_NAME\" from \"BUCKETING_COLS\"" + " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0" + " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
    loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {

        @Override
        public void apply(StorageDescriptor t, Object[] fields) {
            t.addToBucketCols((String) fields[1]);
        }
    });
    // Skewed columns stuff.
    queryText = "select \"SD_ID\", \"SKEWED_COL_NAME\" from \"SKEWED_COL_NAMES\"" + " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0" + " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
    boolean hasSkewedColumns = loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {

        @Override
        public void apply(StorageDescriptor t, Object[] fields) {
            if (!t.isSetSkewedInfo())
                t.setSkewedInfo(new SkewedInfo());
            t.getSkewedInfo().addToSkewedColNames((String) fields[1]);
        }
    }) > 0;
    // Assume we don't need to fetch the rest of the skewed column data if we have no columns.
    if (hasSkewedColumns) {
        // We are skipping the SKEWED_STRING_LIST table here, as it seems to be totally useless.
        queryText = "select \"SKEWED_VALUES\".\"SD_ID_OID\"," + "  \"SKEWED_STRING_LIST_VALUES\".\"STRING_LIST_ID\"," + "  \"SKEWED_STRING_LIST_VALUES\".\"STRING_LIST_VALUE\" " + "from \"SKEWED_VALUES\" " + "  left outer join \"SKEWED_STRING_LIST_VALUES\" on \"SKEWED_VALUES\"." + "\"STRING_LIST_ID_EID\" = \"SKEWED_STRING_LIST_VALUES\".\"STRING_LIST_ID\" " + "where \"SKEWED_VALUES\".\"SD_ID_OID\" in (" + sdIds + ") " + "  and \"SKEWED_VALUES\".\"STRING_LIST_ID_EID\" is not null " + "  and \"SKEWED_VALUES\".\"INTEGER_IDX\" >= 0 " + "order by \"SKEWED_VALUES\".\"SD_ID_OID\" asc, \"SKEWED_VALUES\".\"INTEGER_IDX\" asc," + "  \"SKEWED_STRING_LIST_VALUES\".\"INTEGER_IDX\" asc";
        loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {

            private Long currentListId;

            private List<String> currentList;

            @Override
            public void apply(StorageDescriptor t, Object[] fields) throws MetaException {
                if (!t.isSetSkewedInfo())
                    t.setSkewedInfo(new SkewedInfo());
                // Note that this is not a typical list accumulator - there's no call to finalize
                // the last list. Instead we add list to SD first, as well as locally to add elements.
                if (fields[1] == null) {
                    // left outer join produced a list with no values
                    currentList = null;
                    currentListId = null;
                    t.getSkewedInfo().addToSkewedColValues(new ArrayList<String>());
                } else {
                    long fieldsListId = extractSqlLong(fields[1]);
                    if (currentListId == null || fieldsListId != currentListId) {
                        currentList = new ArrayList<String>();
                        currentListId = fieldsListId;
                        t.getSkewedInfo().addToSkewedColValues(currentList);
                    }
                    currentList.add((String) fields[2]);
                }
            }
        });
        // We are skipping the SKEWED_STRING_LIST table here, as it seems to be totally useless.
        queryText = "select \"SKEWED_COL_VALUE_LOC_MAP\".\"SD_ID\"," + " \"SKEWED_STRING_LIST_VALUES\".STRING_LIST_ID," + " \"SKEWED_COL_VALUE_LOC_MAP\".\"LOCATION\"," + " \"SKEWED_STRING_LIST_VALUES\".\"STRING_LIST_VALUE\" " + "from \"SKEWED_COL_VALUE_LOC_MAP\"" + "  left outer join \"SKEWED_STRING_LIST_VALUES\" on \"SKEWED_COL_VALUE_LOC_MAP\"." + "\"STRING_LIST_ID_KID\" = \"SKEWED_STRING_LIST_VALUES\".\"STRING_LIST_ID\" " + "where \"SKEWED_COL_VALUE_LOC_MAP\".\"SD_ID\" in (" + sdIds + ")" + "  and \"SKEWED_COL_VALUE_LOC_MAP\".\"STRING_LIST_ID_KID\" is not null " + "order by \"SKEWED_COL_VALUE_LOC_MAP\".\"SD_ID\" asc," + "  \"SKEWED_STRING_LIST_VALUES\".\"STRING_LIST_ID\" asc," + "  \"SKEWED_STRING_LIST_VALUES\".\"INTEGER_IDX\" asc";
        loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {

            private Long currentListId;

            private List<String> currentList;

            @Override
            public void apply(StorageDescriptor t, Object[] fields) throws MetaException {
                if (!t.isSetSkewedInfo()) {
                    SkewedInfo skewedInfo = new SkewedInfo();
                    skewedInfo.setSkewedColValueLocationMaps(new HashMap<List<String>, String>());
                    t.setSkewedInfo(skewedInfo);
                }
                Map<List<String>, String> skewMap = t.getSkewedInfo().getSkewedColValueLocationMaps();
                // Note that this is not a typical list accumulator - there's no call to finalize
                // the last list. Instead we add list to SD first, as well as locally to add elements.
                if (fields[1] == null) {
                    // left outer join produced a list with no values
                    currentList = new ArrayList<String>();
                    currentListId = null;
                } else {
                    long fieldsListId = extractSqlLong(fields[1]);
                    if (currentListId == null || fieldsListId != currentListId) {
                        currentList = new ArrayList<String>();
                        currentListId = fieldsListId;
                    } else {
                        // value based compare.. remove first
                        skewMap.remove(currentList);
                    }
                    currentList.add((String) fields[3]);
                }
                skewMap.put(currentList, (String) fields[2]);
            }
        });
    }
    // Get FieldSchema stuff if any.
    if (!colss.isEmpty()) {
        // We are skipping the CDS table here, as it seems to be totally useless.
        queryText = "select \"CD_ID\", \"COMMENT\", \"COLUMN_NAME\", \"TYPE_NAME\"" + " from \"COLUMNS_V2\" where \"CD_ID\" in (" + colIds + ") and \"INTEGER_IDX\" >= 0" + " order by \"CD_ID\" asc, \"INTEGER_IDX\" asc";
        loopJoinOrderedResult(colss, queryText, 0, new ApplyFunc<List<FieldSchema>>() {

            @Override
            public void apply(List<FieldSchema> t, Object[] fields) {
                t.add(new FieldSchema((String) fields[2], (String) fields[3], (String) fields[1]));
            }
        });
    }
    // Finally, get all the stuff for serdes - just the params.
    queryText = "select \"SERDE_ID\", \"PARAM_KEY\", \"PARAM_VALUE\" from \"SERDE_PARAMS\"" + " where \"SERDE_ID\" in (" + serdeIds + ") and \"PARAM_KEY\" is not null" + " order by \"SERDE_ID\" asc";
    loopJoinOrderedResult(serdes, queryText, 0, new ApplyFunc<SerDeInfo>() {

        @Override
        public void apply(SerDeInfo t, Object[] fields) {
            t.putToParameters((String) fields[1], (String) fields[2]);
        }
    });
    // Perform conversion of null map values
    for (SerDeInfo t : serdes.values()) {
        t.setParameters(MetaStoreUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings));
    }
    return orderedResult;
}
Also used : Query(javax.jdo.Query) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ArrayList(java.util.ArrayList) SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo) List(java.util.List) ArrayList(java.util.ArrayList) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) Order(org.apache.hadoop.hive.metastore.api.Order) Partition(org.apache.hadoop.hive.metastore.api.Partition) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) TreeMap(java.util.TreeMap) MConstraint(org.apache.hadoop.hive.metastore.model.MConstraint) Map(java.util.Map) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap)
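
The javadoc above warns that the partition-ID list must be kept short enough for the database's IN-list limits (Oracle, for one, rejects IN (...) lists beyond 1,000 elements). A minimal sketch of what a batching caller could look like; the method name, batch size, and chunking strategy are assumptions for illustration, not Hive's actual driver code:

private List<Partition> getPartitionsInBatches(String dbName, String tblName, Boolean isView, List<Object> allPartIds) throws MetaException {
    // Assumed safe upper bound; Oracle rejects IN (...) lists longer than 1,000 items.
    final int maxBatchSize = 1000;
    List<Partition> result = new ArrayList<Partition>(allPartIds.size());
    for (int i = 0; i < allPartIds.size(); i += maxBatchSize) {
        List<Object> batch = allPartIds.subList(i, Math.min(i + maxBatchSize, allPartIds.size()));
        result.addAll(getPartitionsFromPartitionIds(dbName, tblName, isView, batch));
    }
    return result;
}

Note that the "order by PART_NAME" guarantee then only holds within each batch, not across the combined result.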

Example 12 with SkewedInfo

use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.

the class DDLSemanticAnalyzer method analyzeAlterTableRenameCol.

private void analyzeAlterTableRenameCol(String[] qualified, ASTNode ast, HashMap<String, String> partSpec) throws SemanticException {
    String newComment = null;
    boolean first = false;
    String flagCol = null;
    boolean isCascade = false;
    //col_old_name col_new_name column_type [COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT]
    String oldColName = ast.getChild(0).getText();
    String newColName = ast.getChild(1).getText();
    String newType = getTypeStringFromAST((ASTNode) ast.getChild(2));
    int childCount = ast.getChildCount();
    for (int i = 3; i < childCount; i++) {
        ASTNode child = (ASTNode) ast.getChild(i);
        switch(child.getToken().getType()) {
            case HiveParser.StringLiteral:
                newComment = unescapeSQLString(child.getText());
                break;
            case HiveParser.TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION:
                flagCol = unescapeIdentifier(child.getChild(0).getText());
                break;
            case HiveParser.KW_FIRST:
                first = true;
                break;
            case HiveParser.TOK_CASCADE:
                isCascade = true;
                break;
            case HiveParser.TOK_RESTRICT:
                break;
            default:
                throw new SemanticException("Unsupported token: " + child.getToken() + " for alter table");
        }
    }
    /* Validate the operation of renaming a column name. */
    Table tab = getTable(qualified);
    SkewedInfo skewInfo = tab.getTTable().getSd().getSkewedInfo();
    if ((null != skewInfo) && (null != skewInfo.getSkewedColNames()) && skewInfo.getSkewedColNames().contains(oldColName)) {
        throw new SemanticException(oldColName + ErrorMsg.ALTER_TABLE_NOT_ALLOWED_RENAME_SKEWED_COLUMN.getMsg());
    }
    String tblName = getDotName(qualified);
    AlterTableDesc alterTblDesc = new AlterTableDesc(tblName, partSpec, unescapeIdentifier(oldColName), unescapeIdentifier(newColName), newType, newComment, first, flagCol, isCascade);
    addInputsOutputsAlterTable(tblName, partSpec, alterTblDesc);
    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf));
}
Also used : AlterTableDesc(org.apache.hadoop.hive.ql.plan.AlterTableDesc) Table(org.apache.hadoop.hive.ql.metadata.Table) SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo) DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork)
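
The guard in the middle of this method is the key point: a column that participates in the table's skew definition cannot be renamed, since skewed column names are persisted by name in the metastore. A tiny, hypothetical distillation of that check (the helper and column name are invented):

static boolean isRenameAllowed(SkewedInfo skewInfo, String oldColName) {
    // Mirrors the analyzer's guard: renaming is rejected only when the column
    // appears in the table's skewed column names.
    return skewInfo == null || skewInfo.getSkewedColNames() == null || !skewInfo.getSkewedColNames().contains(oldColName);
}

For a table skewed on "region", isRenameAllowed(skewInfo, "region") returns false, which is the case where the analyzer raises ErrorMsg.ALTER_TABLE_NOT_ALLOWED_RENAME_SKEWED_COLUMN.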

Example 13 with SkewedInfo

use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.

the class HBaseUtils method serializeStorageDescriptor.

/**
   * Serialize a storage descriptor.
   * @param sd storage descriptor to serialize
   * @return serialized storage descriptor.
   */
static byte[] serializeStorageDescriptor(StorageDescriptor sd) {
    HbaseMetastoreProto.StorageDescriptor.Builder builder = HbaseMetastoreProto.StorageDescriptor.newBuilder();
    builder.addAllCols(convertFieldSchemaListToProto(sd.getCols()));
    if (sd.getInputFormat() != null) {
        builder.setInputFormat(sd.getInputFormat());
    }
    if (sd.getOutputFormat() != null) {
        builder.setOutputFormat(sd.getOutputFormat());
    }
    builder.setIsCompressed(sd.isCompressed());
    builder.setNumBuckets(sd.getNumBuckets());
    if (sd.getSerdeInfo() != null) {
        HbaseMetastoreProto.StorageDescriptor.SerDeInfo.Builder serdeBuilder = HbaseMetastoreProto.StorageDescriptor.SerDeInfo.newBuilder();
        SerDeInfo serde = sd.getSerdeInfo();
        if (serde.getName() != null) {
            serdeBuilder.setName(serde.getName());
        }
        if (serde.getSerializationLib() != null) {
            serdeBuilder.setSerializationLib(serde.getSerializationLib());
        }
        if (serde.getParameters() != null) {
            serdeBuilder.setParameters(buildParameters(serde.getParameters()));
        }
        builder.setSerdeInfo(serdeBuilder);
    }
    if (sd.getBucketCols() != null) {
        builder.addAllBucketCols(sd.getBucketCols());
    }
    if (sd.getSortCols() != null) {
        List<Order> orders = sd.getSortCols();
        List<HbaseMetastoreProto.StorageDescriptor.Order> protoList = new ArrayList<>(orders.size());
        for (Order order : orders) {
            protoList.add(HbaseMetastoreProto.StorageDescriptor.Order.newBuilder().setColumnName(order.getCol()).setOrder(order.getOrder()).build());
        }
        builder.addAllSortCols(protoList);
    }
    if (sd.getSkewedInfo() != null) {
        HbaseMetastoreProto.StorageDescriptor.SkewedInfo.Builder skewBuilder = HbaseMetastoreProto.StorageDescriptor.SkewedInfo.newBuilder();
        SkewedInfo skewed = sd.getSkewedInfo();
        if (skewed.getSkewedColNames() != null) {
            skewBuilder.addAllSkewedColNames(skewed.getSkewedColNames());
        }
        if (skewed.getSkewedColValues() != null) {
            for (List<String> innerList : skewed.getSkewedColValues()) {
                HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueList.Builder listBuilder = HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueList.newBuilder();
                listBuilder.addAllSkewedColValue(innerList);
                skewBuilder.addSkewedColValues(listBuilder);
            }
        }
        if (skewed.getSkewedColValueLocationMaps() != null) {
            for (Map.Entry<List<String>, String> e : skewed.getSkewedColValueLocationMaps().entrySet()) {
                HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueLocationMap.Builder mapBuilder = HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueLocationMap.newBuilder();
                mapBuilder.addAllKey(e.getKey());
                mapBuilder.setValue(e.getValue());
                skewBuilder.addSkewedColValueLocationMaps(mapBuilder);
            }
        }
        builder.setSkewedInfo(skewBuilder);
    }
    builder.setStoredAsSubDirectories(sd.isStoredAsSubDirectories());
    return builder.build().toByteArray();
}
Also used : Order(org.apache.hadoop.hive.metastore.api.Order) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ArrayList(java.util.ArrayList) ByteString(com.google.protobuf.ByteString) SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap)
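
A hedged round-trip sketch for the serializer above; the field values are invented, and the helper is assumed to live in HBaseUtils so it can call serializeStorageDescriptor directly:

static byte[] serializeMinimalSd() {
    StorageDescriptor sd = new StorageDescriptor();
    List<FieldSchema> cols = new ArrayList<>();
    cols.add(new FieldSchema("id", "bigint", ""));
    sd.setCols(cols);
    sd.setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
    sd.setNumBuckets(-1);
    sd.setCompressed(false);
    sd.setStoredAsSubDirectories(false);
    // Serde, sort/bucket cols, and skew info are left unset; the serializer above
    // null-checks each of them, so only the fields set here end up in the proto.
    return serializeStorageDescriptor(sd);
}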

Example 14 with SkewedInfo

use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.

the class Table method getEmptyTable.

/**
   * Initialize an empty table.
   */
public static org.apache.hadoop.hive.metastore.api.Table getEmptyTable(String databaseName, String tableName) {
    StorageDescriptor sd = new StorageDescriptor();
    {
        sd.setSerdeInfo(new SerDeInfo());
        sd.setNumBuckets(-1);
        sd.setBucketCols(new ArrayList<String>());
        sd.setCols(new ArrayList<FieldSchema>());
        sd.setParameters(new HashMap<String, String>());
        sd.setSortCols(new ArrayList<Order>());
        sd.getSerdeInfo().setParameters(new HashMap<String, String>());
        // We have to use MetadataTypedColumnsetSerDe because LazySimpleSerDe does
        // not support a table with no columns.
        sd.getSerdeInfo().setSerializationLib(MetadataTypedColumnsetSerDe.class.getName());
        sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
        sd.setInputFormat(SequenceFileInputFormat.class.getName());
        sd.setOutputFormat(HiveSequenceFileOutputFormat.class.getName());
        SkewedInfo skewInfo = new SkewedInfo();
        skewInfo.setSkewedColNames(new ArrayList<String>());
        skewInfo.setSkewedColValues(new ArrayList<List<String>>());
        skewInfo.setSkewedColValueLocationMaps(new HashMap<List<String>, String>());
        sd.setSkewedInfo(skewInfo);
    }
    org.apache.hadoop.hive.metastore.api.Table t = new org.apache.hadoop.hive.metastore.api.Table();
    {
        t.setSd(sd);
        t.setPartitionKeys(new ArrayList<FieldSchema>());
        t.setParameters(new HashMap<String, String>());
        t.setTableType(TableType.MANAGED_TABLE.toString());
        t.setDbName(databaseName);
        t.setTableName(tableName);
        t.setOwner(SessionState.getUserFromAuthenticator());
        // set create time
        t.setCreateTime((int) (System.currentTimeMillis() / 1000));
    }
    return t;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ArrayList(java.util.ArrayList) SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo)
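
Because getEmptyTable pre-populates every collection field, callers can mutate the returned skeleton immediately. A short usage sketch; the names and the extra column are invented:

static org.apache.hadoop.hive.metastore.api.Table makeScratchTable() {
    org.apache.hadoop.hive.metastore.api.Table t = getEmptyTable("default", "scratch");
    // The skeleton's collections are pre-initialized, so no null checks are needed,
    // and getSd().getSkewedInfo() returns an empty SkewedInfo rather than null.
    t.getSd().getCols().add(new FieldSchema("id", "bigint", ""));
    return t;
}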

Example 15 with SkewedInfo

use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.

the class Hive method loadTable.

/**
   * Load a directory into a Hive table, altering the existing content of the table
   * with the contents of loadPath. If the table does not exist, an exception is
   * thrown. Files in loadPath are moved into Hive, but the directory itself is
   * not removed.
   *
   * @param loadPath
   *          Directory containing files to load into Table
   * @param tableName
   *          name of table to be loaded.
   * @param replace
   *          if true - replace files in the table, otherwise add files to table
   * @param isSrcLocal
   *          If the source directory is LOCAL
   * @param isSkewedStoreAsSubdir
   *          if list bucketing enabled
   * @param hasFollowingStatsTask
   *          if there is any following stats task
   * @param isAcid true if this is an ACID based write
   */
public void loadTable(Path loadPath, String tableName, boolean replace, boolean isSrcLocal, boolean isSkewedStoreAsSubdir, boolean isAcid, boolean hasFollowingStatsTask) throws HiveException {
    List<Path> newFiles = null;
    Table tbl = getTable(tableName);
    HiveConf sessionConf = SessionState.getSessionConf();
    if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary()) {
        newFiles = Collections.synchronizedList(new ArrayList<Path>());
    }
    if (replace) {
        Path tableDest = tbl.getPath();
        replaceFiles(tableDest, loadPath, tableDest, tableDest, sessionConf, isSrcLocal);
    } else {
        FileSystem fs;
        try {
            fs = tbl.getDataLocation().getFileSystem(sessionConf);
            copyFiles(sessionConf, loadPath, tbl.getPath(), fs, isSrcLocal, isAcid, newFiles);
        } catch (IOException e) {
            throw new HiveException("addFiles: filesystem error in check phase", e);
        }
    }
    if (!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
        StatsSetupConst.setBasicStatsState(tbl.getParameters(), StatsSetupConst.FALSE);
    }
    //column stats will be inaccurate
    StatsSetupConst.clearColumnStatsState(tbl.getParameters());
    try {
        if (isSkewedStoreAsSubdir) {
            SkewedInfo skewedInfo = tbl.getSkewedInfo();
            // Construct list bucketing location mappings from sub-directory name.
            Map<List<String>, String> skewedColValueLocationMaps = constructListBucketingLocationMap(tbl.getPath(), skewedInfo);
            // Add list bucketing location mappings.
            skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps);
        }
    } catch (IOException e) {
        LOG.error(StringUtils.stringifyException(e));
        throw new HiveException(e);
    }
    EnvironmentContext environmentContext = null;
    if (hasFollowingStatsTask) {
        environmentContext = new EnvironmentContext();
        environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
    }
    try {
        alterTable(tableName, tbl, environmentContext);
    } catch (InvalidOperationException e) {
        throw new HiveException(e);
    }
    fireInsertEvent(tbl, null, newFiles);
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) IOException(java.io.IOException) EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo) FileSystem(org.apache.hadoop.fs.FileSystem) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) HiveConf(org.apache.hadoop.hive.conf.HiveConf) ArrayList(java.util.ArrayList) List(java.util.List) LinkedList(java.util.LinkedList)
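
A hedged usage sketch for loadTable; the staging path and table name are invented, and Hive.get(...) is the usual way to obtain a Hive instance for the current session:

static void loadStagedData(HiveConf conf) throws HiveException {
    Hive db = Hive.get(conf);
    // Hypothetical staging directory produced by an earlier job.
    Path staged = new Path("/tmp/staged_data");
    db.loadTable(staged, "default.sales",
        /* replace */ true,
        /* isSrcLocal */ false,
        /* isSkewedStoreAsSubdir */ false,
        /* isAcid */ false,
        /* hasFollowingStatsTask */ false);
}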

Aggregations

SkewedInfo (org.apache.hadoop.hive.metastore.api.SkewedInfo)16 ArrayList (java.util.ArrayList)12 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)12 List (java.util.List)10 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)9 HashMap (java.util.HashMap)8 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)7 Order (org.apache.hadoop.hive.metastore.api.Order)7 Path (org.apache.hadoop.fs.Path)4 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)4 Test (org.junit.Test)4 ByteString (com.google.protobuf.ByteString)3 IOException (java.io.IOException)3 Map (java.util.Map)3 TreeMap (java.util.TreeMap)3 FileSystem (org.apache.hadoop.fs.FileSystem)3 InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException)3 LinkedHashMap (java.util.LinkedHashMap)2 LinkedList (java.util.LinkedList)2 SortedMap (java.util.SortedMap)2