
Example 51 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in the Apache Hive project.

Class Utilities, method getTableDesc.

public static TableDesc getTableDesc(Table tbl) {
    // Record the SerDe class in the copied metadata so the returned
    // TableDesc is self-describing.
    Properties props = tbl.getMetadata();
    props.put(serdeConstants.SERIALIZATION_LIB, tbl.getDeserializer().getClass().getName());
    return (new TableDesc(tbl.getInputFormatClass(), tbl.getOutputFormatClass(), props));
}
Also used: TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), Properties (java.util.Properties)
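
A minimal sketch of how this helper might be consumed, assuming a resolved Hive Table handle; the caller class and method names here are illustrative, not part of the snippet above:

import java.util.Properties;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde.serdeConstants;

public class TableDescUsage {
    // Hypothetical caller: derive a TableDesc and read back the SerDe
    // class that getTableDesc recorded in the table properties.
    public static void printSerDe(Table tbl) {
        TableDesc desc = Utilities.getTableDesc(tbl);
        Properties props = desc.getProperties();
        System.out.println("SerDe: " + props.getProperty(serdeConstants.SERIALIZATION_LIB));
        System.out.println("Input format: " + desc.getInputFileFormatClass().getName());
    }
}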

Example 52 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in the Apache Hive project.

Class TableBasedIndexHandler, method generateIndexBuildTaskList.

@Override
public List<Task<?>> generateIndexBuildTaskList(org.apache.hadoop.hive.ql.metadata.Table baseTbl, org.apache.hadoop.hive.metastore.api.Index index, List<Partition> indexTblPartitions, List<Partition> baseTblPartitions, org.apache.hadoop.hive.ql.metadata.Table indexTbl, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws HiveException {
    try {
        TableDesc desc = Utilities.getTableDesc(indexTbl);
        List<Partition> newBaseTblPartitions = new ArrayList<Partition>();
        List<Task<?>> indexBuilderTasks = new ArrayList<Task<?>>();
        if (!baseTbl.isPartitioned()) {
            // the table has no partitions; build the index over the whole table
            Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index, false, new PartitionDesc(desc, null), indexTbl.getTableName(), new PartitionDesc(Utilities.getTableDesc(baseTbl), null), baseTbl.getTableName(), indexTbl.getDbName());
            indexBuilderTasks.add(indexBuilder);
        } else {
            // the base table is partitioned: for each index partition, find the
            // base-table partition with the same name and build an index task for it
            for (int i = 0; i < indexTblPartitions.size(); i++) {
                Partition indexPart = indexTblPartitions.get(i);
                Partition basePart = null;
                for (int j = 0; j < baseTblPartitions.size(); j++) {
                    if (baseTblPartitions.get(j).getName().equals(indexPart.getName())) {
                        basePart = baseTblPartitions.get(j);
                        newBaseTblPartitions.add(baseTblPartitions.get(j));
                        break;
                    }
                }
                if (basePart == null) {
                    throw new RuntimeException("Partitions of base table and index table are inconsistent.");
                }
                // for each partition, spawn a map reduce task.
                Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index, true, new PartitionDesc(indexPart), indexTbl.getTableName(), new PartitionDesc(basePart), baseTbl.getTableName(), indexTbl.getDbName());
                indexBuilderTasks.add(indexBuilder);
            }
        }
        return indexBuilderTasks;
    } catch (Exception e) {
        throw new SemanticException(e);
    }
}
Also used: Partition (org.apache.hadoop.hive.ql.metadata.Partition), Task (org.apache.hadoop.hive.ql.exec.Task), ArrayList (java.util.ArrayList), PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
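
The inner loop above pairs index and base partitions by name in O(n*m). A minimal sketch of the same matching done through a map keyed on partition name; plain strings stand in for Hive's Partition objects, so the types are illustrative only:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PartitionMatcher {
    // Pair each index-partition name with the base partition of the same
    // name; fail fast when a base partition is missing, mirroring the
    // consistency check in generateIndexBuildTaskList.
    public static List<String> matchByName(List<String> indexParts, List<String> baseParts) {
        Map<String, String> baseByName = new HashMap<>();
        for (String base : baseParts) {
            baseByName.put(base, base);
        }
        List<String> matched = new ArrayList<>();
        for (String idx : indexParts) {
            String base = baseByName.get(idx);
            if (base == null) {
                throw new RuntimeException("Partitions of base table and index table are inconsistent.");
            }
            matched.add(base);
        }
        return matched;
    }
}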

Example 53 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in the Apache Hive project.

Class TestFileSinkOperator, method classSetup.

@BeforeClass
public static void classSetup() {
    Properties properties = new Properties();
    properties.setProperty(serdeConstants.SERIALIZATION_LIB, TFSOSerDe.class.getName());
    properties.setProperty(hive_metastoreConstants.META_TABLE_NAME, "tfs");
    nonAcidTableDescriptor = new TableDesc(TFSOInputFormat.class, TFSOOutputFormat.class, properties);
    properties.setProperty(serdeConstants.LIST_COLUMNS, "data");
    // wrap the existing properties as a defaults table so BUCKET_COUNT is
    // visible only to the ACID descriptor, not to nonAcidTableDescriptor
    properties = new Properties(properties);
    properties.setProperty(hive_metastoreConstants.BUCKET_COUNT, "1");
    acidTableDescriptor = new TableDesc(TFSOInputFormat.class, TFSOOutputFormat.class, properties);
    tmpdir = new File(System.getProperty("java.io.tmpdir") + System.getProperty("file.separator") + "testFileSinkOperator");
    tmpdir.mkdir();
    tmpdir.deleteOnExit();
}
Also used: TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), Properties (java.util.Properties), File (java.io.File), BeforeClass (org.junit.BeforeClass)
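
The new Properties(properties) call above wraps the first object as a defaults table rather than copying it, which is why BUCKET_COUNT never reaches the non-ACID descriptor. A JDK-only sketch of that behavior, separate from the test:

import java.util.Properties;

public class PropertiesDefaultsDemo {
    public static void main(String[] args) {
        Properties base = new Properties();
        base.setProperty("serialization.lib", "MySerDe");

        // 'derived' sees entries of 'base' through the defaults chain ...
        Properties derived = new Properties(base);
        derived.setProperty("bucket_count", "1");
        System.out.println(derived.getProperty("serialization.lib")); // MySerDe

        // ... but writes to 'derived' never leak back into 'base'.
        System.out.println(base.getProperty("bucket_count")); // null
    }
}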

Example 54 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in the Apache Hive project.

Class TestPlan, method testPlan.

public void testPlan() throws Exception {
    final String F1 = "#affiliations";
    final String F2 = "friends[0].friendid";
    try {
        // initialize a complete map reduce configuration
        ExprNodeDesc expr1 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, F1, "", false);
        ExprNodeDesc expr2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, F2, "", false);
        ExprNodeDesc filterExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("==", expr1, expr2);
        FilterDesc filterCtx = new FilterDesc(filterExpr, false);
        Operator<FilterDesc> op = OperatorFactory.get(new CompilationOpContext(), FilterDesc.class);
        op.setConf(filterCtx);
        ArrayList<String> aliasList = new ArrayList<String>();
        aliasList.add("a");
        LinkedHashMap<Path, ArrayList<String>> pa = new LinkedHashMap<>();
        pa.put(new Path("/tmp/testfolder"), aliasList);
        TableDesc tblDesc = Utilities.defaultTd;
        PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
        LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
        pt.put(new Path("/tmp/testfolder"), partDesc);
        LinkedHashMap<String, Operator<? extends OperatorDesc>> ao = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
        ao.put("a", op);
        MapredWork mrwork = new MapredWork();
        mrwork.getMapWork().setPathToAliases(pa);
        mrwork.getMapWork().setPathToPartitionInfo(pt);
        mrwork.getMapWork().setAliasToWork(ao);
        JobConf job = new JobConf(TestPlan.class);
        // serialize the configuration once ..
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        SerializationUtilities.serializePlan(mrwork, baos);
        baos.close();
        String v1 = baos.toString();
        // store into configuration
        job.set("fs.default.name", "file:///");
        Utilities.setMapRedWork(job, mrwork, new Path(System.getProperty("java.io.tmpdir") + File.separator + System.getProperty("user.name") + File.separator + "hive"));
        MapredWork mrwork2 = Utilities.getMapRedWork(job);
        Utilities.clearWork(job);
        // here we could also compare the deserialized object against the
        // original object field by field
        // serialize again
        baos.reset();
        SerializationUtilities.serializePlan(mrwork2, baos);
        baos.close();
        // verify that the two are equal
        assertEquals(v1, baos.toString());
    } catch (Exception excp) {
        excp.printStackTrace();
        throw excp;
    }
    System.out.println("Serialization/Deserialization of plan successful");
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayList (java.util.ArrayList), ByteArrayOutputStream (java.io.ByteArrayOutputStream), LinkedHashMap (java.util.LinkedHashMap), FilterDesc (org.apache.hadoop.hive.ql.plan.FilterDesc), MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc), JobConf (org.apache.hadoop.mapred.JobConf)
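
The test's round-trip pattern (serialize, deserialize, serialize again, compare the two byte streams) is not specific to Hive plans. A generic sketch with plain Java serialization; the helper names are hypothetical:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.Arrays;

public class RoundTripCheck {
    static byte[] serialize(Object o) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (ObjectOutputStream oos = new ObjectOutputStream(baos)) {
            oos.writeObject(o);
        }
        return baos.toByteArray();
    }

    static Object deserialize(byte[] bytes) throws IOException, ClassNotFoundException {
        try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
            return ois.readObject();
        }
    }

    // True when a serialize -> deserialize -> serialize cycle reproduces
    // the same bytes, which is the property testPlan asserts on.
    static boolean roundTripsCleanly(Serializable value) throws Exception {
        byte[] first = serialize(value);
        byte[] second = serialize(deserialize(first));
        return Arrays.equals(first, second);
    }
}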

Example 55 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in the Apache Hive project.

Class HashTableDummyOperator, method initializeOp.

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    TableDesc tbl = this.getConf().getTbl();
    try {
        Deserializer serde = tbl.getDeserializerClass().newInstance();
        SerDeUtils.initializeSerDe(serde, hconf, tbl.getProperties(), null);
        this.outputObjInspector = serde.getObjectInspector();
    } catch (Exception e) {
        LOG.error("Generating output obj inspector from dummy object error", e);
    }
}
Also used: Deserializer (org.apache.hadoop.hive.serde2.Deserializer), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
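
A minimal sketch of the reflective instantiate-then-initialize pattern used above, with a hypothetical interface standing in for Hive's Deserializer. Note that Class.newInstance() is deprecated on newer JDKs; getDeclaredConstructor().newInstance() is the modern spelling:

import java.util.Properties;

public class ReflectiveInit {
    // Hypothetical stand-in for a SerDe-style interface.
    interface Initializable {
        void initialize(Properties tableProps) throws Exception;
    }

    // Instantiate through the no-arg constructor, then hand the instance
    // its table properties, mirroring initializeOp above.
    static Initializable create(Class<? extends Initializable> cls, Properties props) throws Exception {
        Initializable instance = cls.getDeclaredConstructor().newInstance();
        instance.initialize(props);
        return instance;
    }
}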

Aggregations

TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 93
ArrayList (java.util.ArrayList): 47
Path (org.apache.hadoop.fs.Path): 34
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 29
HashMap (java.util.HashMap): 26
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 26
LinkedHashMap (java.util.LinkedHashMap): 23
Properties (java.util.Properties): 19
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 19
LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc): 18
Operator (org.apache.hadoop.hive.ql.exec.Operator): 16
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 16
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 16
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 16
JobConf (org.apache.hadoop.mapred.JobConf): 15
List (java.util.List): 14
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 14
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 14
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 11
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork): 11