
Example 1 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From class TestUtilities, method runTestGetInputSummary:

private ContentSummary runTestGetInputSummary(JobConf jobConf, Properties properties, int numOfPartitions, int bytesPerFile, Class<? extends InputFormat> inputFormatClass) throws IOException {
    // creates scratch directories needed by the Context object
    SessionState.start(new HiveConf());
    MapWork mapWork = new MapWork();
    Context context = new Context(jobConf);
    LinkedHashMap<Path, PartitionDesc> pathToPartitionInfo = new LinkedHashMap<>();
    LinkedHashMap<Path, ArrayList<String>> pathToAliasTable = new LinkedHashMap<>();
    TableScanOperator scanOp = new TableScanOperator();
    PartitionDesc partitionDesc = new PartitionDesc(new TableDesc(inputFormatClass, null, properties), null);
    String testTableName = "testTable";
    Path testTablePath = new Path(testTableName);
    Path[] testPartitionsPaths = new Path[numOfPartitions];
    for (int i = 0; i < numOfPartitions; i++) {
        String testPartitionName = "p=" + i;
        testPartitionsPaths[i] = new Path(testTablePath, "p=" + i);
        pathToPartitionInfo.put(testPartitionsPaths[i], partitionDesc);
        pathToAliasTable.put(testPartitionsPaths[i], Lists.newArrayList(testPartitionName));
        mapWork.getAliasToWork().put(testPartitionName, scanOp);
    }
    mapWork.setPathToAliases(pathToAliasTable);
    mapWork.setPathToPartitionInfo(pathToPartitionInfo);
    FileSystem fs = FileSystem.getLocal(jobConf);
    try {
        fs.mkdirs(testTablePath);
        byte[] data = new byte[bytesPerFile];
        for (int i = 0; i < numOfPartitions; i++) {
            fs.mkdirs(testPartitionsPaths[i]);
            FSDataOutputStream out = fs.create(new Path(testPartitionsPaths[i], "test1.txt"));
            out.write(data);
            out.close();
        }
        return Utilities.getInputSummary(context, mapWork, null);
    } finally {
        if (fs.exists(testTablePath)) {
            fs.delete(testTablePath, true);
        }
    }
}
Also used: Context(org.apache.hadoop.hive.ql.Context) Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) FileSystem(org.apache.hadoop.fs.FileSystem) HiveConf(org.apache.hadoop.hive.conf.HiveConf) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream)
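
As a side note, the TableDesc handed to PartitionDesc above can also be built against concrete formats and explicit column metadata rather than a test parameter. A minimal sketch, assuming text input/output formats and the standard serde property keys; the column names and the wrapper class are illustrative, not part of the test:

import java.util.Properties;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.mapred.TextInputFormat;

public class TableDescSketch {
    public static PartitionDesc textPartition() {
        Properties props = new Properties();
        // Standard serde keys for column names and column types.
        props.setProperty(serdeConstants.LIST_COLUMNS, "col1,col2");
        props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string");
        // Same (inputFormat, outputFormat, properties) constructor as in the
        // test above; there the output format is passed as null, which is
        // also legal.
        TableDesc td = new TableDesc(TextInputFormat.class,
                HiveIgnoreKeyTextOutputFormat.class, props);
        return new PartitionDesc(td, null);
    }
}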

Example 2 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From class TestOperators, method testScriptOperator:

public void testScriptOperator() throws Throwable {
    try {
        System.out.println("Testing Script Operator");
        // first select expression: column col1
        ExprNodeDesc exprDesc1 = TestExecDriver.getStringColumn("col1");
        // second select expression: concat(col0, "1")
        ExprNodeDesc expr1 = TestExecDriver.getStringColumn("col0");
        ExprNodeDesc expr2 = new ExprNodeConstantDesc("1");
        ExprNodeDesc exprDesc2 = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("concat", expr1, expr2);
        // select operator to project these two columns
        ArrayList<ExprNodeDesc> earr = new ArrayList<ExprNodeDesc>();
        earr.add(exprDesc1);
        earr.add(exprDesc2);
        ArrayList<String> outputCols = new ArrayList<String>();
        for (int i = 0; i < earr.size(); i++) {
            outputCols.add("_col" + i);
        }
        SelectDesc selectCtx = new SelectDesc(earr, outputCols);
        Operator<SelectDesc> op = OperatorFactory.get(new CompilationOpContext(), SelectDesc.class);
        op.setConf(selectCtx);
        // scriptOperator to echo the output of the select
        TableDesc scriptOutput = PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "a,b");
        TableDesc scriptInput = PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "a,b");
        ScriptDesc sd = new ScriptDesc("cat", scriptOutput, TextRecordWriter.class, scriptInput, TextRecordReader.class, TextRecordReader.class, PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key"));
        Operator<ScriptDesc> sop = OperatorFactory.getAndMakeChild(sd, op);
        // Collect operator to observe the output of the script
        CollectDesc cd = new CollectDesc(Integer.valueOf(10));
        CollectOperator cdop = (CollectOperator) OperatorFactory.getAndMakeChild(cd, sop);
        op.initialize(new JobConf(TestOperators.class), new ObjectInspector[] { r[0].oi });
        // evaluate on row
        for (int i = 0; i < 5; i++) {
            op.process(r[i].o, 0);
        }
        op.close(false);
        InspectableObject io = new InspectableObject();
        for (int i = 0; i < 5; i++) {
            cdop.retrieve(io);
            System.out.println("[" + i + "] io.o=" + io.o);
            System.out.println("[" + i + "] io.oi=" + io.oi);
            StructObjectInspector soi = (StructObjectInspector) io.oi;
            assert (soi != null);
            StructField a = soi.getStructFieldRef("a");
            StructField b = soi.getStructFieldRef("b");
            assertEquals("" + (i + 1), ((PrimitiveObjectInspector) a.getFieldObjectInspector()).getPrimitiveJavaObject(soi.getStructFieldData(io.o, a)));
            assertEquals((i) + "1", ((PrimitiveObjectInspector) b.getFieldObjectInspector()).getPrimitiveJavaObject(soi.getStructFieldData(io.o, b)));
        }
        System.out.println("Script Operator ok");
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used: ScriptDesc(org.apache.hadoop.hive.ql.plan.ScriptDesc) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) CollectDesc(org.apache.hadoop.hive.ql.plan.CollectDesc) ArrayList(java.util.ArrayList) InspectableObject(org.apache.hadoop.hive.serde2.objectinspector.InspectableObject) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) JobConf(org.apache.hadoop.mapred.JobConf) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
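
The assertion loop above relies on the ObjectInspector API: io.oi describes the row layout, io.o carries the data, and field values are pulled out through StructField references. A standalone sketch of that pattern using a standard struct inspector; the field names echo the "a,b" script schema, but the row values and class name are made up for illustration:

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class StructFieldSketch {
    public static void main(String[] args) {
        // An inspector for struct<a:string,b:string>, mirroring the script
        // output schema ("a,b") asserted on in the test above.
        StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("a", "b"),
                Arrays.<ObjectInspector>asList(
                        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                        PrimitiveObjectInspectorFactory.javaStringObjectInspector));
        // In the "standard" representation, a struct row is a List of field values.
        List<String> row = Arrays.asList("1", "01");
        StructField a = soi.getStructFieldRef("a");
        String value = (String) ((PrimitiveObjectInspector) a.getFieldObjectInspector())
                .getPrimitiveJavaObject(soi.getStructFieldData(row, a));
        System.out.println("a = " + value); // prints: a = 1
    }
}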

Example 3 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From class TestUtilities, method testGetInputPathsWithEmptyTables:

/**
   * Check that calling {@link Utilities#getInputPaths(JobConf, MapWork, Path, Context, boolean)}
   * can process two different empty tables without throwing any exceptions.
   */
@Test
public void testGetInputPathsWithEmptyTables() throws Exception {
    String alias1Name = "alias1";
    String alias2Name = "alias2";
    MapWork mapWork1 = new MapWork();
    MapWork mapWork2 = new MapWork();
    JobConf jobConf = new JobConf();
    Path nonExistentPath1 = new Path(UUID.randomUUID().toString());
    Path nonExistentPath2 = new Path(UUID.randomUUID().toString());
    PartitionDesc mockPartitionDesc = mock(PartitionDesc.class);
    TableDesc mockTableDesc = mock(TableDesc.class);
    when(mockTableDesc.isNonNative()).thenReturn(false);
    when(mockTableDesc.getProperties()).thenReturn(new Properties());
    when(mockPartitionDesc.getProperties()).thenReturn(new Properties());
    when(mockPartitionDesc.getTableDesc()).thenReturn(mockTableDesc);
    doReturn(HiveSequenceFileOutputFormat.class).when(mockPartitionDesc).getOutputFileFormatClass();
    mapWork1.setPathToAliases(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath1, Lists.newArrayList(alias1Name))));
    mapWork1.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(ImmutableMap.of(alias1Name, (Operator<?>) mock(Operator.class))));
    mapWork1.setPathToPartitionInfo(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath1, mockPartitionDesc)));
    mapWork2.setPathToAliases(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath2, Lists.newArrayList(alias2Name))));
    mapWork2.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(ImmutableMap.of(alias2Name, (Operator<?>) mock(Operator.class))));
    mapWork2.setPathToPartitionInfo(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath2, mockPartitionDesc)));
    List<Path> inputPaths = new ArrayList<>();
    try {
        Path scratchDir = new Path(HiveConf.getVar(jobConf, HiveConf.ConfVars.LOCALSCRATCHDIR));
        inputPaths.addAll(Utilities.getInputPaths(jobConf, mapWork1, scratchDir, mock(Context.class), false));
        inputPaths.addAll(Utilities.getInputPaths(jobConf, mapWork2, scratchDir, mock(Context.class), false));
        assertEquals(2, inputPaths.size());
    } finally {
        File file;
        for (Path path : inputPaths) {
            file = new File(path.toString());
            if (file.exists()) {
                file.delete();
            }
        }
    }
}
Also used: Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) Properties(java.util.Properties) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) JobConf(org.apache.hadoop.mapred.JobConf) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) File(java.io.File) Test(org.junit.Test)
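
Note the asymmetry in the stubbing above: getProperties and getTableDesc use when(..).thenReturn(..), but getOutputFileFormatClass uses doReturn(..).when(..). With a wildcard return type such as Class<? extends ...>, the thenReturn form typically fails generic type checking at compile time, while doReturn bypasses it. A minimal sketch of that stubbing in isolation; the wrapper class is ours, the stubbed call is the one from the test:

import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;

import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;

public class MockPartitionDescSketch {
    public static PartitionDesc stubbed() {
        PartitionDesc pd = mock(PartitionDesc.class);
        // doReturn is not type-checked against the mocked method's generic
        // return type, so it tolerates the wildcard Class<? extends ...>
        // that thenReturn would reject.
        doReturn(HiveSequenceFileOutputFormat.class).when(pd).getOutputFileFormatClass();
        return pd;
    }
}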

Example 4 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From class TestOperators, method testMapOperator:

public void testMapOperator() throws Throwable {
    try {
        System.out.println("Testing Map Operator");
        // initialize configuration
        JobConf hconf = new JobConf(TestOperators.class);
        hconf.set(MRJobConfig.MAP_INPUT_FILE, "hdfs:///testDir/testFile");
        IOContextMap.get(hconf).setInputPath(new Path("hdfs:///testDir/testFile"));
        // initialize pathToAliases
        ArrayList<String> aliases = new ArrayList<String>();
        aliases.add("a");
        aliases.add("b");
        LinkedHashMap<Path, ArrayList<String>> pathToAliases = new LinkedHashMap<>();
        pathToAliases.put(new Path("hdfs:///testDir"), aliases);
        // initialize pathToTableInfo
        // Default: treat the table as a single column "col"
        TableDesc td = Utilities.defaultTd;
        PartitionDesc pd = new PartitionDesc(td, null);
        LinkedHashMap<Path, PartitionDesc> pathToPartitionInfo = new LinkedHashMap<>();
        pathToPartitionInfo.put(new Path("hdfs:///testDir"), pd);
        // initialize aliasToWork
        CompilationOpContext ctx = new CompilationOpContext();
        CollectDesc cd = new CollectDesc(Integer.valueOf(1));
        CollectOperator cdop1 = (CollectOperator) OperatorFactory.get(ctx, CollectDesc.class);
        cdop1.setConf(cd);
        CollectOperator cdop2 = (CollectOperator) OperatorFactory.get(ctx, CollectDesc.class);
        cdop2.setConf(cd);
        LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
        aliasToWork.put("a", cdop1);
        aliasToWork.put("b", cdop2);
        // initialize mapredWork
        MapredWork mrwork = new MapredWork();
        mrwork.getMapWork().setPathToAliases(pathToAliases);
        mrwork.getMapWork().setPathToPartitionInfo(pathToPartitionInfo);
        mrwork.getMapWork().setAliasToWork(aliasToWork);
        // get map operator and initialize it
        MapOperator mo = new MapOperator(new CompilationOpContext());
        mo.initializeAsRoot(hconf, mrwork.getMapWork());
        Text tw = new Text();
        InspectableObject io1 = new InspectableObject();
        InspectableObject io2 = new InspectableObject();
        for (int i = 0; i < 5; i++) {
            String answer = "[[" + i + ", " + (i + 1) + ", " + (i + 2) + "]]";
            tw.set("" + i + "\u0001" + (i + 1) + "\u0001" + (i + 2));
            mo.process(tw);
            cdop1.retrieve(io1);
            cdop2.retrieve(io2);
            System.out.println("io1.o.toString() = " + io1.o.toString());
            System.out.println("io2.o.toString() = " + io2.o.toString());
            System.out.println("answer.toString() = " + answer.toString());
            assertEquals(answer.toString(), io1.o.toString());
            assertEquals(answer.toString(), io2.o.toString());
        }
        System.out.println("Map Operator ok");
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used: ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) InspectableObject(org.apache.hadoop.hive.serde2.objectinspector.InspectableObject) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) JobConf(org.apache.hadoop.mapred.JobConf) Path(org.apache.hadoop.fs.Path) CollectDesc(org.apache.hadoop.hive.ql.plan.CollectDesc) Text(org.apache.hadoop.io.Text) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
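
Two details above are easy to miss: both aliases ("a" and "b") map to the same input path, so each collect operator receives every row; and the input rows are plain Text with fields joined by \u0001 (ctrl-A), the default field delimiter declared by Utilities.defaultTd, which is how MapOperator splits each row back into three columns. A small illustrative helper for composing such rows; the helper itself is hypothetical, not part of the test:

import org.apache.hadoop.io.Text;

public class CtrlARowSketch {
    // Join fields with \u0001, Hive's default field delimiter as used by
    // Utilities.defaultTd, so MapOperator can split them back into columns.
    public static Text row(Object... fields) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < fields.length; i++) {
            if (i > 0) {
                sb.append('\u0001');
            }
            sb.append(fields[i]);
        }
        return new Text(sb.toString());
    }
}

For i = 0, row(0, 1, 2) yields the same bytes as the tw.set(...) call in the loop above.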

Example 5 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From class TestDynamicPartitionPruner, method createMockMapWork:

private MapWork createMockMapWork(TestSource... testSources) {
    MapWork mapWork = mock(MapWork.class);
    Map<String, List<TableDesc>> tableMap = new HashMap<>();
    Map<String, List<String>> columnMap = new HashMap<>();
    Map<String, List<String>> typeMap = new HashMap<>();
    Map<String, List<ExprNodeDesc>> exprMap = new HashMap<>();
    int count = 0;
    for (TestSource testSource : testSources) {
        for (int i = 0; i < testSource.numExpressions; i++) {
            List<TableDesc> tableDescList = tableMap.get(testSource.vertexName);
            if (tableDescList == null) {
                tableDescList = new LinkedList<>();
                tableMap.put(testSource.vertexName, tableDescList);
            }
            tableDescList.add(mock(TableDesc.class));
            List<String> columnList = columnMap.get(testSource.vertexName);
            if (columnList == null) {
                columnList = new LinkedList<>();
                columnMap.put(testSource.vertexName, columnList);
            }
            columnList.add(testSource.vertexName + "c_" + count + "_" + i);
            List<String> typeList = typeMap.get(testSource.vertexName);
            if (typeList == null) {
                typeList = new LinkedList<>();
                typeMap.put(testSource.vertexName, typeList);
            }
            typeList.add("string");
            List<ExprNodeDesc> exprNodeDescList = exprMap.get(testSource.vertexName);
            if (exprNodeDescList == null) {
                exprNodeDescList = new LinkedList<>();
                exprMap.put(testSource.vertexName, exprNodeDescList);
            }
            exprNodeDescList.add(mock(ExprNodeDesc.class));
        }
        count++;
    }
    doReturn(tableMap).when(mapWork).getEventSourceTableDescMap();
    doReturn(columnMap).when(mapWork).getEventSourceColumnNameMap();
    doReturn(exprMap).when(mapWork).getEventSourcePartKeyExprMap();
    doReturn(typeMap).when(mapWork).getEventSourceColumnTypeMap();
    return mapWork;
}
Also used: HashMap(java.util.HashMap) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) List(java.util.List) LinkedList(java.util.LinkedList) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
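
A side note on style: each of the four get-then-put blocks above is the classic pre-Java-8 multimap idiom, and Map.computeIfAbsent collapses it to a single statement with identical behavior. A minimal sketch of the first block rewritten that way; the vertex name is illustrative:

import static org.mockito.Mockito.mock;

import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.ql.plan.TableDesc;

public class ComputeIfAbsentSketch {
    public static void main(String[] args) {
        Map<String, List<TableDesc>> tableMap = new HashMap<>();
        String vertexName = "Map 1"; // illustrative vertex name
        // One statement replaces the get/null-check/put dance in the loop above.
        tableMap.computeIfAbsent(vertexName, k -> new LinkedList<>())
                .add(mock(TableDesc.class));
    }
}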

Aggregations (usage counts across matched sources)

TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 80
ArrayList (java.util.ArrayList): 40
Path (org.apache.hadoop.fs.Path): 33
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 27
HashMap (java.util.HashMap): 24
LinkedHashMap (java.util.LinkedHashMap): 21
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 21
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 19
Properties (java.util.Properties): 16
Operator (org.apache.hadoop.hive.ql.exec.Operator): 16
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 16
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 16
LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc): 14
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 14
List (java.util.List): 13
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 13
JobConf (org.apache.hadoop.mapred.JobConf): 13
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 11
IOException (java.io.IOException): 10
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 10