Example 1 with FileFormatException

Use of org.apache.orc.FileFormatException in the Apache Hive project.

The run method of the class PostExecOrcFileDump:

@Override
public void run(HookContext hookContext) throws Exception {
    assert (hookContext.getHookType() == HookContext.HookType.POST_EXEC_HOOK);
    HiveConf conf = hookContext.getConf();
    LOG.info("Executing post execution hook to print orc file dump..");
    QueryPlan plan = hookContext.getQueryPlan();
    if (plan == null) {
        return;
    }
    FetchTask fetchTask = plan.getFetchTask();
    if (fetchTask != null) {
        SessionState ss = SessionState.get();
        SessionState.LogHelper console = ss.getConsole();
        // FileDump writes to System.out, so redirect stdout to the session console's error stream
        PrintStream old = System.out;
        System.setOut(console.getErrStream());
        FetchWork fetchWork = fetchTask.getWork();
        boolean partitionedTable = fetchWork.isPartitioned();
        List<Path> directories;
        if (partitionedTable) {
            LOG.info("Printing orc file dump for files from partitioned directory..");
            directories = fetchWork.getPartDir();
        } else {
            LOG.info("Printing orc file dump for files from table directory..");
            directories = Lists.newArrayList();
            directories.add(fetchWork.getTblDir());
        }
        for (Path dir : directories) {
            FileSystem fs = dir.getFileSystem(conf);
            List<FileStatus> fileList = HdfsUtils.listLocatedStatus(fs, dir, hiddenFileFilter);
            for (FileStatus fileStatus : fileList) {
                LOG.info("Printing orc file dump for " + fileStatus.getPath());
                if (fileStatus.getLen() > 0) {
                    try {
                        // merely creating the ORC reader runs sanity checks that verify this is a valid ORC file
                        OrcFile.createReader(fs, fileStatus.getPath());
                        console.printError("-- BEGIN ORC FILE DUMP --");
                        FileDump.main(new String[] { fileStatus.getPath().toString(), "--rowindex=*" });
                        console.printError("-- END ORC FILE DUMP --");
                    } catch (FileFormatException e) {
                        LOG.warn("File " + fileStatus.getPath() + " is not ORC. Skip printing orc file dump");
                    } catch (IOException e) {
                        LOG.warn("Skip printing orc file dump. Exception: " + e.getMessage());
                    }
                } else {
                    LOG.warn("Zero length file encountered. Skip printing orc file dump.");
                }
            }
        }
        // restore the old out stream
        System.out.flush();
        System.setOut(old);
    }
}
Also used: java.io.IOException, java.io.PrintStream, org.apache.hadoop.fs.FileStatus, org.apache.hadoop.fs.FileSystem, org.apache.hadoop.fs.Path, org.apache.hadoop.hive.conf.HiveConf, org.apache.hadoop.hive.ql.QueryPlan, org.apache.hadoop.hive.ql.exec.FetchTask, org.apache.hadoop.hive.ql.plan.FetchWork, org.apache.hadoop.hive.ql.session.SessionState, org.apache.orc.FileFormatException
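
The essential pattern in the hook is that merely constructing an ORC reader validates the file format: createReader throws FileFormatException for non-ORC input. Below is a minimal, self-contained sketch of the same idea using the core org.apache.orc API rather than Hive's org.apache.hadoop.hive.ql.io.orc wrapper shown above; the class and method names (OrcProbe, isOrcFile) are hypothetical.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.orc.FileFormatException;
import org.apache.orc.OrcFile;

public class OrcProbe {

    /** Returns true only if the file at the given path parses as valid ORC. */
    public static boolean isOrcFile(Path path, Configuration conf) {
        try {
            // Constructing the reader forces the postscript/footer sanity checks;
            // like the hook above, we discard the reader once it has been created.
            OrcFile.createReader(path, OrcFile.readerOptions(conf));
            return true;
        } catch (FileFormatException e) {
            // The file is readable but is not in ORC format.
            return false;
        } catch (IOException e) {
            // Missing file, permission problems, truncated footer, and the like.
            return false;
        }
    }
}

Because FileFormatException extends IOException, the more specific catch clause must come first, exactly as in the hook.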

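To actually run this hook, it must be registered as a Hive post-execution hook. A minimal sketch, assuming the standard hive.exec.post.hooks mechanism and that the class lives in the org.apache.hadoop.hive.ql.hooks package:

import org.apache.hadoop.hive.conf.HiveConf;

public class EnableOrcDumpHook {

    public static void main(String[] args) {
        // Programmatic equivalent of "set hive.exec.post.hooks=..." in a Hive session.
        HiveConf conf = new HiveConf();
        conf.setVar(HiveConf.ConfVars.POSTEXECHOOKS,
                "org.apache.hadoop.hive.ql.hooks.PostExecOrcFileDump");
        System.out.println(conf.getVar(HiveConf.ConfVars.POSTEXECHOOKS));
    }
}

Once registered, Hive invokes run(HookContext) after each query, and the ORC file dump for the fetched files appears on the console's error stream.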