Example 81 with Path

use of org.apache.hadoop.fs.Path in project hive by apache.

the class SemanticAnalyzer method createDummyFile.

// Add dummy data so this directory is not removed by CombineHiveInputFormat, etc.
private Path createDummyFile() throws SemanticException {
    Path dummyPath = new Path(ctx.getMRScratchDir(), "dummy_path");
    Path dummyFile = new Path(dummyPath, "dummy_file");
    FSDataOutputStream fout = null;
    try {
        FileSystem fs = dummyFile.getFileSystem(conf);
        if (fs.exists(dummyFile)) {
            return dummyPath;
        }
        fout = fs.create(dummyFile);
        fout.write(1);
        fout.close();
    } catch (IOException e) {
        throw new SemanticException(e);
    } finally {
        IOUtils.closeStream(fout);
    }
    return dummyPath;
}
Also used : Path (org.apache.hadoop.fs.Path), FileSystem (org.apache.hadoop.fs.FileSystem), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), IOException (java.io.IOException), CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)
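
Outside Hive, the same "marker file" idiom can be reproduced with nothing but the Hadoop FileSystem API. Below is a minimal, self-contained sketch; the class name, the main method, and the /tmp/scratch location are illustrative, not part of the Hive source.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class DummyFileSketch {
    // Create <scratchDir>/dummy_path/dummy_file containing a single byte and
    // return the containing directory, mirroring createDummyFile() above.
    static Path createDummyFile(Configuration conf, Path scratchDir) throws IOException {
        Path dummyPath = new Path(scratchDir, "dummy_path");
        Path dummyFile = new Path(dummyPath, "dummy_file");
        FileSystem fs = dummyFile.getFileSystem(conf);
        if (fs.exists(dummyFile)) {
            // Already materialized; nothing to do.
            return dummyPath;
        }
        FSDataOutputStream out = null;
        try {
            out = fs.create(dummyFile);
            // Any content works; one byte keeps the directory non-empty.
            out.write(1);
        } finally {
            // Safe even when create() threw and out is still null.
            IOUtils.closeStream(out);
        }
        return dummyPath;
    }

    public static void main(String[] args) throws IOException {
        System.out.println(createDummyFile(new Configuration(), new Path("/tmp/scratch")));
    }
}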

Example 82 with Path

use of org.apache.hadoop.fs.Path in project hive by apache.

the class SemanticAnalyzer method materializeCTE.

Table materializeCTE(String cteName, CTEClause cte) throws HiveException {
    ASTNode createTable = new ASTNode(new ClassicToken(HiveParser.TOK_CREATETABLE));
    ASTNode tableName = new ASTNode(new ClassicToken(HiveParser.TOK_TABNAME));
    tableName.addChild(new ASTNode(new ClassicToken(HiveParser.Identifier, cteName)));
    ASTNode temporary = new ASTNode(new ClassicToken(HiveParser.KW_TEMPORARY, MATERIALIZATION_MARKER));
    createTable.addChild(tableName);
    createTable.addChild(temporary);
    createTable.addChild(cte.cteNode);
    SemanticAnalyzer analyzer = new SemanticAnalyzer(queryState);
    analyzer.initCtx(ctx);
    analyzer.init(false);
    // Share CTE contexts with the parent analyzer.
    analyzer.aliasToCTEs.putAll(aliasToCTEs);
    HiveOperation operation = queryState.getHiveOperation();
    try {
        analyzer.analyzeInternal(createTable);
    } finally {
        queryState.setCommandType(operation);
    }
    Table table = analyzer.tableDesc.toTable(conf);
    Path location = table.getDataLocation();
    try {
        location.getFileSystem(conf).mkdirs(location);
    } catch (IOException e) {
        throw new HiveException(e);
    }
    table.setMaterializedTable(true);
    LOG.info(cteName + " will be materialized into " + location);
    cte.table = table;
    cte.source = analyzer;
    ctx.addMaterializedTable(cteName, table);
    return table;
}
Also used : HiveOperation (org.apache.hadoop.hive.ql.plan.HiveOperation), Path (org.apache.hadoop.fs.Path), ClassicToken (org.antlr.runtime.ClassicToken), Table (org.apache.hadoop.hive.ql.metadata.Table), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), IOException (java.io.IOException)
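
The Path-specific step in materializeCTE is resolving a FileSystem from the table's data location and pre-creating the directory. A stripped-down sketch of just that step follows; the ensureLocation helper and the file:///tmp/cte_materialization location are hypothetical.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MaterializationDirSketch {
    // Mirrors location.getFileSystem(conf).mkdirs(location) from materializeCTE().
    static void ensureLocation(Configuration conf, Path location) throws IOException {
        // getFileSystem() selects the implementation from the path's scheme
        // (hdfs://, file://, ...), falling back to the configured default FS.
        FileSystem fs = location.getFileSystem(conf);
        // mkdirs() has mkdir -p semantics; false signals failure.
        if (!fs.mkdirs(location)) {
            throw new IOException("could not create " + location);
        }
    }

    public static void main(String[] args) throws IOException {
        ensureLocation(new Configuration(), new Path("file:///tmp/cte_materialization"));
    }
}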

Example 83 with Path

use of org.apache.hadoop.fs.Path in project hive by apache.

the class ReplicationSemanticAnalyzer method analyzeDatabaseLoad.

private void analyzeDatabaseLoad(String dbName, FileSystem fs, FileStatus dir) throws SemanticException {
    try {
        // The Path passed to us is a db dump location; we go ahead and load as needed.
        // dbName might be null or empty, in which case we keep the original db name for the new
        // database creation.
        // Two steps here: first, read the _metadata file and create a CreateDatabaseDesc from it;
        // then iterate over all subdirs and create a table import for each.
        EximUtil.ReadMetaData rv = new EximUtil.ReadMetaData();
        try {
            rv = EximUtil.readMetaData(fs, new Path(dir.getPath(), EximUtil.METADATA_NAME));
        } catch (IOException e) {
            throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
        }
        Database dbObj = rv.getDatabase();
        if (dbObj == null) {
            throw new IllegalArgumentException("_metadata file read did not contain a db object - invalid dump.");
        }
        if ((dbName == null) || (dbName.isEmpty())) {
            // Use the dbName specified by the caller as long as it is non-null and non-empty;
            // otherwise fall back to the original name recorded in the thrift object.
            dbName = dbObj.getName();
        }
        CreateDatabaseDesc createDbDesc = new CreateDatabaseDesc();
        createDbDesc.setName(dbName);
        createDbDesc.setComment(dbObj.getDescription());
        createDbDesc.setDatabaseProperties(dbObj.getParameters());
        // note that we do not set location - for repl load, we want that auto-created.
        createDbDesc.setIfNotExists(false);
        // If the db already exists, we want that to be an error condition: REPL LOAD is not
        // intended to replace an existing database.
        // TODO: we might revisit this for create-drop-recreate cases; it needs more thought.
        Task<? extends Serializable> createDbTask = TaskFactory.get(new DDLWork(inputs, outputs, createDbDesc), conf);
        rootTasks.add(createDbTask);
        FileStatus[] dirsInDbPath = fs.listStatus(dir.getPath(), EximUtil.getDirectoryFilter(fs));
        for (FileStatus tableDir : dirsInDbPath) {
            analyzeTableLoad(dbName, null, tableDir.getPath().toUri().toString(), createDbTask, null, null);
        }
    } catch (Exception e) {
        throw new SemanticException(e);
    }
}
Also used : Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), IOException (java.io.IOException), FileNotFoundException (java.io.FileNotFoundException), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork), CreateDatabaseDesc (org.apache.hadoop.hive.ql.plan.CreateDatabaseDesc), Database (org.apache.hadoop.hive.metastore.api.Database)
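
Two Path idioms carry analyzeDatabaseLoad: composing a child path for the _metadata file with new Path(parent, child), and enumerating table subdirectories via FileSystem.listStatus. The sketch below reproduces that traversal with a plain isDirectory() check standing in for EximUtil.getDirectoryFilter(fs); the dump location is hypothetical.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DumpDirWalkSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path dbDir = new Path("file:///tmp/repl_dump/db1");
        FileSystem fs = dbDir.getFileSystem(conf);

        // Child-path composition, as used for the _metadata file in the db dump dir.
        Path metadata = new Path(dbDir, "_metadata");
        System.out.println(metadata + " exists=" + fs.exists(metadata));

        // Each table in the dump lives in its own subdirectory.
        for (FileStatus entry : fs.listStatus(dbDir)) {
            if (entry.isDirectory()) {
                System.out.println("table dir: " + entry.getPath().toUri());
            }
        }
    }
}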

Example 84 with Path

use of org.apache.hadoop.fs.Path in project hive by apache.

the class ReplicationSemanticAnalyzer method dumpTbl.

/**
 * Dumps a single table under the given database dump root.
 *
 * @param ast     the AST node for the dump command
 * @param dbName  name of the database containing the table
 * @param tblName name of the table to dump
 * @param dbRoot  dump root directory for the database
 * @return the path the table was dumped to
 * @throws SemanticException
 */
private Path dumpTbl(ASTNode ast, String dbName, String tblName, Path dbRoot) throws SemanticException {
    Path tableRoot = new Path(dbRoot, tblName);
    try {
        URI toURI = EximUtil.getValidatedURI(conf, tableRoot.toUri().toString());
        TableSpec ts = new TableSpec(db, conf, dbName + "." + tblName, null);
        ExportSemanticAnalyzer.prepareExport(ast, toURI, ts, getNewReplicationSpec(), db, conf, ctx, rootTasks, inputs, outputs, LOG);
    } catch (HiveException e) {
        // TODO: simple wrap-and-rethrow for now; clean up with proper error codes
        throw new SemanticException(e);
    }
    return tableRoot;
}
Also used : Path (org.apache.hadoop.fs.Path), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), URI (java.net.URI)
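
The Path work in dumpTbl is small but typical: derive a per-table root under the database root and hand its URI form to the export machinery. A sketch of just that composition, with hypothetical paths:

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class TableRootSketch {
    public static void main(String[] args) {
        // Hypothetical dump layout: <dbRoot>/<tblName>.
        Path dbRoot = new Path("hdfs://nn:8020/repl/dump/db1");
        // Child-path composition, as in new Path(dbRoot, tblName) above.
        Path tableRoot = new Path(dbRoot, "orders");
        // The URI form is what getValidatedURI() and prepareExport() work with.
        URI asUri = tableRoot.toUri();
        System.out.println(asUri); // hdfs://nn:8020/repl/dump/db1/orders
    }
}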

Example 85 with Path

use of org.apache.hadoop.fs.Path in project hive by apache.

the class FSStatsPublisher method init.

@Override
public boolean init(StatsCollectionContext context) {
    try {
        for (String tmpDir : context.getStatsTmpDirs()) {
            Path statsDir = new Path(tmpDir);
            LOG.debug("Initing FSStatsPublisher with : " + statsDir);
            statsDir.getFileSystem(context.getHiveConf()).mkdirs(statsDir);
            LOG.info("created : " + statsDir);
        }
        return true;
    } catch (IOException e) {
        LOG.error("Failed to create dir", e);
        return false;
    }
}
Also used : Path (org.apache.hadoop.fs.Path), IOException (java.io.IOException)
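
Note that init ignores the boolean returned by mkdirs and only fails on IOException. A variant that also checks the return value looks like the sketch below; the helper name and the directory list are hypothetical.

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class StatsDirInitSketch {
    // Create every stats tmp dir, reporting overall success like FSStatsPublisher.init().
    static boolean initStatsDirs(Configuration conf, List<String> tmpDirs) {
        try {
            for (String tmpDir : tmpDirs) {
                Path statsDir = new Path(tmpDir);
                // mkdirs() has mkdir -p semantics; false signals failure.
                if (!statsDir.getFileSystem(conf).mkdirs(statsDir)) {
                    return false;
                }
            }
            return true;
        } catch (IOException e) {
            // Mirrors the original: log and return false rather than propagate.
            System.err.println("Failed to create dir: " + e);
            return false;
        }
    }

    public static void main(String[] args) {
        List<String> dirs = Arrays.asList("file:///tmp/stats/a", "file:///tmp/stats/b");
        System.out.println(initStatsDirs(new Configuration(), dirs));
    }
}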

Aggregations

Path (org.apache.hadoop.fs.Path) 11752
Test (org.junit.Test) 4193
FileSystem (org.apache.hadoop.fs.FileSystem) 3587
IOException (java.io.IOException) 2631
Configuration (org.apache.hadoop.conf.Configuration) 2621
FileStatus (org.apache.hadoop.fs.FileStatus) 1568
ArrayList (java.util.ArrayList) 1145
File (java.io.File) 987
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream) 924
HashMap (java.util.HashMap) 570
Job (org.apache.hadoop.mapreduce.Job) 492
JobConf (org.apache.hadoop.mapred.JobConf) 477
URI (java.net.URI) 465
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream) 462
FileNotFoundException (java.io.FileNotFoundException) 441
FsPermission (org.apache.hadoop.fs.permission.FsPermission) 375
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster) 362
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem) 355
Map (java.util.Map) 326
List (java.util.List) 316