Use of org.apache.hadoop.fs.Path in project hive by apache.
In class SemanticAnalyzer, method createDummyFile.
// add dummy data so this path is not removed by CombineHiveInputFormat, etc.
private Path createDummyFile() throws SemanticException {
  Path dummyPath = new Path(ctx.getMRScratchDir(), "dummy_path");
  Path dummyFile = new Path(dummyPath, "dummy_file");
  FSDataOutputStream fout = null;
  try {
    FileSystem fs = dummyFile.getFileSystem(conf);
    if (fs.exists(dummyFile)) {
      return dummyPath;
    }
    fout = fs.create(dummyFile);
    fout.write(1);
    fout.close();
  } catch (IOException e) {
    throw new SemanticException(e);
  } finally {
    IOUtils.closeStream(fout);
  }
  return dummyPath;
}
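Stripped of the SemanticAnalyzer plumbing, the Path idiom above is: build a child Path, resolve its FileSystem, and create a one-byte marker file only if it does not already exist. The sketch below isolates that idiom; the scratch directory and file names are placeholders, not the ones Hive derives from its context.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class DummyFileExample {
  // Creates <scratchDir>/dummy_path/dummy_file with a single byte of content,
  // unless it already exists, and returns the containing directory.
  static Path ensureDummyFile(Configuration conf, Path scratchDir) throws IOException {
    Path dummyPath = new Path(scratchDir, "dummy_path");   // hypothetical names
    Path dummyFile = new Path(dummyPath, "dummy_file");
    FileSystem fs = dummyFile.getFileSystem(conf);
    if (fs.exists(dummyFile)) {
      return dummyPath;
    }
    FSDataOutputStream out = null;
    try {
      out = fs.create(dummyFile);
      out.write(1);
    } finally {
      IOUtils.closeStream(out);   // safe to call even if create() failed
    }
    return dummyPath;
  }
}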
Use of org.apache.hadoop.fs.Path in project hive by apache.
In class SemanticAnalyzer, method materializeCTE.
Table materializeCTE(String cteName, CTEClause cte) throws HiveException {
  ASTNode createTable = new ASTNode(new ClassicToken(HiveParser.TOK_CREATETABLE));
  ASTNode tableName = new ASTNode(new ClassicToken(HiveParser.TOK_TABNAME));
  tableName.addChild(new ASTNode(new ClassicToken(HiveParser.Identifier, cteName)));
  ASTNode temporary = new ASTNode(new ClassicToken(HiveParser.KW_TEMPORARY, MATERIALIZATION_MARKER));
  createTable.addChild(tableName);
  createTable.addChild(temporary);
  createTable.addChild(cte.cteNode);
  SemanticAnalyzer analyzer = new SemanticAnalyzer(queryState);
  analyzer.initCtx(ctx);
  analyzer.init(false);
  // should share cte contexts
  analyzer.aliasToCTEs.putAll(aliasToCTEs);
  HiveOperation operation = queryState.getHiveOperation();
  try {
    analyzer.analyzeInternal(createTable);
  } finally {
    queryState.setCommandType(operation);
  }
  Table table = analyzer.tableDesc.toTable(conf);
  Path location = table.getDataLocation();
  try {
    location.getFileSystem(conf).mkdirs(location);
  } catch (IOException e) {
    throw new HiveException(e);
  }
  table.setMaterializedTable(true);
  LOG.info(cteName + " will be materialized into " + location);
  cte.table = table;
  cte.source = analyzer;
  ctx.addMaterializedTable(cteName, table);
  return table;
}
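The only Path work in materializeCTE is ensuring a directory exists at the table's data location, with the FileSystem resolved from the Path itself so any scheme (hdfs://, file://, ...) works. A small sketch of that step, using a made-up location rather than one taken from a Hive table descriptor:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MaterializationDirExample {
  // Ensures the directory at 'location' exists on whatever FileSystem the Path points to.
  static void ensureLocation(Configuration conf, Path location) throws IOException {
    FileSystem fs = location.getFileSystem(conf);
    if (!fs.mkdirs(location)) {   // mkdirs() also creates any missing parent directories
      throw new IOException("Could not create " + location);
    }
  }

  public static void main(String[] args) throws IOException {
    // Hypothetical location; Hive derives the real one from the materialized table.
    ensureLocation(new Configuration(), new Path("/tmp/materialized_cte_demo"));
  }
}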
Use of org.apache.hadoop.fs.Path in project hive by apache.
In class ReplicationSemanticAnalyzer, method analyzeDatabaseLoad.
private void analyzeDatabaseLoad(String dbName, FileSystem fs, FileStatus dir) throws SemanticException {
  try {
    // The path passed to us is a db dump location. We go ahead and load as needed.
    // dbName might be null or empty, in which case we keep the original db name for the new
    // database creation.
    // Two steps here - first, we read the _metadata file and create a CreateDatabaseDesc
    // associated with that. Then, we iterate over all subdirs and create table imports for each.
    EximUtil.ReadMetaData rv = new EximUtil.ReadMetaData();
    try {
      rv = EximUtil.readMetaData(fs, new Path(dir.getPath(), EximUtil.METADATA_NAME));
    } catch (IOException e) {
      throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
    }
    Database dbObj = rv.getDatabase();
    if (dbObj == null) {
      throw new IllegalArgumentException("_metadata file read did not contain a db object - invalid dump.");
    }
    if ((dbName == null) || (dbName.isEmpty())) {
      // Use the dbName specified by the caller as long as it is not null/empty; otherwise fall
      // back to the original name recorded in the thrift object.
      dbName = dbObj.getName();
    }
    CreateDatabaseDesc createDbDesc = new CreateDatabaseDesc();
    createDbDesc.setName(dbName);
    createDbDesc.setComment(dbObj.getDescription());
    createDbDesc.setDatabaseProperties(dbObj.getParameters());
    // note that we do not set location - for repl load, we want that auto-created.
    createDbDesc.setIfNotExists(false);
    // If the db already exists, we want this to be an error condition. Repl Load is not
    // intended to replace a db.
    // TODO: we might revisit this in create-drop-recreate cases, needs some thinking on.
    Task<? extends Serializable> createDbTask = TaskFactory.get(new DDLWork(inputs, outputs, createDbDesc), conf);
    rootTasks.add(createDbTask);
    FileStatus[] dirsInDbPath = fs.listStatus(dir.getPath(), EximUtil.getDirectoryFilter(fs));
    for (FileStatus tableDir : dirsInDbPath) {
      analyzeTableLoad(dbName, null, tableDir.getPath().toUri().toString(), createDbTask, null, null);
    }
  } catch (Exception e) {
    throw new SemanticException(e);
  }
}
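Apart from the replication machinery, the Path usage here amounts to joining the dump directory with the _metadata file name and then enumerating the subdirectories that hold table dumps. The sketch below shows those two steps against a hypothetical local dump directory; a plain isDirectory() check stands in for EximUtil.getDirectoryFilter(fs).

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DumpDirExample {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path dumpDir = new Path("/tmp/repl_dump/db1");          // hypothetical dump location
    FileSystem fs = dumpDir.getFileSystem(conf);

    // Child path for the metadata file, analogous to EximUtil.METADATA_NAME.
    Path metadata = new Path(dumpDir, "_metadata");
    System.out.println("metadata present: " + fs.exists(metadata));

    // Each subdirectory corresponds to one table dump.
    for (FileStatus tableDir : fs.listStatus(dumpDir)) {
      if (tableDir.isDirectory()) {
        System.out.println("table dir: " + tableDir.getPath().toUri());
      }
    }
  }
}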
Use of org.apache.hadoop.fs.Path in project hive by apache.
In class ReplicationSemanticAnalyzer, method dumpTbl.
/**
 * Sets up the dump of a single table under the given database dump root.
 *
 * @param ast the AST node of the command, passed through to the export preparation
 * @param dbName name of the database containing the table
 * @param tblName name of the table to dump
 * @param dbRoot dump root path of the database; the table is dumped into a subdirectory named after it
 * @return the path the table was dumped to
 * @throws SemanticException if export preparation fails
 */
private Path dumpTbl(ASTNode ast, String dbName, String tblName, Path dbRoot) throws SemanticException {
  Path tableRoot = new Path(dbRoot, tblName);
  try {
    URI toURI = EximUtil.getValidatedURI(conf, tableRoot.toUri().toString());
    TableSpec ts = new TableSpec(db, conf, dbName + "." + tblName, null);
    ExportSemanticAnalyzer.prepareExport(ast, toURI, ts, getNewReplicationSpec(), db, conf, ctx, rootTasks, inputs, outputs, LOG);
  } catch (HiveException e) {
    // TODO : simple wrap & rethrow for now, clean up with error codes
    throw new SemanticException(e);
  }
  return tableRoot;
}
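The Path-specific portion of dumpTbl is simply joining the database dump root with the table name and converting the result to a URI. A minimal sketch with illustrative names:

import java.net.URI;

import org.apache.hadoop.fs.Path;

public class TableRootExample {
  public static void main(String[] args) {
    Path dbRoot = new Path("hdfs://namenode:8020/repl/dump/db1");  // hypothetical dump root
    String tblName = "orders";

    // new Path(parent, child) handles the separator for us.
    Path tableRoot = new Path(dbRoot, tblName);
    URI toURI = tableRoot.toUri();

    System.out.println(tableRoot);                                  // .../repl/dump/db1/orders
    System.out.println(toURI.getScheme() + " " + toURI.getPath());
  }
}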
Use of org.apache.hadoop.fs.Path in project hive by apache.
In class FSStatsPublisher, method init.
@Override
public boolean init(StatsCollectionContext context) {
  try {
    for (String tmpDir : context.getStatsTmpDirs()) {
      Path statsDir = new Path(tmpDir);
      LOG.debug("Initing FSStatsPublisher with : " + statsDir);
      statsDir.getFileSystem(context.getHiveConf()).mkdirs(statsDir);
      LOG.info("created : " + statsDir);
    }
    return true;
  } catch (IOException e) {
    LOG.error("Failed to create dir", e);
    return false;
  }
}
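The same idiom in isolation: build a Path from each configured directory string, resolve its FileSystem, and create the directory, returning false if anything fails. The directory names below are placeholders for whatever the stats collection context would supply.

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class StatsDirExample {
  // Creates each configured stats tmp directory, returning false if any creation fails.
  static boolean initStatsDirs(Configuration conf, List<String> tmpDirs) {
    try {
      for (String tmpDir : tmpDirs) {
        Path statsDir = new Path(tmpDir);
        FileSystem fs = statsDir.getFileSystem(conf);  // resolved per Path, so mixed schemes work
        fs.mkdirs(statsDir);
      }
      return true;
    } catch (IOException e) {
      return false;
    }
  }

  public static void main(String[] args) {
    List<String> dirs = Arrays.asList("/tmp/stats_a", "/tmp/stats_b");  // hypothetical dirs
    System.out.println(initStatsDirs(new Configuration(), dirs));
  }
}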