Use of org.apache.hadoop.fs.ContentSummary in project hive by apache.
The class MapReduceCompiler, method decideExecMode.
@Override
protected void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx,
    GlobalLimitCtx globalLimitCtx) throws SemanticException {
  // bypass for explain queries for now
  if (ctx.isExplainSkipExecution()) {
    return;
  }
  // user has told us to run in local mode or doesn't want auto-local mode
  if (ctx.isLocalOnlyExecutionMode() || !conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
    return;
  }
  final Context lCtx = ctx;
  PathFilter p = new PathFilter() {
    @Override
    public boolean accept(Path file) {
      return !lCtx.isMRTmpFileURI(file.toUri().getPath());
    }
  };
  List<ExecDriver> mrtasks = Utilities.getMRTasks(rootTasks);
  // map-reduce jobs will be run locally based on data size
  // first find out if any of the jobs needs to run non-locally
  boolean hasNonLocalJob = false;
  for (ExecDriver mrtask : mrtasks) {
    try {
      ContentSummary inputSummary = Utilities.getInputSummary(ctx, mrtask.getWork().getMapWork(), p);
      int numReducers = getNumberOfReducers(mrtask.getWork(), conf);
      long estimatedInput;
      if (globalLimitCtx != null && globalLimitCtx.isEnable()) {
        // If the global limit optimization is triggered, we will
        // estimate input data actually needed based on limit rows.
        // estimated Input = (num_limit * max_size_per_row) * (estimated_map + 2)
        //
        long sizePerRow = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
        estimatedInput = (globalLimitCtx.getGlobalOffset() + globalLimitCtx.getGlobalLimit()) * sizePerRow;
        long minSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE);
        long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
        estimatedInput = estimatedInput * (estimatedNumMap + 1);
      } else {
        estimatedInput = inputSummary.getLength();
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Task: " + mrtask.getId() + ", Summary: " + inputSummary.getLength() + ","
            + inputSummary.getFileCount() + "," + numReducers + ", estimated Input: " + estimatedInput);
      }
      if (MapRedTask.isEligibleForLocalMode(conf, numReducers, estimatedInput, inputSummary.getFileCount()) != null) {
        hasNonLocalJob = true;
        break;
      } else {
        mrtask.setLocalMode(true);
      }
    } catch (IOException e) {
      throw new SemanticException(e);
    }
  }
  if (!hasNonLocalJob) {
    // Entire query can be run locally.
    // Save the current tracker value and restore it when done.
    ctx.setOriginalTracker(ShimLoader.getHadoopShims().getJobLauncherRpcAddress(conf));
    ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, "local");
    console.printInfo("Automatically selecting local only mode for query");
  }
}
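A minimal arithmetic sketch of the limit-based estimate above may help. Every value below is hypothetical, and the variable names are invented for illustration rather than taken from Hive:

// Hypothetical inputs to the estimate computed in decideExecMode above.
long globalOffset = 0L;                      // no OFFSET clause
long globalLimit = 1_000L;                   // LIMIT 1000
long sizePerRow = 100_000L;                  // hive.limit.row.max.size
long totalInput = 4L * 1024 * 1024 * 1024;   // ContentSummary.getLength(): 4 GB
long minSplitSize = 128L * 1024 * 1024;      // mapred.min.split.size: 128 MB

long estimatedNumMap = totalInput / minSplitSize + 1;   // 33 map tasks
long estimatedInput = (globalOffset + globalLimit) * sizePerRow * (estimatedNumMap + 1);
// = 1000 * 100000 * 34 = 3.4e9 bytes, roughly 3.4 GB

MapRedTask.isEligibleForLocalMode then weighs this estimate, together with the reducer and file counts, against the hive.exec.mode.local.auto.* thresholds, returning a non-null reason string when the job cannot run locally.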
Use of org.apache.hadoop.fs.ContentSummary in project ignite by apache.
The class IgniteHadoopFileSystemAbstractSelfTest, method compareContent.
/**
* Compare content of two folders.
*
* @param cfg Paths configuration to compare.
* @throws IOException If failed.
*/
@SuppressWarnings("deprecation")
private void compareContent(Config cfg) throws IOException {
    Deque<Config> queue = new LinkedList<>();
    queue.add(cfg);
    for (Config c = queue.poll(); c != null; c = queue.poll()) {
        boolean exists;
        assertEquals("Check existence [src=" + c.src + ", dest=" + c.dest + ']',
            exists = c.srcFs.exists(c.src), c.destFs.exists(c.dest));
        assertEquals("Check types (files?) [src=" + c.src + ", dest=" + c.dest + ']',
            c.srcFs.isFile(c.src), c.destFs.isFile(c.dest));
        if (exists) {
            ContentSummary srcSummary = c.srcFs.getContentSummary(c.src);
            ContentSummary dstSummary = c.destFs.getContentSummary(c.dest);
            assertEquals("Directories number comparison failed",
                srcSummary.getDirectoryCount(), dstSummary.getDirectoryCount());
            assertEquals("Files number comparison failed",
                srcSummary.getFileCount(), dstSummary.getFileCount());
            assertEquals("Space consumed comparison failed",
                srcSummary.getSpaceConsumed(), dstSummary.getSpaceConsumed());
            assertEquals("Length comparison failed",
                srcSummary.getLength(), dstSummary.getLength());
            // Intentionally skipping quotas checks as they can vary.
        }
        else {
            assertContentSummaryFails(c.srcFs, c.src);
            assertContentSummaryFails(c.destFs, c.dest);
        }
        if (!exists)
            continue;
        FileStatus[] srcSt = c.srcFs.listStatus(c.src);
        FileStatus[] destSt = c.destFs.listStatus(c.dest);
        assert srcSt != null && destSt != null : "Both not null"
            + " [srcSt=" + Arrays.toString(srcSt) + ", destSt=" + Arrays.toString(destSt) + ']';
        assertEquals("Check listing [src=" + c.src + ", dest=" + c.dest + ']', srcSt.length, destSt.length);
        // Listing of the file returns the only element with this file.
        if (srcSt.length == 1 && c.src.equals(srcSt[0].getPath())) {
            assertEquals(c.dest, destSt[0].getPath());
            assertTrue("Expects file [src=" + c.src + ", srcSt[0]=" + srcSt[0] + ']', !srcSt[0].isDir());
            assertTrue("Expects file [dest=" + c.dest + ", destSt[0]=" + destSt[0] + ']', !destSt[0].isDir());
            FSDataInputStream srcIn = null;
            FSDataInputStream destIn = null;
            try {
                srcIn = c.srcFs.open(c.src);
                destIn = c.destFs.open(c.dest);
                GridTestIoUtils.assertEqualStreams(srcIn, destIn, srcSt[0].getLen());
            }
            finally {
                U.closeQuiet(srcIn);
                U.closeQuiet(destIn);
            }
            // Skip the following directories validations.
            continue;
        }
        // Sort both arrays.
        Arrays.sort(srcSt, STATUS_COMPARATOR);
        Arrays.sort(destSt, STATUS_COMPARATOR);
        // Dig in deep to the last leaf, instead of collecting full tree in memory.
        for (int i = 0; i < srcSt.length; i++)
            queue.addFirst(new Config(c.srcFs, srcSt[i].getPath(), c.destFs, destSt[i].getPath()));
        // Add non-existent file to check in the current folder.
        String rndFile = "Non-existent file #" + UUID.randomUUID().toString();
        queue.addFirst(new Config(c.srcFs, new Path(c.src, rndFile), c.destFs, new Path(c.dest, rndFile)));
    }
}
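The heart of the walk above is the four-field ContentSummary comparison. A minimal self-contained sketch of just that check; the helper name summariesMatch is invented for illustration, and it assumes two live FileSystem instances and existing paths:

import java.io.IOException;

import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

static boolean summariesMatch(FileSystem srcFs, Path src, FileSystem destFs, Path dest)
    throws IOException {
    ContentSummary s = srcFs.getContentSummary(src);
    ContentSummary d = destFs.getContentSummary(dest);
    // Quota fields are deliberately ignored, mirroring the test above.
    return s.getDirectoryCount() == d.getDirectoryCount()
        && s.getFileCount() == d.getFileCount()
        && s.getSpaceConsumed() == d.getSpaceConsumed()
        && s.getLength() == d.getLength();
}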
Use of org.apache.hadoop.fs.ContentSummary in project hadoop by apache.
The class TestQuota, method testMaxSpaceQuotas.
/**
* Test limit cases for setting space quotas.
*/
@Test
public void testMaxSpaceQuotas() throws Exception {
  final Path parent = new Path(PathUtils.getTestPath(getClass()), GenericTestUtils.getMethodName());
  assertTrue(dfs.mkdirs(parent));
  final FileSystem fs = cluster.getFileSystem();
  assertTrue("Not a HDFS: " + fs.getUri(), fs instanceof DistributedFileSystem);
  final DistributedFileSystem dfs = (DistributedFileSystem) fs;
  // create test directory
  final Path testFolder = new Path(parent, "testFolder");
  assertTrue(dfs.mkdirs(testFolder));
  // setting namespace quota to Long.MAX_VALUE - 1 should work
  dfs.setQuota(testFolder, Long.MAX_VALUE - 1, 10);
  ContentSummary c = dfs.getContentSummary(testFolder);
  compareQuotaUsage(c, dfs, testFolder);
  assertTrue("Quota not set properly", c.getQuota() == Long.MAX_VALUE - 1);
  // setting diskspace quota to Long.MAX_VALUE - 1 should work
  dfs.setQuota(testFolder, 10, Long.MAX_VALUE - 1);
  c = dfs.getContentSummary(testFolder);
  compareQuotaUsage(c, dfs, testFolder);
  assertTrue("Quota not set properly", c.getSpaceQuota() == Long.MAX_VALUE - 1);
  // setting namespace quota to Long.MAX_VALUE should not work + no error
  dfs.setQuota(testFolder, Long.MAX_VALUE, 10);
  c = dfs.getContentSummary(testFolder);
  compareQuotaUsage(c, dfs, testFolder);
  assertTrue("Quota should not have changed", c.getQuota() == 10);
  // setting diskspace quota to Long.MAX_VALUE should not work + no error
  dfs.setQuota(testFolder, 10, Long.MAX_VALUE);
  c = dfs.getContentSummary(testFolder);
  compareQuotaUsage(c, dfs, testFolder);
  assertTrue("Quota should not have changed", c.getSpaceQuota() == 10);
  // setting namespace quota to Long.MAX_VALUE + 1 should not work + error
  try {
    dfs.setQuota(testFolder, Long.MAX_VALUE + 1, 10);
    fail("Exception not thrown");
  } catch (IllegalArgumentException e) {
    // Expected
  }
  // setting diskspace quota to Long.MAX_VALUE + 1 should not work + error
  try {
    dfs.setQuota(testFolder, 10, Long.MAX_VALUE + 1);
    fail("Exception not thrown");
  } catch (IllegalArgumentException e) {
    // Expected
  }
}
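The silent no-op cases above follow from HdfsConstants.QUOTA_DONT_SET being defined as Long.MAX_VALUE: passing that value means "leave this quota alone", while Long.MAX_VALUE + 1 overflows to a negative long and is rejected. A minimal sketch of both behaviours, assuming an initialized DistributedFileSystem dfs and an existing directory dir (both hypothetical here):

// Long.MAX_VALUE is the QUOTA_DONT_SET sentinel: only the namespace quota
// below is updated; the existing space quota is left untouched.
dfs.setQuota(dir, 1000, Long.MAX_VALUE);

// Long.MAX_VALUE + 1 wraps around to Long.MIN_VALUE, which is neither a
// valid quota nor a recognized sentinel, so the call fails fast.
try {
    dfs.setQuota(dir, Long.MAX_VALUE + 1, 1000);
} catch (IllegalArgumentException expected) {
    // negative quotas other than the reset sentinel (-1) are rejected
}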
Use of org.apache.hadoop.fs.ContentSummary in project hadoop by apache.
The class TestQuota, method testHugeFileCount.
/**
 * A file count on the root should return the total number of files in the
 * filesystem, even when one folder contains more files than
 * "dfs.content-summary.limit".
 */
@Test
public void testHugeFileCount() throws IOException {
  final Path parent = new Path(PathUtils.getTestPath(getClass()), GenericTestUtils.getMethodName());
  assertTrue(dfs.mkdirs(parent));
  for (int i = 1; i <= 5; i++) {
    FSDataOutputStream out = dfs.create(new Path(parent, "Folder1/" + "file" + i), (short) 1);
    out.close();
  }
  FSDataOutputStream out = dfs.create(new Path(parent, "Folder2/file6"), (short) 1);
  out.close();
  ContentSummary contentSummary = dfs.getContentSummary(parent);
  compareQuotaUsage(contentSummary, dfs, parent);
  assertEquals(6, contentSummary.getFileCount());
}
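For the limit to matter here, the test cluster would be configured with a small dfs.content-summary.limit, which caps how many items the NameNode processes per locking period while computing a summary; getContentSummary still returns the complete counts. A minimal configuration sketch under that assumption (the value 2 is arbitrary):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;

Configuration conf = new Configuration();
// Process at most 2 items per NameNode lock acquisition during
// getContentSummary(); the final file/directory counts are unaffected.
conf.setInt(DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_KEY, 2);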
Use of org.apache.hadoop.fs.ContentSummary in project hadoop by apache.
The class TestQuota, method testSpaceCommands.
/**
 * Test HDFS operations that change the disk space consumed by a directory
 * tree, namely create, rename, delete, append, and setReplication.
 *
 * This is based on testNamespaceCommands() above.
 */
@Test
public void testSpaceCommands() throws Exception {
  final Path parent = new Path(PathUtils.getTestPath(getClass()), GenericTestUtils.getMethodName());
  assertTrue(dfs.mkdirs(parent));
  int fileLen = 1024;
  short replication = 3;
  int fileSpace = fileLen * replication;
  // create directory nqdir0/qdir1/qdir20/nqdir30
  assertTrue(dfs.mkdirs(new Path(parent, "nqdir0/qdir1/qdir20/nqdir30")));
  // set the quota of nqdir0/qdir1 to 4 * fileSpace
  final Path quotaDir1 = new Path(parent, "nqdir0/qdir1");
  dfs.setQuota(quotaDir1, HdfsConstants.QUOTA_DONT_SET, 4 * fileSpace);
  ContentSummary c = dfs.getContentSummary(quotaDir1);
  compareQuotaUsage(c, dfs, quotaDir1);
  assertEquals(c.getSpaceQuota(), 4 * fileSpace);
  // set the quota of nqdir0/qdir1/qdir20 to 6 * fileSpace
  final Path quotaDir20 = new Path(parent, "nqdir0/qdir1/qdir20");
  dfs.setQuota(quotaDir20, HdfsConstants.QUOTA_DONT_SET, 6 * fileSpace);
  c = dfs.getContentSummary(quotaDir20);
  compareQuotaUsage(c, dfs, quotaDir20);
  assertEquals(c.getSpaceQuota(), 6 * fileSpace);
  // create nqdir0/qdir1/qdir21 and set its space quota to 2 * fileSpace
  final Path quotaDir21 = new Path(parent, "nqdir0/qdir1/qdir21");
  assertTrue(dfs.mkdirs(quotaDir21));
  dfs.setQuota(quotaDir21, HdfsConstants.QUOTA_DONT_SET, 2 * fileSpace);
  c = dfs.getContentSummary(quotaDir21);
  compareQuotaUsage(c, dfs, quotaDir21);
  assertEquals(c.getSpaceQuota(), 2 * fileSpace);
  // create directory nqdir0/qdir1/qdir21/nqdir32
  Path tempPath = new Path(quotaDir21, "nqdir32");
  assertTrue(dfs.mkdirs(tempPath));
  // create a file under nqdir32/fileDir
  DFSTestUtil.createFile(dfs, new Path(tempPath, "fileDir/file1"), fileLen, replication, 0);
  c = dfs.getContentSummary(quotaDir21);
  compareQuotaUsage(c, dfs, quotaDir21);
  assertEquals(c.getSpaceConsumed(), fileSpace);
  // creating a larger file nqdir0/qdir1/qdir21/nqdir33/file2 should fail
  boolean hasException = false;
  try {
    DFSTestUtil.createFile(dfs, new Path(quotaDir21, "nqdir33/file2"), 2 * fileLen, replication, 0);
  } catch (DSQuotaExceededException e) {
    hasException = true;
  }
  assertTrue(hasException);
  // delete nqdir33
  assertTrue(dfs.delete(new Path(quotaDir21, "nqdir33"), true));
  c = dfs.getContentSummary(quotaDir21);
  compareQuotaUsage(c, dfs, quotaDir21);
  assertEquals(c.getSpaceConsumed(), fileSpace);
  assertEquals(c.getSpaceQuota(), 2 * fileSpace);
  // verify space before the move
  c = dfs.getContentSummary(quotaDir20);
  compareQuotaUsage(c, dfs, quotaDir20);
  assertEquals(c.getSpaceConsumed(), 0);
  // move nqdir0/qdir1/qdir21/nqdir32 to nqdir0/qdir1/qdir20/nqdir30
  Path dstPath = new Path(quotaDir20, "nqdir30");
  Path srcPath = new Path(quotaDir21, "nqdir32");
  assertTrue(dfs.rename(srcPath, dstPath));
  // verify space after the move
  c = dfs.getContentSummary(quotaDir20);
  assertEquals(c.getSpaceConsumed(), fileSpace);
  // verify space for its parent
  c = dfs.getContentSummary(quotaDir1);
  compareQuotaUsage(c, dfs, quotaDir1);
  assertEquals(c.getSpaceConsumed(), fileSpace);
  // verify space for the source of the move
  c = dfs.getContentSummary(quotaDir21);
  compareQuotaUsage(c, dfs, quotaDir21);
  assertEquals(c.getSpaceConsumed(), 0);
  final Path file2 = new Path(dstPath, "fileDir/file2");
  int file2Len = 2 * fileLen;
  // create a larger file under nqdir0/qdir1/qdir20/nqdir30
  DFSTestUtil.createFile(dfs, file2, file2Len, replication, 0);
  c = dfs.getContentSummary(quotaDir20);
  assertEquals(c.getSpaceConsumed(), 3 * fileSpace);
  c = dfs.getContentSummary(quotaDir21);
  compareQuotaUsage(c, dfs, quotaDir21);
  assertEquals(c.getSpaceConsumed(), 0);
  // reverse: moving nqdir0/qdir1/qdir20/nqdir30 to nqdir0/qdir1/qdir21/ should fail
  hasException = false;
  try {
    assertFalse(dfs.rename(dstPath, srcPath));
  } catch (DSQuotaExceededException e) {
    hasException = true;
  }
  assertTrue(hasException);
  // make sure no intermediate directories are left by the failed rename
  assertFalse(dfs.exists(srcPath));
  // the destination directory should still exist
  assertTrue(dfs.exists(dstPath));
  // verify space after the failed move
  c = dfs.getContentSummary(quotaDir20);
  assertEquals(c.getSpaceConsumed(), 3 * fileSpace);
  c = dfs.getContentSummary(quotaDir21);
  compareQuotaUsage(c, dfs, quotaDir21);
  assertEquals(c.getSpaceConsumed(), 0);
  // Test append:
  // verify space quota
  c = dfs.getContentSummary(quotaDir1);
  compareQuotaUsage(c, dfs, quotaDir1);
  assertEquals(c.getSpaceQuota(), 4 * fileSpace);
  // verify space before append
  c = dfs.getContentSummary(dstPath);
  compareQuotaUsage(c, dfs, dstPath);
  assertEquals(c.getSpaceConsumed(), 3 * fileSpace);
  OutputStream out = dfs.append(file2);
  // appending 1 fileLen should succeed
  out.write(new byte[fileLen]);
  out.close();
  // after append
  file2Len += fileLen;
  // verify space after append
  c = dfs.getContentSummary(dstPath);
  compareQuotaUsage(c, dfs, dstPath);
  assertEquals(c.getSpaceConsumed(), 4 * fileSpace);
  // now increase the quota for quotaDir1
  dfs.setQuota(quotaDir1, HdfsConstants.QUOTA_DONT_SET, 5 * fileSpace);
  // now, appending more than 1 fileLen should result in an error
  out = dfs.append(file2);
  hasException = false;
  try {
    out.write(new byte[fileLen + 1024]);
    out.flush();
    out.close();
  } catch (DSQuotaExceededException e) {
    hasException = true;
    IOUtils.closeStream(out);
  }
  assertTrue(hasException);
  // after the partial append
  file2Len += fileLen;
  // verify space after the partial append
  c = dfs.getContentSummary(dstPath);
  compareQuotaUsage(c, dfs, dstPath);
  assertEquals(c.getSpaceConsumed(), 5 * fileSpace);
  // Test setReplication:
  // first reduce the replication
  dfs.setReplication(file2, (short) (replication - 1));
  // verify that space is reduced by file2Len
  c = dfs.getContentSummary(dstPath);
  compareQuotaUsage(c, dfs, dstPath);
  assertEquals(c.getSpaceConsumed(), 5 * fileSpace - file2Len);
  // now try to increase the replication and expect an error
  hasException = false;
  try {
    dfs.setReplication(file2, (short) (replication + 1));
  } catch (DSQuotaExceededException e) {
    hasException = true;
  }
  assertTrue(hasException);
  // verify that the space consumed remains unchanged
  c = dfs.getContentSummary(dstPath);
  compareQuotaUsage(c, dfs, dstPath);
  assertEquals(c.getSpaceConsumed(), 5 * fileSpace - file2Len);
  // now increase the quota for quotaDir1 and quotaDir20
  dfs.setQuota(quotaDir1, HdfsConstants.QUOTA_DONT_SET, 10 * fileSpace);
  dfs.setQuota(quotaDir20, HdfsConstants.QUOTA_DONT_SET, 10 * fileSpace);
  // then increasing the replication should be ok
  dfs.setReplication(file2, (short) (replication + 1));
  // verify the increase in space
  c = dfs.getContentSummary(dstPath);
  compareQuotaUsage(c, dfs, dstPath);
  assertEquals(c.getSpaceConsumed(), 5 * fileSpace + file2Len);
  // Test HDFS-2053:
  // create directory hdfs-2053
  final Path quotaDir2053 = new Path(parent, "hdfs-2053");
  assertTrue(dfs.mkdirs(quotaDir2053));
  // create subdirectories /hdfs-2053/{A,B,C}
  final Path quotaDir2053_A = new Path(quotaDir2053, "A");
  assertTrue(dfs.mkdirs(quotaDir2053_A));
  final Path quotaDir2053_B = new Path(quotaDir2053, "B");
  assertTrue(dfs.mkdirs(quotaDir2053_B));
  final Path quotaDir2053_C = new Path(quotaDir2053, "C");
  assertTrue(dfs.mkdirs(quotaDir2053_C));
  // Factors to vary the sizes of test files created in each subdir.
  // The actual factors are not really important but they allow us to create
  // identifiable file sizes per subdir, which helps during debugging.
  int sizeFactorA = 1;
  int sizeFactorB = 2;
  int sizeFactorC = 4;
  // set a space quota for subdirectory C
  dfs.setQuota(quotaDir2053_C, HdfsConstants.QUOTA_DONT_SET, (sizeFactorC + 1) * fileSpace);
  c = dfs.getContentSummary(quotaDir2053_C);
  compareQuotaUsage(c, dfs, quotaDir2053_C);
  assertEquals(c.getSpaceQuota(), (sizeFactorC + 1) * fileSpace);
  // create a file under subdirectory A
  DFSTestUtil.createFile(dfs, new Path(quotaDir2053_A, "fileA"), sizeFactorA * fileLen, replication, 0);
  c = dfs.getContentSummary(quotaDir2053_A);
  compareQuotaUsage(c, dfs, quotaDir2053_A);
  assertEquals(c.getSpaceConsumed(), sizeFactorA * fileSpace);
  // create a file under subdirectory B
  DFSTestUtil.createFile(dfs, new Path(quotaDir2053_B, "fileB"), sizeFactorB * fileLen, replication, 0);
  c = dfs.getContentSummary(quotaDir2053_B);
  compareQuotaUsage(c, dfs, quotaDir2053_B);
  assertEquals(c.getSpaceConsumed(), sizeFactorB * fileSpace);
  // create a file under subdirectory C (which has a space quota)
  DFSTestUtil.createFile(dfs, new Path(quotaDir2053_C, "fileC"), sizeFactorC * fileLen, replication, 0);
  c = dfs.getContentSummary(quotaDir2053_C);
  compareQuotaUsage(c, dfs, quotaDir2053_C);
  assertEquals(c.getSpaceConsumed(), sizeFactorC * fileSpace);
  // check the space consumed for /hdfs-2053
  c = dfs.getContentSummary(quotaDir2053);
  compareQuotaUsage(c, dfs, quotaDir2053);
  assertEquals(c.getSpaceConsumed(), (sizeFactorA + sizeFactorB + sizeFactorC) * fileSpace);
}
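The invariant exercised throughout is that a file is charged against a space quota by its raw length times its replication factor, which is what ContentSummary.getSpaceConsumed() reports. A minimal arithmetic sketch using the test's own numbers:

int fileLen = 1024;                                  // raw bytes written
short replication = 3;                               // replicas per block
long spaceConsumed = (long) fileLen * replication;   // 3072 bytes charged to the quota
// Dropping one replica via setReplication(file, (short) 2) frees fileLen bytes
// of quota; adding one back requires fileLen more bytes of headroom.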