Search in sources :

Example 66 with LocalFileSystem

use of org.apache.hadoop.fs.LocalFileSystem in project hive by apache.

From the class TestRCFile, method testCloseForErroneousRCFile:

@Test
public void testCloseForErroneousRCFile() throws IOException {
    Configuration conf = new Configuration();
    LocalFileSystem localFs = FileSystem.getLocal(conf);
    // An empty file is not a valid RCFile, so constructing a reader on it must fail.
    Path path = new Path(System.getProperty("test.tmp.dir", ".") + "/broken.rcfile");
    localFs.create(path).close();
    // Capture the stream opened by RCFile.Reader so we can verify it gets closed
    // even when the constructor throws. A one-element array works around the
    // effectively-final restriction on captured locals.
    final TestFSDataInputStream[] openedFile = new TestFSDataInputStream[1];
    try {
        new RCFile.Reader(localFs, path, conf) {

            // Called from the RCFile.Reader constructor; overridden here so the
            // test can hold on to the stream the reader opens.
            @Override
            protected FSDataInputStream openFile(FileSystem fs, Path file, int bufferSize, long length) throws IOException {
                final InputStream wrapped = super.openFile(fs, file, bufferSize, length);
                openedFile[0] = new TestFSDataInputStream(wrapped);
                return openedFile[0];
            }
        };
        fail("IOException expected.");
    } catch (IOException expected) {
        // Expected: the empty file cannot be parsed as an RCFile.
    }
    assertNotNull(path + " should have been opened.", openedFile[0]);
    assertTrue("InputStream for " + path + " should have been closed.", openedFile[0].isClosed());
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) DataInputStream(java.io.DataInputStream) InputStream(java.io.InputStream) FileSystem(org.apache.hadoop.fs.FileSystem) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) RecordReader(org.apache.hadoop.mapred.RecordReader) IOException(java.io.IOException) Test(org.junit.Test)

Example 67 with LocalFileSystem

use of org.apache.hadoop.fs.LocalFileSystem in project hive by apache.

From the class TestMsckCheckPartitions, method testNumberOfListStatusCalls:

/**
 * Counts the number of listStatus calls made by the msck core routine that
 * lists sub-directories. This matters because unnecessary listStatus calls
 * can degrade performance on remote filesystems such as S3. The test builds
 * a mock FileSystem with a mock directory tree simulating a table that has
 * two partition keys and two partition values at each level, then verifies
 * that the number of listStatus invocations matches the theoretical value.
 *
 * @throws IOException
 * @throws MetastoreException
 */
@Test
public void testNumberOfListStatusCalls() throws IOException, MetastoreException {
    LocalFileSystem mockedFs = Mockito.mock(LocalFileSystem.class);
    Path tableRoot = new Path("mock:///tmp/testTable");
    // Two-level partition layout: country=* / city=* / datafile
    Path usDir = new Path(tableRoot, "country=US");
    Path indDir = new Path(tableRoot, "country=IND");
    Path paDir = new Path(usDir, "city=PA");
    Path sfDir = new Path(usDir, "city=SF");
    Path bomDir = new Path(indDir, "city=BOM");
    Path delDir = new Path(indDir, "city=DEL");
    Path paFile = new Path(paDir, "datafile");
    Path sfFile = new Path(sfDir, "datafile");
    Path bomFile = new Path(bomDir, "datafile");
    Path delFile = new Path(delDir, "datafile");
    // Stub level-1 listing (table root -> countries).
    FileStatus[] countryListing = getMockFileStatus(usDir, indDir);
    when(mockedFs.listStatus(tableRoot, FileUtils.HIDDEN_FILES_PATH_FILTER)).thenReturn(countryListing);
    // Stub level-2 listings (country -> cities).
    FileStatus[] usListing = getMockFileStatus(paDir, sfDir);
    when(mockedFs.listStatus(usDir, FileUtils.HIDDEN_FILES_PATH_FILTER)).thenReturn(usListing);
    FileStatus[] indListing = getMockFileStatus(bomDir, delDir);
    when(mockedFs.listStatus(indDir, FileUtils.HIDDEN_FILES_PATH_FILTER)).thenReturn(indListing);
    // Stub level-3 listings (city -> data files).
    FileStatus[] paListing = getMockFileStatus(paFile);
    when(mockedFs.listStatus(paDir, FileUtils.HIDDEN_FILES_PATH_FILTER)).thenReturn(paListing);
    FileStatus[] sfListing = getMockFileStatus(sfFile);
    when(mockedFs.listStatus(sfDir, FileUtils.HIDDEN_FILES_PATH_FILTER)).thenReturn(sfListing);
    FileStatus[] bomListing = getMockFileStatus(bomFile);
    when(mockedFs.listStatus(bomDir, FileUtils.HIDDEN_FILES_PATH_FILTER)).thenReturn(bomListing);
    FileStatus[] delListing = getMockFileStatus(delFile);
    when(mockedFs.listStatus(delDir, FileUtils.HIDDEN_FILES_PATH_FILTER)).thenReturn(delListing);
    HiveMetaStoreChecker checker = new HiveMetaStoreChecker(Mockito.mock(IMetaStoreClient.class), MetastoreConf.newMetastoreConf());
    ExecutorService pool = Executors.newFixedThreadPool(2);
    Set<Path> unknownPartitions = new HashSet<>();
    checker.checkPartitionDirs(pool, tableRoot, unknownPartitions, mockedFs, Arrays.asList("country", "city"));
    // With n partition columns, listStatus should be invoked
    // (numDirsAtLevel1) + (numDirsAtLevel2) + ... + (numDirsAtLevel n-1) times.
    // Here that is 1 (table level) + 2 (US, IND) = 3.
    verify(mockedFs, times(3)).listStatus(any(Path.class), any(PathFilter.class));
    Assert.assertEquals("msck should have found 4 unknown partitions", 4, unknownPartitions.size());
}
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) ExecutorService(java.util.concurrent.ExecutorService) HashSet(java.util.HashSet) Test(org.junit.Test) MetastoreUnitTest(org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest)

Example 68 with LocalFileSystem

use of org.apache.hadoop.fs.LocalFileSystem in project crunch by cloudera.

From the class CompositePathIterableTest, method testCreate_FilePresent:

@Test
public void testCreate_FilePresent() throws IOException {
    // Copy the fixture file somewhere the local filesystem can read it.
    String inputFilePath = FileHelper.createTempCopyOf("set1.txt");
    Configuration configuration = new Configuration();
    LocalFileSystem localFs = FileSystem.getLocal(configuration);
    TextFileReaderFactory<String> readerFactory = new TextFileReaderFactory<String>(Writables.strings(), configuration);
    Iterable<String> values = CompositePathIterable.create(localFs, new Path(inputFilePath), readerFactory);
    // The iterable should yield exactly the lines of set1.txt, in file order.
    assertEquals(Lists.newArrayList("b", "c", "a", "e"), Lists.newArrayList(values));
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) Test(org.junit.Test)

Example 69 with LocalFileSystem

use of org.apache.hadoop.fs.LocalFileSystem in project crunch by cloudera.

From the class CompositePathIterableTest, method testCreate_DirectoryNotPresent:

@Test(expected = IOException.class)
public void testCreate_DirectoryNotPresent() throws IOException {
    // Create a temp directory and immediately remove it to obtain a path
    // that is guaranteed not to exist.
    File missingDir = Files.createTempDir();
    missingDir.delete();
    // Sanity check
    assertFalse(missingDir.exists());
    Configuration configuration = new Configuration();
    LocalFileSystem localFs = FileSystem.getLocal(configuration);
    TextFileReaderFactory<String> readerFactory = new TextFileReaderFactory<String>(Writables.strings(), configuration);
    // Expected to throw IOException because the directory does not exist.
    CompositePathIterable.create(localFs, new Path(missingDir.getAbsolutePath()), readerFactory);
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) File(java.io.File) Test(org.junit.Test)

Example 70 with LocalFileSystem

use of org.apache.hadoop.fs.LocalFileSystem in project hbase by apache.

From the class TestStore, method testFlushSizeAccounting:

/**
   * Test we do not lose data if we fail a flush and then close.
   * Part of HBase-10466
   * @throws Exception
   */
@Test
public void testFlushSizeAccounting() throws Exception {
    LOG.info("Setting up a faulty file system that cannot write in " + this.name.getMethodName());
    final Configuration conf = HBaseConfiguration.create();
    // Only retry once.
    conf.setInt("hbase.hstore.flush.retries.number", 1);
    User user = User.createUserForTesting(conf, this.name.getMethodName(), new String[] { "foo" });
    // Inject our faulty LocalFileSystem
    conf.setClass("fs.file.impl", FaultyFileSystem.class, FileSystem.class);
    user.runAs(new PrivilegedExceptionAction<Object>() {

        @Override
        public Object run() throws Exception {
            // Make sure it worked (above is sensitive to caching details in hadoop core)
            FileSystem fs = FileSystem.get(conf);
            Assert.assertEquals(FaultyFileSystem.class, fs.getClass());
            FaultyFileSystem ffs = (FaultyFileSystem) fs;
            // Initialize region
            init(name.getMethodName(), conf);
            // A freshly initialized store should have nothing to flush.
            MemstoreSize size = store.memstore.getFlushableSize();
            Assert.assertEquals(0, size.getDataSize());
            LOG.info("Adding some data");
            MemstoreSize kvSize = new MemstoreSize();
            store.add(new KeyValue(row, family, qf1, 1, (byte[]) null), kvSize);
            // After one add, the flushable size must equal the size accounted to that add.
            size = store.memstore.getFlushableSize();
            Assert.assertEquals(kvSize, size);
            // Flush.  Bug #1 from HBASE-10466.  Make sure size calculation on failed flush is right.
            try {
                LOG.info("Flushing");
                flushStore(store, id++);
                Assert.fail("Didn't bubble up IOE!");
            } catch (IOException ioe) {
                // Expected: the faulty filesystem injects this failure on write.
                Assert.assertTrue(ioe.getMessage().contains("Fault injected"));
            }
            // The failed flush must not change the flushable size accounting.
            size = store.memstore.getFlushableSize();
            Assert.assertEquals(kvSize, size);
            MemstoreSize kvSize2 = new MemstoreSize();
            store.add(new KeyValue(row, family, qf2, 2, (byte[]) null), kvSize2);
            // Even though we add a new kv, we expect the flushable size to be 'same' since we have
            // not yet cleared the snapshot -- the above flush failed.
            // NOTE(review): 'size' is not recomputed after the second add, so this
            // re-asserts the pre-add value -- presumably intentional; confirm against
            // the snapshot semantics of getFlushableSize().
            Assert.assertEquals(kvSize, size);
            // Clear the injected fault so the next flush can succeed.
            ffs.fault.set(false);
            flushStore(store, id++);
            size = store.memstore.getFlushableSize();
            // Size should be the foreground kv size.
            Assert.assertEquals(kvSize2, size);
            flushStore(store, id++);
            // After the second successful flush everything is persisted; accounting
            // should be back to zero.
            size = store.memstore.getFlushableSize();
            assertEquals(0, size.getDataSize());
            assertEquals(0, size.getHeapSize());
            return null;
        }
    });
}
Also used : User(org.apache.hadoop.hbase.security.User) KeyValue(org.apache.hadoop.hbase.KeyValue) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) CompactionConfiguration(org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) FilterFileSystem(org.apache.hadoop.fs.FilterFileSystem) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) IOException(java.io.IOException) IOException(java.io.IOException) Test(org.junit.Test)

Aggregations

LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem)120 Path (org.apache.hadoop.fs.Path)77 Test (org.junit.Test)63 Configuration (org.apache.hadoop.conf.Configuration)56 FileSystem (org.apache.hadoop.fs.FileSystem)35 IOException (java.io.IOException)33 File (java.io.File)23 NewTableConfiguration (org.apache.accumulo.core.client.admin.NewTableConfiguration)23 SamplerConfiguration (org.apache.accumulo.core.client.sample.SamplerConfiguration)23 SummarizerConfiguration (org.apache.accumulo.core.client.summary.SummarizerConfiguration)23 DefaultConfiguration (org.apache.accumulo.core.conf.DefaultConfiguration)23 Key (org.apache.accumulo.core.data.Key)22 Value (org.apache.accumulo.core.data.Value)22 ArrayList (java.util.ArrayList)19 ExecutorService (java.util.concurrent.ExecutorService)15 Future (java.util.concurrent.Future)15 Scanner (org.apache.accumulo.core.client.Scanner)14 DataSegment (org.apache.druid.timeline.DataSegment)13 DataSegmentPusher (org.apache.druid.segment.loading.DataSegmentPusher)8 HdfsDataSegmentPusher (org.apache.druid.storage.hdfs.HdfsDataSegmentPusher)8