
Example 46 with LocalFileSystem

use of org.apache.hadoop.fs.LocalFileSystem in project druid by druid-io.

the class JobHelper method setupClasspath.

/**
 * Uploads jar files to HDFS and configures the job classpath.
 * Snapshot jar files are uploaded to the intermediate classpath and are not shared across jobs.
 * Non-snapshot jar files are uploaded to the distributed classpath and are shared across multiple jobs.
 *
 * @param distributedClassPath  classpath shared across multiple jobs
 * @param intermediateClassPath classpath exclusive to this job; used to upload SNAPSHOT jar files
 * @param job                   job to run
 *
 * @throws IOException
 */
public static void setupClasspath(final Path distributedClassPath, final Path intermediateClassPath, final Job job) throws IOException {
    String classpathProperty = System.getProperty("druid.hadoop.internal.classpath");
    if (classpathProperty == null) {
        // Fall back to the JVM classpath when no explicit override is provided.
        classpathProperty = System.getProperty("java.class.path");
    }
    String[] jarFiles = classpathProperty.split(File.pathSeparator);
    final Configuration conf = job.getConfiguration();
    final FileSystem fs = distributedClassPath.getFileSystem(conf);
    if (fs instanceof LocalFileSystem) {
        // Running against the local file system: the jars are already on the local classpath,
        // so there is nothing to upload.
        return;
    }
    for (String jarFilePath : jarFiles) {
        final File jarFile = new File(jarFilePath);
        if (jarFile.getName().endsWith(".jar")) {
            try {
                RetryUtils.retry(() -> {
                    if (isSnapshot(jarFile)) {
                        addSnapshotJarToClassPath(jarFile, intermediateClassPath, fs, job);
                    } else {
                        addJarToClassPath(jarFile, distributedClassPath, intermediateClassPath, fs, job);
                    }
                    return true;
                }, shouldRetryPredicate(), NUM_RETRIES);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) FileSystem(org.apache.hadoop.fs.FileSystem) File(java.io.File) URISyntaxException(java.net.URISyntaxException) FileNotFoundException(java.io.FileNotFoundException) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException)
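As a rough illustration of how this helper might be invoked, here is a minimal sketch; the job name and HDFS paths are made-up assumptions for the example, not taken from the Druid code above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

// Hypothetical caller; paths and job name are illustrative only.
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "example-indexing-job");
Path distributedClassPath = new Path("hdfs:///tmp/classpath");
// A per-job scratch directory keeps SNAPSHOT jars from leaking across jobs.
Path intermediateClassPath = new Path("hdfs:///tmp/working/example-indexing-job/classpath");
JobHelper.setupClasspath(distributedClassPath, intermediateClassPath, job);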

Example 47 with LocalFileSystem

use of org.apache.hadoop.fs.LocalFileSystem in project hazelcast by hazelcast.

the class ReadHadoopPTest method createInputFiles.

private void createInputFiles() throws IOException {
    Configuration conf = new Configuration();
    LocalFileSystem local = FileSystem.getLocal(conf);
    for (int i = 0; i < 4; i++) {
        org.apache.hadoop.fs.Path path = createPath();
        paths.add(path);
        if (inputFormatClass.getSimpleName().equals("SequenceFileInputFormat")) {
            createInputSequenceFiles(conf, path);
        } else {
            createInputTextFiles(local, path);
        }
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem)
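The text branch calls a helper that is not shown in the snippet. A minimal sketch of what such a helper could look like, assuming it simply writes a few newline-separated records through the LocalFileSystem (the actual Hazelcast test helper may differ):

import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical helper body; the real createInputTextFiles in the test is not shown above.
private static void createInputTextFiles(LocalFileSystem local, Path path) throws IOException {
    try (FSDataOutputStream out = local.create(path)) {
        for (int line = 0; line < 100; line++) {
            // One plain-text record per line; a text input format reads these back as values.
            out.writeBytes("line-" + line + "\n");
        }
    }
}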

Example 48 with LocalFileSystem

use of org.apache.hadoop.fs.LocalFileSystem in project hbase by apache.

the class TestHStore method testFlushSizeSizing.

/**
 * Test that we do not lose data if we fail a flush and then close.
 * Part of HBASE-10466.
 */
@Test
public void testFlushSizeSizing() throws Exception {
    LOG.info("Setting up a faulty file system that cannot write in " + this.name.getMethodName());
    final Configuration conf = HBaseConfiguration.create(TEST_UTIL.getConfiguration());
    // Only retry once.
    conf.setInt("hbase.hstore.flush.retries.number", 1);
    User user = User.createUserForTesting(conf, this.name.getMethodName(), new String[] { "foo" });
    // Inject our faulty LocalFileSystem
    conf.setClass("fs.file.impl", FaultyFileSystem.class, FileSystem.class);
    user.runAs(new PrivilegedExceptionAction<Object>() {

        @Override
        public Object run() throws Exception {
            // Make sure it worked (above is sensitive to caching details in hadoop core)
            FileSystem fs = FileSystem.get(conf);
            assertEquals(FaultyFileSystem.class, fs.getClass());
            FaultyFileSystem ffs = (FaultyFileSystem) fs;
            // Initialize region
            init(name.getMethodName(), conf);
            MemStoreSize mss = store.memstore.getFlushableSize();
            assertEquals(0, mss.getDataSize());
            LOG.info("Adding some data");
            MemStoreSizing kvSize = new NonThreadSafeMemStoreSizing();
            store.add(new KeyValue(row, family, qf1, 1, (byte[]) null), kvSize);
            // add the heap size of active (mutable) segment
            kvSize.incMemStoreSize(0, MutableSegment.DEEP_OVERHEAD, 0, 0);
            mss = store.memstore.getFlushableSize();
            assertEquals(kvSize.getMemStoreSize(), mss);
            // Flush.  Bug #1 from HBASE-10466.  Make sure size calculation on failed flush is right.
            try {
                LOG.info("Flushing");
                flushStore(store, id++);
                fail("Didn't bubble up IOE!");
            } catch (IOException ioe) {
                assertTrue(ioe.getMessage().contains("Fault injected"));
            }
            // due to snapshot, change mutable to immutable segment
            kvSize.incMemStoreSize(0, CSLMImmutableSegment.DEEP_OVERHEAD_CSLM - MutableSegment.DEEP_OVERHEAD, 0, 0);
            mss = store.memstore.getFlushableSize();
            assertEquals(kvSize.getMemStoreSize(), mss);
            MemStoreSizing kvSize2 = new NonThreadSafeMemStoreSizing();
            store.add(new KeyValue(row, family, qf2, 2, (byte[]) null), kvSize2);
            kvSize2.incMemStoreSize(0, MutableSegment.DEEP_OVERHEAD, 0, 0);
            // Even though we add a new kv, we expect the flushable size to be 'same' since we have
            // not yet cleared the snapshot -- the above flush failed.
            assertEquals(kvSize.getMemStoreSize(), mss);
            ffs.fault.set(false);
            flushStore(store, id++);
            mss = store.memstore.getFlushableSize();
            // Size should be the foreground kv size.
            assertEquals(kvSize2.getMemStoreSize(), mss);
            flushStore(store, id++);
            mss = store.memstore.getFlushableSize();
            assertEquals(0, mss.getDataSize());
            assertEquals(MutableSegment.DEEP_OVERHEAD, mss.getHeapSize());
            return null;
        }
    });
}
Also used : User(org.apache.hadoop.hbase.security.User) KeyValue(org.apache.hadoop.hbase.KeyValue) Configuration(org.apache.hadoop.conf.Configuration) CompactionConfiguration(org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) IOException(java.io.IOException) IllegalArgumentIOException(org.apache.hadoop.hbase.exceptions.IllegalArgumentIOException) FileSystem(org.apache.hadoop.fs.FileSystem) FilterFileSystem(org.apache.hadoop.fs.FilterFileSystem) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) Test(org.junit.Test)
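The test relies on a FaultyFileSystem class whose body is not shown here. Below is a minimal sketch of a fault-injecting filesystem in the same spirit, assuming it wraps LocalFileSystem via FilterFileSystem and fails file creation while a fault flag is set; the real class in the HBase test code is more elaborate and injects the fault at write time.

import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FilterFileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.Progressable;

// Hypothetical sketch only; not the FaultyFileSystem used by TestHStore.
public class FaultyFileSystem extends FilterFileSystem {
    // Fault is on by default; the test flips it off with ffs.fault.set(false).
    final AtomicBoolean fault = new AtomicBoolean(true);

    public FaultyFileSystem() {
        super(new LocalFileSystem());   // delegate everything else to the local filesystem
    }

    @Override
    public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite,
            int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
        if (fault.get()) {
            throw new IOException("Fault injected");   // make any flush that writes a store file fail
        }
        return super.create(f, permission, overwrite, bufferSize, replication, blockSize, progress);
    }
}

It is registered through conf.setClass("fs.file.impl", FaultyFileSystem.class, FileSystem.class), which requires the no-argument constructor shown above.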

Example 49 with LocalFileSystem

use of org.apache.hadoop.fs.LocalFileSystem in project hbase by apache.

the class TestHStore method testHandleErrorsInFlush.

@Test
public void testHandleErrorsInFlush() throws Exception {
    LOG.info("Setting up a faulty file system that cannot write");
    final Configuration conf = HBaseConfiguration.create(TEST_UTIL.getConfiguration());
    User user = User.createUserForTesting(conf, "testhandleerrorsinflush", new String[] { "foo" });
    // Inject our faulty LocalFileSystem
    conf.setClass("fs.file.impl", FaultyFileSystem.class, FileSystem.class);
    user.runAs(new PrivilegedExceptionAction<Object>() {

        @Override
        public Object run() throws Exception {
            // Make sure it worked (above is sensitive to caching details in hadoop core)
            FileSystem fs = FileSystem.get(conf);
            assertEquals(FaultyFileSystem.class, fs.getClass());
            // Initialize region
            init(name.getMethodName(), conf);
            LOG.info("Adding some data");
            store.add(new KeyValue(row, family, qf1, 1, (byte[]) null), null);
            store.add(new KeyValue(row, family, qf2, 1, (byte[]) null), null);
            store.add(new KeyValue(row, family, qf3, 1, (byte[]) null), null);
            LOG.info("Before flush, we should have no files");
            Collection<StoreFileInfo> files = store.getRegionFileSystem().getStoreFiles(store.getColumnFamilyName());
            assertEquals(0, files != null ? files.size() : 0);
            // flush
            try {
                LOG.info("Flushing");
                flush(1);
                fail("Didn't bubble up IOE!");
            } catch (IOException ioe) {
                assertTrue(ioe.getMessage().contains("Fault injected"));
            }
            LOG.info("After failed flush, we should still have no files!");
            files = store.getRegionFileSystem().getStoreFiles(store.getColumnFamilyName());
            assertEquals(0, files != null ? files.size() : 0);
            store.getHRegion().getWAL().close();
            return null;
        }
    });
    FileSystem.closeAllForUGI(user.getUGI());
}
Also used : User(org.apache.hadoop.hbase.security.User) KeyValue(org.apache.hadoop.hbase.KeyValue) Configuration(org.apache.hadoop.conf.Configuration) CompactionConfiguration(org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) FilterFileSystem(org.apache.hadoop.fs.FilterFileSystem) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) Collection(java.util.Collection) IOException(java.io.IOException) IllegalArgumentIOException(org.apache.hadoop.hbase.exceptions.IllegalArgumentIOException) Test(org.junit.Test)
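The in-code comment "above is sensitive to caching details in hadoop core" refers to the static FileSystem cache: a previously cached LocalFileSystem for the file scheme could be handed back instead of the injected class, which is presumably why the test runs as a freshly created user (the cache key includes the UGI) and closes that user's filesystems afterwards. One alternative way to sidestep the cache, sketched here as an assumption rather than something the snippet above does, is to disable caching for the scheme:

// Hypothetical hardening; the test above relies on a fresh test user instead.
conf.setClass("fs.file.impl", FaultyFileSystem.class, FileSystem.class);
conf.setBoolean("fs.file.impl.disable.cache", true);   // bypass the FileSystem cache for file:// URIs
FileSystem fs = FileSystem.get(conf);
assertEquals(FaultyFileSystem.class, fs.getClass());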

Example 50 with LocalFileSystem

use of org.apache.hadoop.fs.LocalFileSystem in project druid by druid-io.

the class HdfsDataSegmentPusherTest method shouldMakeDefaultSegmentOutputPathIfNotHDFS.

@Test
public void shouldMakeDefaultSegmentOutputPathIfNotHDFS() {
    final HadoopIngestionSpec schema;
    try {
        schema = objectMapper.readValue(
            "{\n"
            + "    \"dataSchema\": {\n"
            + "        \"dataSource\": \"the:data:source\",\n"
            + "        \"metricsSpec\": [],\n"
            + "        \"granularitySpec\": {\n"
            + "            \"type\": \"uniform\",\n"
            + "            \"segmentGranularity\": \"hour\",\n"
            + "            \"intervals\": [\"2012-07-10/P1D\"]\n"
            + "        }\n"
            + "    },\n"
            + "    \"ioConfig\": {\n"
            + "        \"type\": \"hadoop\",\n"
            + "        \"segmentOutputPath\": \"/tmp/dru:id/data:test\"\n"
            + "    }\n"
            + "}",
            HadoopIngestionSpec.class
        );
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    HadoopDruidIndexerConfig cfg = new HadoopDruidIndexerConfig(schema.withTuningConfig(schema.getTuningConfig().withVersion("some:brand:new:version")));
    Bucket bucket = new Bucket(4711, new DateTime(2012, 07, 10, 5, 30, ISOChronology.getInstanceUTC()), 4712);
    Path path = JobHelper.makeFileNamePath(
        new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()),
        new LocalFileSystem(),
        new DataSegment(
            cfg.getSchema().getDataSchema().getDataSource(),
            cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(),
            cfg.getSchema().getTuningConfig().getVersion(),
            null,
            null,
            null,
            new NumberedShardSpec(bucket.partitionNum, 5000),
            -1,
            0
        ),
        JobHelper.INDEX_ZIP,
        new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig())
    );
    Assert.assertEquals("file:/tmp/dru:id/data:test/the:data:source/2012-07-10T05:00:00.000Z_2012-07-10T06:00:00.000Z/some:brand:new:" + "version/4712/index.zip", path.toString());
    path = JobHelper.makeTmpPath(
        new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()),
        new LocalFileSystem(),
        new DataSegment(
            cfg.getSchema().getDataSchema().getDataSource(),
            cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(),
            cfg.getSchema().getTuningConfig().getVersion(),
            null,
            null,
            null,
            new NumberedShardSpec(bucket.partitionNum, 5000),
            -1,
            0
        ),
        new TaskAttemptID("abc", 123, TaskType.REDUCE, 1, 0),
        new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig())
    );
    Assert.assertEquals("file:/tmp/dru:id/data:test/the:data:source/2012-07-10T05:00:00.000Z_2012-07-10T06:00:00.000Z/some:brand:new:" + "version/4712/index.zip.0", path.toString());
}
Also used : HadoopIngestionSpec(org.apache.druid.indexer.HadoopIngestionSpec) Path(org.apache.hadoop.fs.Path) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) HadoopDruidIndexerConfig(org.apache.druid.indexer.HadoopDruidIndexerConfig) DataSegment(org.apache.druid.timeline.DataSegment) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) ExpectedException(org.junit.rules.ExpectedException) IOException(java.io.IOException) DateTime(org.joda.time.DateTime) Bucket(org.apache.druid.indexer.Bucket) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Test(org.junit.Test)
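The two assertions pin down the segment path layout: output path, then data source, then the interval rendered as start_end, then version, then partition number, then index.zip (the tmp path gets a trailing .0 attempt suffix, and the file: prefix comes from the LocalFileSystem). A rough reconstruction of that layout, as a hypothetical helper rather than JobHelper's actual code:

import org.apache.druid.timeline.DataSegment;

// Hypothetical helper illustrating the asserted layout; not part of the Druid codebase.
static String expectedSegmentPath(String segmentOutputPath, DataSegment segment) {
    return String.format(
        "%s/%s/%s_%s/%s/%d/index.zip",
        segmentOutputPath,                              // "/tmp/dru:id/data:test"
        segment.getDataSource(),                        // "the:data:source"
        segment.getInterval().getStart(),               // 2012-07-10T05:00:00.000Z
        segment.getInterval().getEnd(),                 // 2012-07-10T06:00:00.000Z
        segment.getVersion(),                           // "some:brand:new:version"
        segment.getShardSpec().getPartitionNum());      // 4712
}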

Aggregations

LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem) 120
Path (org.apache.hadoop.fs.Path) 77
Test (org.junit.Test) 63
Configuration (org.apache.hadoop.conf.Configuration) 56
FileSystem (org.apache.hadoop.fs.FileSystem) 35
IOException (java.io.IOException) 33
File (java.io.File) 23
NewTableConfiguration (org.apache.accumulo.core.client.admin.NewTableConfiguration) 23
SamplerConfiguration (org.apache.accumulo.core.client.sample.SamplerConfiguration) 23
SummarizerConfiguration (org.apache.accumulo.core.client.summary.SummarizerConfiguration) 23
DefaultConfiguration (org.apache.accumulo.core.conf.DefaultConfiguration) 23
Key (org.apache.accumulo.core.data.Key) 22
Value (org.apache.accumulo.core.data.Value) 22
ArrayList (java.util.ArrayList) 19
ExecutorService (java.util.concurrent.ExecutorService) 15
Future (java.util.concurrent.Future) 15
Scanner (org.apache.accumulo.core.client.Scanner) 14
DataSegment (org.apache.druid.timeline.DataSegment) 13
DataSegmentPusher (org.apache.druid.segment.loading.DataSegmentPusher) 8
HdfsDataSegmentPusher (org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) 8