Search in sources:

Example 1 with HBaseDDLExecutorFactory

Use of co.cask.cdap.data2.util.hbase.HBaseDDLExecutorFactory in project cdap by caskdata.

In the class MapReduceRuntimeService, method buildJobJar:

/**
   * Creates a jar that contains everything needed by Hadoop to run the MapReduce program.
   *
   * @return a new {@link File} containing the job jar
   */
private File buildJobJar(Job job, File tempDir) throws IOException, URISyntaxException {
    File jobJar = new File(tempDir, "job.jar");
    LOG.debug("Creating Job jar: {}", jobJar);
    // For local mode, nothing is needed in the job jar since we use the classloader in the configuration object.
    if (MapReduceTaskContextProvider.isLocal(job.getConfiguration())) {
        JarOutputStream output = new JarOutputStream(new FileOutputStream(jobJar));
        output.close();
        return jobJar;
    }
    // Exclude libraries that are definitely not needed:
    // Hadoop - available on the cluster
    // Spark - MapReduce never uses Spark
    final HadoopClassExcluder hadoopClassExcluder = new HadoopClassExcluder();
    ApplicationBundler appBundler = new ApplicationBundler(new ClassAcceptor() {

        @Override
        public boolean accept(String className, URL classUrl, URL classPathUrl) {
            if (className.startsWith("org.apache.spark") || classPathUrl.toString().contains("spark-assembly")) {
                return false;
            }
            return hadoopClassExcluder.accept(className, classUrl, classPathUrl);
        }
    });
    Set<Class<?>> classes = Sets.newHashSet();
    classes.add(MapReduce.class);
    classes.add(MapperWrapper.class);
    classes.add(ReducerWrapper.class);
    classes.add(SLF4JBridgeHandler.class);
    // Add the job's InputFormat/OutputFormat classes when CDAP is configured to include custom classes in the job jar.
    if (cConf.getBoolean(Constants.AppFabric.MAPREDUCE_INCLUDE_CUSTOM_CLASSES)) {
        try {
            Class<? extends InputFormat<?, ?>> inputFormatClass = job.getInputFormatClass();
            classes.add(inputFormatClass);
            // If it is a MapReduceStreamInputFormat, also add its StreamEventDecoder class.
            if (MapReduceStreamInputFormat.class.isAssignableFrom(inputFormatClass)) {
                Class<? extends StreamEventDecoder> decoderType = MapReduceStreamInputFormat.getDecoderClass(job.getConfiguration());
                if (decoderType != null) {
                    classes.add(decoderType);
                }
            }
        } catch (Throwable t) {
            // Ignore: if the InputFormat class cannot be loaded, it is simply not added to the job jar.
            LOG.debug("InputFormat class not found: {}", t.getMessage(), t);
        }
        try {
            Class<? extends OutputFormat<?, ?>> outputFormatClass = job.getOutputFormatClass();
            classes.add(outputFormatClass);
        } catch (Throwable t) {
            // Ignore: if the OutputFormat class cannot be loaded, it is simply not added to the job jar.
            LOG.debug("OutputFormat class not found: {}", t.getMessage(), t);
        }
    }
    // Add KMS class
    if (SecureStoreUtils.isKMSBacked(cConf) && SecureStoreUtils.isKMSCapable()) {
        classes.add(SecureStoreUtils.getKMSSecureStore());
    }
    Class<? extends HBaseDDLExecutor> ddlExecutorClass = new HBaseDDLExecutorFactory(cConf, hConf).get().getClass();
    try {
        Class<?> hbaseTableUtilClass = HBaseTableUtilFactory.getHBaseTableUtilClass(cConf);
        classes.add(hbaseTableUtilClass);
        classes.add(ddlExecutorClass);
    } catch (ProvisionException e) {
        LOG.warn("Not including HBaseTableUtil classes in submitted Job Jar since they are not available");
    }
    ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(
        new CombineClassLoader(getClass().getClassLoader(), Collections.singleton(ddlExecutorClass.getClassLoader())));
    try {
        appBundler.createBundle(Locations.toLocation(jobJar), classes);
    } finally {
        ClassLoaders.setContextClassLoader(oldClassLoader);
    }
    LOG.debug("Built MapReduce Job Jar at {}", jobJar.toURI());
    return jobJar;
}
Also used : HadoopClassExcluder(co.cask.cdap.common.twill.HadoopClassExcluder) JarOutputStream(java.util.jar.JarOutputStream) ClassAcceptor(org.apache.twill.api.ClassAcceptor) URL(java.net.URL) CombineClassLoader(co.cask.cdap.common.lang.CombineClassLoader) ProvisionException(com.google.inject.ProvisionException) FileOutputStream(java.io.FileOutputStream) HBaseDDLExecutorFactory(co.cask.cdap.data2.util.hbase.HBaseDDLExecutorFactory) WeakReferenceDelegatorClassLoader(co.cask.cdap.common.lang.WeakReferenceDelegatorClassLoader) CombineClassLoader(co.cask.cdap.common.lang.CombineClassLoader) File(java.io.File) JarFile(java.util.jar.JarFile) ApplicationBundler(org.apache.twill.internal.ApplicationBundler)
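
The part of this example worth calling out is the save-and-restore of the thread context ClassLoader around appBundler.createBundle(), which is what lets the bundler resolve the HBaseDDLExecutor implementation class loaded by a different ClassLoader. Below is a minimal standalone sketch of the same pattern using only JDK APIs; the class and method names are hypothetical and not part of CDAP:

import java.util.concurrent.Callable;

public final class ContextClassLoaderSwap {

    private ContextClassLoaderSwap() { }

    /**
     * Runs the given task with {@code loader} installed as the thread context ClassLoader,
     * restoring the previous ClassLoader afterwards, even if the task throws.
     */
    public static <T> T runWith(ClassLoader loader, Callable<T> task) throws Exception {
        Thread current = Thread.currentThread();
        ClassLoader previous = current.getContextClassLoader();
        current.setContextClassLoader(loader);
        try {
            return task.call();
        } finally {
            current.setContextClassLoader(previous);
        }
    }
}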

Example 2 with HBaseDDLExecutorFactory

Use of co.cask.cdap.data2.util.hbase.HBaseDDLExecutorFactory in project cdap by caskdata.

In the class HBaseTableTest, method beforeClass:

@BeforeClass
public static void beforeClass() throws Exception {
    cConf = CConfiguration.create();
    hBaseTableUtil = new HBaseTableUtilFactory(cConf, new SimpleNamespaceQueryAdmin()).get();
    // TODO: CDAP-1634 - Explore a way to not have every HBase test class do this.
    ddlExecutor = new HBaseDDLExecutorFactory(cConf, TEST_HBASE.getHBaseAdmin().getConfiguration()).get();
    ddlExecutor.createNamespaceIfNotExists(hBaseTableUtil.getHBaseNamespace(NAMESPACE1));
    ddlExecutor.createNamespaceIfNotExists(hBaseTableUtil.getHBaseNamespace(NAMESPACE2));
}
Also used : SimpleNamespaceQueryAdmin(co.cask.cdap.common.namespace.SimpleNamespaceQueryAdmin) HBaseDDLExecutorFactory(co.cask.cdap.data2.util.hbase.HBaseDDLExecutorFactory) HBaseTableUtilFactory(co.cask.cdap.data2.util.hbase.HBaseTableUtilFactory) BeforeClass(org.junit.BeforeClass)
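
All of these examples share the same core pattern: build an HBaseDDLExecutorFactory from the CDAP and HBase configurations, call get() to obtain an HBaseDDLExecutor, and make sure the required HBase namespaces exist before touching tables. A minimal sketch of that pattern follows, using try-with-resources as in Example 4 below; the namespace name is a placeholder, and HBaseConfiguration.create() assumes hbase-site.xml is on the classpath:

import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.data2.util.hbase.HBaseDDLExecutorFactory;
import co.cask.cdap.spi.hbase.HBaseDDLExecutor;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public final class DdlExecutorExample {

    public static void main(String[] args) throws Exception {
        CConfiguration cConf = CConfiguration.create();
        // Assumes hbase-site.xml is on the classpath; otherwise pass in a prepared Configuration.
        Configuration hConf = HBaseConfiguration.create();

        // HBaseDDLExecutor is used with try-with-resources in Example 4, so it can be closed the same way here.
        try (HBaseDDLExecutor executor = new HBaseDDLExecutorFactory(cConf, hConf).get()) {
            executor.createNamespaceIfNotExists("cdap_examples");  // placeholder namespace name
        }
    }
}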

Example 3 with HBaseDDLExecutorFactory

Use of co.cask.cdap.data2.util.hbase.HBaseDDLExecutorFactory in project cdap by caskdata.

In the class HBaseQueueTest, method init:

@BeforeClass
public static void init() throws Exception {
    hConf = TEST_HBASE.getConfiguration();
    // Customize test configuration
    cConf = CConfiguration.create();
    cConf.set(Constants.Zookeeper.QUORUM, TEST_HBASE.getZkConnectionString());
    cConf.set(TxConstants.Service.CFG_DATA_TX_BIND_PORT, Integer.toString(Networks.getRandomPort()));
    cConf.set(Constants.Dataset.TABLE_PREFIX, TABLE_PREFIX);
    cConf.set(Constants.CFG_HDFS_USER, System.getProperty("user.name"));
    cConf.setLong(QueueConstants.QUEUE_CONFIG_UPDATE_FREQUENCY, 10000L);
    // Test with fewer splits than the default (16).
    // Fewer splits make forceEvict run faster, which speeds up all the queue tests.
    cConf.setInt(QueueConstants.ConfigKeys.QUEUE_TABLE_PRESPLITS, 4);
    cConf.setLong(TxConstants.Manager.CFG_TX_TIMEOUT, 100000000L);
    cConf.setLong(TxConstants.Manager.CFG_TX_MAX_TIMEOUT, 100000000L);
    injector = Guice.createInjector(
        new DataFabricModules().getDistributedModules(),
        new ConfigModule(cConf, hConf),
        new ZKClientModule(),
        new LocationRuntimeModule().getDistributedModules(),
        new NamespaceClientUnitTestModule().getModule(),
        new DiscoveryRuntimeModule().getDistributedModules(),
        new TransactionMetricsModule(),
        new AuthorizationTestModule(),
        new AuthorizationEnforcementModule().getInMemoryModules(),
        new AuthenticationContextModules().getMasterModule(),
        new DataSetsModules().getInMemoryModules(),
        new SystemDatasetRuntimeModule().getDistributedModules(),
        new AbstractModule() {

        @Override
        protected void configure() {
            bind(NotificationFeedManager.class).to(NoOpNotificationFeedManager.class).in(Scopes.SINGLETON);
            bind(OwnerAdmin.class).to(DefaultOwnerAdmin.class);
            bind(UGIProvider.class).to(UnsupportedUGIProvider.class);
        }
    });
    // Create the HBase namespaces needed by the tests
    hbaseAdmin = TEST_HBASE.getHBaseAdmin();
    ddlExecutor = new HBaseDDLExecutorFactory(cConf, hbaseAdmin.getConfiguration()).get();
    tableUtil = injector.getInstance(HBaseTableUtil.class);
    ddlExecutor.createNamespaceIfNotExists(tableUtil.getHBaseNamespace(NamespaceId.SYSTEM));
    ddlExecutor.createNamespaceIfNotExists(tableUtil.getHBaseNamespace(NAMESPACE_ID));
    ddlExecutor.createNamespaceIfNotExists(tableUtil.getHBaseNamespace(NAMESPACE_ID1));
    ConfigurationTable configTable = new ConfigurationTable(hConf);
    configTable.write(ConfigurationTable.Type.DEFAULT, cConf);
    zkClientService = injector.getInstance(ZKClientService.class);
    zkClientService.startAndWait();
    txService = injector.getInstance(TransactionService.class);
    Thread t = new Thread() {

        @Override
        public void run() {
            txService.start();
        }
    };
    t.start();
    // The TransactionManager is started by the txService; we only need a client reference
    // so that we can ask for a tx snapshot.
    txSystemClient = injector.getInstance(TransactionSystemClient.class);
    queueClientFactory = injector.getInstance(QueueClientFactory.class);
    queueAdmin = injector.getInstance(QueueAdmin.class);
    executorFactory = injector.getInstance(TransactionExecutorFactory.class);
}
Also used : ConfigModule(co.cask.cdap.common.guice.ConfigModule) UGIProvider(co.cask.cdap.security.impersonation.UGIProvider) UnsupportedUGIProvider(co.cask.cdap.security.impersonation.UnsupportedUGIProvider) TransactionMetricsModule(co.cask.cdap.data.runtime.TransactionMetricsModule) TransactionExecutorFactory(org.apache.tephra.TransactionExecutorFactory) ZKClientModule(co.cask.cdap.common.guice.ZKClientModule) TransactionSystemClient(org.apache.tephra.TransactionSystemClient) HBaseDDLExecutorFactory(co.cask.cdap.data2.util.hbase.HBaseDDLExecutorFactory) SystemDatasetRuntimeModule(co.cask.cdap.data.runtime.SystemDatasetRuntimeModule) DiscoveryRuntimeModule(co.cask.cdap.common.guice.DiscoveryRuntimeModule) NamespaceClientUnitTestModule(co.cask.cdap.common.guice.NamespaceClientUnitTestModule) TransactionService(org.apache.tephra.distributed.TransactionService) AuthenticationContextModules(co.cask.cdap.security.auth.context.AuthenticationContextModules) DataSetsModules(co.cask.cdap.data.runtime.DataSetsModules) DefaultOwnerAdmin(co.cask.cdap.security.impersonation.DefaultOwnerAdmin) OwnerAdmin(co.cask.cdap.security.impersonation.OwnerAdmin) LocationRuntimeModule(co.cask.cdap.common.guice.LocationRuntimeModule) AuthorizationTestModule(co.cask.cdap.security.authorization.AuthorizationTestModule) HBaseTableUtil(co.cask.cdap.data2.util.hbase.HBaseTableUtil) AbstractModule(com.google.inject.AbstractModule) QueueAdmin(co.cask.cdap.data2.transaction.queue.QueueAdmin) ZKClientService(org.apache.twill.zookeeper.ZKClientService) ConfigurationTable(co.cask.cdap.data2.util.hbase.ConfigurationTable) QueueClientFactory(co.cask.cdap.data2.queue.QueueClientFactory) NoOpNotificationFeedManager(co.cask.cdap.notifications.feeds.service.NoOpNotificationFeedManager) DataFabricModules(co.cask.cdap.data.runtime.DataFabricModules) AuthorizationEnforcementModule(co.cask.cdap.security.authorization.AuthorizationEnforcementModule) BeforeClass(org.junit.BeforeClass)
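
A matching @AfterClass (org.junit.AfterClass) teardown is not part of this snippet. A plausible sketch is shown below, under the loud assumptions that HBaseDDLExecutor exposes deleteNamespaceIfExists as the counterpart of createNamespaceIfNotExists and that the services started above have the usual stop counterparts; verify each call against the actual APIs before relying on it:

@AfterClass
public static void finish() throws Exception {
    // Hypothetical cleanup mirroring init(); every call below is an assumption, not confirmed by this page.
    // Assumes deleteNamespaceIfExists(String) exists on HBaseDDLExecutor.
    ddlExecutor.deleteNamespaceIfExists(tableUtil.getHBaseNamespace(NAMESPACE_ID1));
    ddlExecutor.deleteNamespaceIfExists(tableUtil.getHBaseNamespace(NAMESPACE_ID));
    // Assumes stop()/stopAndWait() counterparts exist for the services started in init().
    txService.stop();
    zkClientService.stopAndWait();
}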

Example 4 with HBaseDDLExecutorFactory

Use of co.cask.cdap.data2.util.hbase.HBaseDDLExecutorFactory in project cdap by caskdata.

In the class ConfigurationTable, method write:

/**
   * Writes the {@link CConfiguration} instance as a new row to the HBase table.  The {@link Type} given is used as
   * the row key (allowing multiple configurations to be stored).  After the new configuration is written, this will
   * delete any configurations written with an earlier timestamp (to prevent removed values from being visible).
   * @param cConf The CConfiguration instance to store
   * @throws IOException If an error occurs while writing the configuration
   */
public void write(Type type, CConfiguration cConf) throws IOException {
    // must create the table if it doesn't exist
    HTable table = null;
    try (HBaseDDLExecutor ddlExecutor = new HBaseDDLExecutorFactory(cConf, hbaseConf).get()) {
        HBaseTableUtil tableUtil = new HBaseTableUtilFactory(cConf).get();
        TableId tableId = tableUtil.createHTableId(NamespaceId.SYSTEM, TABLE_NAME);
        ColumnFamilyDescriptorBuilder cfdBuilder = HBaseTableUtil.getColumnFamilyDescriptorBuilder(Bytes.toString(FAMILY), hbaseConf);
        TableDescriptorBuilder tdBuilder = HBaseTableUtil.getTableDescriptorBuilder(tableId, cConf).addColumnFamily(cfdBuilder.build());
        ddlExecutor.createTableIfNotExists(tdBuilder.build(), null);
        long now = System.currentTimeMillis();
        long previous = now - 1;
        byte[] typeBytes = Bytes.toBytes(type.name());
        LOG.info("Writing new config row with key " + type);
        // populate the configuration data
        table = tableUtil.createHTable(hbaseConf, tableId);
        table.setAutoFlush(false);
        Put p = new Put(typeBytes);
        for (Map.Entry<String, String> e : cConf) {
            p.add(FAMILY, Bytes.toBytes(e.getKey()), now, Bytes.toBytes(e.getValue()));
        }
        table.put(p);
        LOG.info("Deleting any configuration from " + previous + " or before");
        Delete d = new Delete(typeBytes);
        d.deleteFamily(FAMILY, previous);
        table.delete(d);
    } finally {
        if (table != null) {
            try {
                table.close();
            } catch (IOException ioe) {
                LOG.error("Error closing HBaseAdmin: " + ioe.getMessage(), ioe);
            }
        }
    }
}
Also used : HBaseDDLExecutor(co.cask.cdap.spi.hbase.HBaseDDLExecutor) TableId(co.cask.cdap.data2.util.TableId) Delete(org.apache.hadoop.hbase.client.Delete) IOException(java.io.IOException) HTable(org.apache.hadoop.hbase.client.HTable) Put(org.apache.hadoop.hbase.client.Put) Map(java.util.Map)
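
For context, a minimal caller of this method might look like the following sketch; it assumes hbase-site.xml is on the classpath so that HBaseConfiguration.create() picks up the cluster settings. Example 3 above does essentially the same thing inside its test setup:

import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.data2.util.hbase.ConfigurationTable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

import java.io.IOException;

public final class WriteDefaultConfig {

    public static void main(String[] args) throws IOException {
        Configuration hbaseConf = HBaseConfiguration.create();
        CConfiguration cConf = CConfiguration.create();
        // Persist the current CConfiguration under the DEFAULT row key, as Example 3 does in its setup.
        new ConfigurationTable(hbaseConf).write(ConfigurationTable.Type.DEFAULT, cConf);
    }
}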

Example 5 with HBaseDDLExecutorFactory

Use of co.cask.cdap.data2.util.hbase.HBaseDDLExecutorFactory in project cdap by caskdata.

In the class HBaseConsumerStateTest, method init:

@BeforeClass
public static void init() throws Exception {
    zkServer = InMemoryZKServer.builder().setDataDir(TMP_FOLDER.newFolder()).build();
    zkServer.startAndWait();
    Configuration hConf = TEST_HBASE.getConfiguration();
    cConf.set(Constants.CFG_LOCAL_DATA_DIR, TMP_FOLDER.newFolder().getAbsolutePath());
    cConf.set(Constants.Zookeeper.QUORUM, zkServer.getConnectionStr());
    Injector injector = Guice.createInjector(
        new ConfigModule(cConf, hConf),
        new ZKClientModule(),
        new NonCustomLocationUnitTestModule().getModule(),
        new DiscoveryRuntimeModule().getInMemoryModules(),
        new TransactionMetricsModule(),
        new DataFabricModules().getDistributedModules(),
        new AbstractModule() {

        @Override
        protected void configure() {
            bind(NamespaceQueryAdmin.class).to(SimpleNamespaceQueryAdmin.class);
        }
    },
        new DataSetsModules().getInMemoryModules(),
        new SystemDatasetRuntimeModule().getInMemoryModules(),
        new ExploreClientModule(),
        new ViewAdminModules().getInMemoryModules(),
        new AuthorizationTestModule(),
        new AuthorizationEnforcementModule().getInMemoryModules(),
        new AuthenticationContextModules().getNoOpModule(),
        Modules.override(new StreamAdminModules().getDistributedModules()).with(new AbstractModule() {

        @Override
        protected void configure() {
            bind(StreamMetaStore.class).to(InMemoryStreamMetaStore.class);
            bind(NotificationFeedManager.class).to(NoOpNotificationFeedManager.class);
            bind(UGIProvider.class).to(UnsupportedUGIProvider.class);
            bind(OwnerAdmin.class).to(DefaultOwnerAdmin.class);
        }
    }));
    zkClientService = injector.getInstance(ZKClientService.class);
    zkClientService.startAndWait();
    streamAdmin = injector.getInstance(StreamAdmin.class);
    stateStoreFactory = injector.getInstance(StreamConsumerStateStoreFactory.class);
    tableUtil = injector.getInstance(HBaseTableUtil.class);
    ddlExecutor = new HBaseDDLExecutorFactory(cConf, TEST_HBASE.getHBaseAdmin().getConfiguration()).get();
    ddlExecutor.createNamespaceIfNotExists(tableUtil.getHBaseNamespace(TEST_NAMESPACE));
    ddlExecutor.createNamespaceIfNotExists(tableUtil.getHBaseNamespace(OTHER_NAMESPACE));
    setupNamespaces(injector.getInstance(NamespacedLocationFactory.class));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ConfigModule(co.cask.cdap.common.guice.ConfigModule) UGIProvider(co.cask.cdap.security.impersonation.UGIProvider) UnsupportedUGIProvider(co.cask.cdap.security.impersonation.UnsupportedUGIProvider) NamespacedLocationFactory(co.cask.cdap.common.namespace.NamespacedLocationFactory) TransactionMetricsModule(co.cask.cdap.data.runtime.TransactionMetricsModule) ViewAdminModules(co.cask.cdap.data.view.ViewAdminModules) ZKClientModule(co.cask.cdap.common.guice.ZKClientModule) Injector(com.google.inject.Injector) StreamMetaStore(co.cask.cdap.data.stream.service.StreamMetaStore) InMemoryStreamMetaStore(co.cask.cdap.data.stream.service.InMemoryStreamMetaStore) SimpleNamespaceQueryAdmin(co.cask.cdap.common.namespace.SimpleNamespaceQueryAdmin) HBaseDDLExecutorFactory(co.cask.cdap.data2.util.hbase.HBaseDDLExecutorFactory) SystemDatasetRuntimeModule(co.cask.cdap.data.runtime.SystemDatasetRuntimeModule) DiscoveryRuntimeModule(co.cask.cdap.common.guice.DiscoveryRuntimeModule) NotificationFeedManager(co.cask.cdap.notifications.feeds.NotificationFeedManager) NoOpNotificationFeedManager(co.cask.cdap.notifications.feeds.service.NoOpNotificationFeedManager) AuthenticationContextModules(co.cask.cdap.security.auth.context.AuthenticationContextModules) DataSetsModules(co.cask.cdap.data.runtime.DataSetsModules) DefaultOwnerAdmin(co.cask.cdap.security.impersonation.DefaultOwnerAdmin) OwnerAdmin(co.cask.cdap.security.impersonation.OwnerAdmin) NonCustomLocationUnitTestModule(co.cask.cdap.common.guice.NonCustomLocationUnitTestModule) AuthorizationTestModule(co.cask.cdap.security.authorization.AuthorizationTestModule) HBaseTableUtil(co.cask.cdap.data2.util.hbase.HBaseTableUtil) AbstractModule(com.google.inject.AbstractModule) StreamAdminModules(co.cask.cdap.data.stream.StreamAdminModules) StreamAdmin(co.cask.cdap.data2.transaction.stream.StreamAdmin) ZKClientService(org.apache.twill.zookeeper.ZKClientService) ExploreClientModule(co.cask.cdap.explore.guice.ExploreClientModule) DataFabricModules(co.cask.cdap.data.runtime.DataFabricModules) AuthorizationEnforcementModule(co.cask.cdap.security.authorization.AuthorizationEnforcementModule) StreamConsumerStateStoreFactory(co.cask.cdap.data2.transaction.stream.StreamConsumerStateStoreFactory) BeforeClass(org.junit.BeforeClass)
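
The Modules.override(new StreamAdminModules().getDistributedModules()).with(...) call is what lets this test keep the full set of distributed stream bindings while swapping a few of them for test doubles. The following self-contained Guice sketch shows the same mechanism; the Greeter interface and its implementations are hypothetical and exist purely for illustration:

import com.google.inject.AbstractModule;
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.util.Modules;

public final class OverrideExample {

    interface Greeter { String greet(); }

    static final class RealGreeter implements Greeter {
        public String greet() { return "hello from production"; }
    }

    static final class FakeGreeter implements Greeter {
        public String greet() { return "hello from the test double"; }
    }

    public static void main(String[] args) {
        AbstractModule production = new AbstractModule() {
            @Override
            protected void configure() {
                bind(Greeter.class).to(RealGreeter.class);
            }
        };
        AbstractModule testOverrides = new AbstractModule() {
            @Override
            protected void configure() {
                bind(Greeter.class).to(FakeGreeter.class);
            }
        };
        // Modules.override keeps everything from 'production' but lets 'testOverrides' win on conflicting bindings.
        Injector injector = Guice.createInjector(Modules.override(production).with(testOverrides));
        System.out.println(injector.getInstance(Greeter.class).greet());  // prints the test double's greeting
    }
}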

Aggregations

HBaseDDLExecutorFactory (co.cask.cdap.data2.util.hbase.HBaseDDLExecutorFactory): 12 uses
BeforeClass (org.junit.BeforeClass): 10 uses
HBaseTableUtilFactory (co.cask.cdap.data2.util.hbase.HBaseTableUtilFactory): 6 uses
ConfigModule (co.cask.cdap.common.guice.ConfigModule): 4 uses
DiscoveryRuntimeModule (co.cask.cdap.common.guice.DiscoveryRuntimeModule): 4 uses
ZKClientModule (co.cask.cdap.common.guice.ZKClientModule): 4 uses
SimpleNamespaceQueryAdmin (co.cask.cdap.common.namespace.SimpleNamespaceQueryAdmin): 4 uses
DataFabricModules (co.cask.cdap.data.runtime.DataFabricModules): 4 uses
DataSetsModules (co.cask.cdap.data.runtime.DataSetsModules): 4 uses
SystemDatasetRuntimeModule (co.cask.cdap.data.runtime.SystemDatasetRuntimeModule): 4 uses
TransactionMetricsModule (co.cask.cdap.data.runtime.TransactionMetricsModule): 4 uses
ConfigurationTable (co.cask.cdap.data2.util.hbase.ConfigurationTable): 4 uses
HBaseTableUtil (co.cask.cdap.data2.util.hbase.HBaseTableUtil): 4 uses
AuthenticationContextModules (co.cask.cdap.security.auth.context.AuthenticationContextModules): 4 uses
AuthorizationEnforcementModule (co.cask.cdap.security.authorization.AuthorizationEnforcementModule): 4 uses
AuthorizationTestModule (co.cask.cdap.security.authorization.AuthorizationTestModule): 4 uses
DefaultOwnerAdmin (co.cask.cdap.security.impersonation.DefaultOwnerAdmin): 4 uses
UnsupportedUGIProvider (co.cask.cdap.security.impersonation.UnsupportedUGIProvider): 4 uses
AbstractModule (com.google.inject.AbstractModule): 4 uses
LocationFactory (org.apache.twill.filesystem.LocationFactory): 4 uses