
Example 1 with HBaseTableUtil

Use of co.cask.cdap.data2.util.hbase.HBaseTableUtil in project cdap by caskdata.

From the class MapReduceRuntimeService, method buildJobJar.

/**
   * Creates a jar that contains everything needed for Hadoop to run the MapReduce program.
   *
   * @return a new {@link File} containing the job jar
   */
private File buildJobJar(Job job, File tempDir) throws IOException, URISyntaxException {
    File jobJar = new File(tempDir, "job.jar");
    LOG.debug("Creating Job jar: {}", jobJar);
    // For local mode, nothing is needed in the job jar since we use the classloader in the configuration object.
    if (MapReduceTaskContextProvider.isLocal(job.getConfiguration())) {
        try (JarOutputStream output = new JarOutputStream(new FileOutputStream(jobJar))) {
            // Write an empty jar; nothing else is needed in local mode.
        }
        return jobJar;
    }
    // Exclude libraries that are definitely not needed:
    // - Hadoop: available from the cluster
    // - Spark: MapReduce never uses Spark
    final HadoopClassExcluder hadoopClassExcluder = new HadoopClassExcluder();
    ApplicationBundler appBundler = new ApplicationBundler(new ClassAcceptor() {

        @Override
        public boolean accept(String className, URL classUrl, URL classPathUrl) {
            if (className.startsWith("org.apache.spark") || classPathUrl.toString().contains("spark-assembly")) {
                return false;
            }
            return hadoopClassExcluder.accept(className, classUrl, classPathUrl);
        }
    });
    Set<Class<?>> classes = Sets.newHashSet();
    classes.add(MapReduce.class);
    classes.add(MapperWrapper.class);
    classes.add(ReducerWrapper.class);
    classes.add(SLF4JBridgeHandler.class);
    // Add the job's configured input/output format classes when CDAP is set to take over the classloading.
    if (cConf.getBoolean(Constants.AppFabric.MAPREDUCE_INCLUDE_CUSTOM_CLASSES)) {
        try {
            Class<? extends InputFormat<?, ?>> inputFormatClass = job.getInputFormatClass();
            classes.add(inputFormatClass);
            // If it is a StreamInputFormat, also add the StreamEventDecoder class.
            if (MapReduceStreamInputFormat.class.isAssignableFrom(inputFormatClass)) {
                Class<? extends StreamEventDecoder> decoderType = MapReduceStreamInputFormat.getDecoderClass(job.getConfiguration());
                if (decoderType != null) {
                    classes.add(decoderType);
                }
            }
        } catch (Throwable t) {
            // Ignore: the InputFormat class may not be visible to this classloader.
            LOG.debug("InputFormat class not found: {}", t.getMessage(), t);
        }
        try {
            Class<? extends OutputFormat<?, ?>> outputFormatClass = job.getOutputFormatClass();
            classes.add(outputFormatClass);
        } catch (Throwable t) {
            // Ignore: the OutputFormat class may not be visible to this classloader.
            LOG.debug("OutputFormat class not found: {}", t.getMessage(), t);
        }
    }
    // Add KMS class
    if (SecureStoreUtils.isKMSBacked(cConf) && SecureStoreUtils.isKMSCapable()) {
        classes.add(SecureStoreUtils.getKMSSecureStore());
    }
    Class<? extends HBaseDDLExecutor> ddlExecutorClass = new HBaseDDLExecutorFactory(cConf, hConf).get().getClass();
    try {
        Class<?> hbaseTableUtilClass = HBaseTableUtilFactory.getHBaseTableUtilClass(cConf);
        classes.add(hbaseTableUtilClass);
        classes.add(ddlExecutorClass);
    } catch (ProvisionException e) {
        LOG.warn("Not including HBaseTableUtil classes in submitted Job Jar since they are not available");
    }
    ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(
        new CombineClassLoader(getClass().getClassLoader(), Collections.singleton(ddlExecutorClass.getClassLoader())));
    try {
        appBundler.createBundle(Locations.toLocation(jobJar), classes);
    } finally {
        ClassLoaders.setContextClassLoader(oldClassLoader);
    }
    LOG.debug("Built MapReduce Job Jar at {}", jobJar.toURI());
    return jobJar;
}
Also used: HadoopClassExcluder(co.cask.cdap.common.twill.HadoopClassExcluder) JarOutputStream(java.util.jar.JarOutputStream) ClassAcceptor(org.apache.twill.api.ClassAcceptor) URL(java.net.URL) CombineClassLoader(co.cask.cdap.common.lang.CombineClassLoader) ProvisionException(com.google.inject.ProvisionException) FileOutputStream(java.io.FileOutputStream) HBaseDDLExecutorFactory(co.cask.cdap.data2.util.hbase.HBaseDDLExecutorFactory) WeakReferenceDelegatorClassLoader(co.cask.cdap.common.lang.WeakReferenceDelegatorClassLoader) File(java.io.File) JarFile(java.util.jar.JarFile) ApplicationBundler(org.apache.twill.internal.ApplicationBundler)
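
For orientation, here is a minimal sketch of the bundling pattern used above: an ApplicationBundler walks the dependency graph from a set of seed classes and packs whatever the ClassAcceptor approves into one jar. MyProgram and the method name are illustrative placeholders, not CDAP names.

private File buildBundleJar(File tempDir) throws IOException {
    File bundleJar = new File(tempDir, "bundle.jar");
    // Skip classes the cluster already provides; bundle everything else.
    ApplicationBundler bundler = new ApplicationBundler(new ClassAcceptor() {

        @Override
        public boolean accept(String className, URL classUrl, URL classPathUrl) {
            return !className.startsWith("org.apache.hadoop.");
        }
    });
    // MyProgram is a hypothetical seed class; its transitive dependencies are bundled too.
    bundler.createBundle(Locations.toLocation(bundleJar), Collections.<Class<?>>singleton(MyProgram.class));
    return bundleJar;
}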

Example 2 with HBaseTableUtil

Use of co.cask.cdap.data2.util.hbase.HBaseTableUtil in project cdap by caskdata.

From the class HBaseTableTest, method beforeClass.

@BeforeClass
public static void beforeClass() throws Exception {
    cConf = CConfiguration.create();
    hBaseTableUtil = new HBaseTableUtilFactory(cConf, new SimpleNamespaceQueryAdmin()).get();
    // TODO: CDAP-1634 - Explore a way to not have every HBase test class do this.
    ddlExecutor = new HBaseDDLExecutorFactory(cConf, TEST_HBASE.getHBaseAdmin().getConfiguration()).get();
    ddlExecutor.createNamespaceIfNotExists(hBaseTableUtil.getHBaseNamespace(NAMESPACE1));
    ddlExecutor.createNamespaceIfNotExists(hBaseTableUtil.getHBaseNamespace(NAMESPACE2));
}
Also used: SimpleNamespaceQueryAdmin(co.cask.cdap.common.namespace.SimpleNamespaceQueryAdmin) HBaseDDLExecutorFactory(co.cask.cdap.data2.util.hbase.HBaseDDLExecutorFactory) HBaseTableUtilFactory(co.cask.cdap.data2.util.hbase.HBaseTableUtilFactory) BeforeClass(org.junit.BeforeClass)
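
A matching teardown keeps test namespaces from leaking between suites. The following is a hedged sketch, assuming HBaseDDLExecutor exposes a deleteNamespaceIfExists counterpart to createNamespaceIfNotExists; verify the method name against your CDAP branch.

@AfterClass
public static void afterClass() throws Exception {
    // Mirror beforeClass: drop the namespaces created for the test run (assumed API).
    ddlExecutor.deleteNamespaceIfExists(hBaseTableUtil.getHBaseNamespace(NAMESPACE1));
    ddlExecutor.deleteNamespaceIfExists(hBaseTableUtil.getHBaseNamespace(NAMESPACE2));
}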

Example 3 with HBaseTableUtil

Use of co.cask.cdap.data2.util.hbase.HBaseTableUtil in project cdap by caskdata.

From the class IncrementHandlerTest, method createTable.

@Override
public HTable createTable(TableId tableId) throws Exception {
    HBaseTableUtil tableUtil = new HBaseTableUtilFactory(cConf).get();
    HTableDescriptorBuilder tableDesc = tableUtil.buildHTableDescriptor(tableId);
    HColumnDescriptor columnDesc = new HColumnDescriptor(FAMILY);
    columnDesc.setMaxVersions(Integer.MAX_VALUE);
    columnDesc.setValue(IncrementHandlerState.PROPERTY_TRANSACTIONAL, "false");
    tableDesc.addFamily(columnDesc);
    tableDesc.addCoprocessor(IncrementHandler.class.getName());
    HTableDescriptor htd = tableDesc.build();
    TEST_HBASE.getHBaseAdmin().createTable(htd);
    TEST_HBASE.waitUntilTableAvailable(htd.getName(), 5000);
    return tableUtil.createHTable(conf, tableId);
}
Also used: HTableDescriptorBuilder(co.cask.cdap.data2.util.hbase.HTableDescriptorBuilder) HColumnDescriptor(org.apache.hadoop.hbase.HColumnDescriptor) HBaseTableUtilFactory(co.cask.cdap.data2.util.hbase.HBaseTableUtilFactory) HBaseTableUtil(co.cask.cdap.data2.util.hbase.HBaseTableUtil) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor)
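
Once created, the table behaves like any HBase table, with the IncrementHandler coprocessor intercepting operations server-side. A short usage sketch with standard HBase 0.98-era client calls; the row and column names are illustrative:

HTable table = createTable(tableId);
try {
    // Plain write and read against the coprocessor-enabled table.
    Put put = new Put(Bytes.toBytes("row1"));
    put.add(FAMILY, Bytes.toBytes("col"), Bytes.toBytes("value"));
    table.put(put);
    Result result = table.get(new Get(Bytes.toBytes("row1")));
} finally {
    table.close();
}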

Example 4 with HBaseTableUtil

Use of co.cask.cdap.data2.util.hbase.HBaseTableUtil in project cdap by caskdata.

From the class IncrementSummingScannerTest, method createRegion.

static HRegion createRegion(Configuration hConf, CConfiguration cConf, TableId tableId, HColumnDescriptor cfd) throws Exception {
    HBaseTableUtil tableUtil = new HBaseTableUtilFactory(cConf).get();
    HTableDescriptorBuilder htd = tableUtil.buildHTableDescriptor(tableId);
    cfd.setMaxVersions(Integer.MAX_VALUE);
    cfd.setKeepDeletedCells(true);
    htd.addFamily(cfd);
    htd.addCoprocessor(IncrementHandler.class.getName());
    HTableDescriptor desc = htd.build();
    String tableName = desc.getNameAsString();
    Path tablePath = new Path("/tmp/" + tableName);
    Path hlogPath = new Path("/tmp/hlog-" + tableName);
    FileSystem fs = FileSystem.get(hConf);
    assertTrue(fs.mkdirs(tablePath));
    WALFactory walFactory = new WALFactory(hConf, null, hlogPath.toString());
    WAL hLog = walFactory.getWAL(new byte[] { 1 });
    HRegionInfo regionInfo = new HRegionInfo(desc.getTableName());
    HRegionFileSystem regionFS = HRegionFileSystem.createRegionOnFileSystem(hConf, fs, tablePath, regionInfo);
    return new HRegion(regionFS, hLog, hConf, desc, new LocalRegionServerServices(hConf, ServerName.valueOf(InetAddress.getLocalHost().getHostName(), 0, System.currentTimeMillis())));
}
Also used: Path(org.apache.hadoop.fs.Path) HTableDescriptorBuilder(co.cask.cdap.data2.util.hbase.HTableDescriptorBuilder) WAL(org.apache.hadoop.hbase.wal.WAL) HBaseTableUtil(co.cask.cdap.data2.util.hbase.HBaseTableUtil) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) HRegion(org.apache.hadoop.hbase.regionserver.HRegion) HRegionFileSystem(org.apache.hadoop.hbase.regionserver.HRegionFileSystem) FileSystem(org.apache.hadoop.fs.FileSystem) HBaseTableUtilFactory(co.cask.cdap.data2.util.hbase.HBaseTableUtilFactory) WALFactory(org.apache.hadoop.hbase.wal.WALFactory)
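
A region built this way still has to be initialized before it can serve operations, and closed afterwards to release the WAL and file handles. A hedged usage sketch, assuming the test's FAMILY constant and HRegion's public initialize() method:

HRegion region = createRegion(hConf, cConf, tableId, new HColumnDescriptor(FAMILY));
try {
    // Required before any put/get: opens the stores and replays the WAL if needed.
    region.initialize();
    Put put = new Put(Bytes.toBytes("r1"));
    put.add(FAMILY, Bytes.toBytes("c"), Bytes.toBytes(1L));
    region.put(put);
} finally {
    region.close();
}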

Example 5 with HBaseTableUtil

Use of co.cask.cdap.data2.util.hbase.HBaseTableUtil in project cdap by caskdata.

From the class HBaseQueueClientFactory, method createConsumer.

@Override
public QueueConsumer createConsumer(final QueueName queueName, final ConsumerConfig consumerConfig, int numGroups) throws IOException {
    final HBaseQueueAdmin admin = ensureTableExists(queueName);
    try {
        final long groupId = consumerConfig.getGroupId();
        // A callback that creates a list of HBaseQueueConsumers
        // based on the current queue consumer state of the given group
        Callable<List<HBaseQueueConsumer>> consumerCreator = new Callable<List<HBaseQueueConsumer>>() {

            @Override
            public List<HBaseQueueConsumer> call() throws Exception {
                List<HBaseConsumerState> states;
                try (HBaseConsumerStateStore stateStore = admin.getConsumerStateStore(queueName)) {
                    TransactionExecutor txExecutor = Transactions.createTransactionExecutor(txExecutorFactory, stateStore);
                    // Find all consumer states for consumers that need to be created based on current state
                    states = txExecutor.execute(new Callable<List<HBaseConsumerState>>() {

                        @Override
                        public List<HBaseConsumerState> call() throws Exception {
                            List<HBaseConsumerState> consumerStates = Lists.newArrayList();
                            HBaseConsumerState state = stateStore.getState(groupId, consumerConfig.getInstanceId());
                            if (state.getPreviousBarrier() == null) {
                                // Old HBase consumer (Salted based, not sharded)
                                consumerStates.add(state);
                                return consumerStates;
                            }
                            // Find the smallest start barrier that has something to consume for this instance.
                            // It should always exist, since we assume the queue is configured before this method is called.
                            List<QueueBarrier> queueBarriers = stateStore.getAllBarriers(groupId);
                            if (queueBarriers.isEmpty()) {
                                throw new IllegalStateException(String.format("No consumer information available. Queue: %s, GroupId: %d, InstanceId: %d", queueName, groupId, consumerConfig.getInstanceId()));
                            }
                            QueueBarrier startBarrier = Iterables.find(Lists.reverse(queueBarriers), new Predicate<QueueBarrier>() {

                                @Override
                                public boolean apply(QueueBarrier barrier) {
                                    return barrier.getGroupConfig().getGroupSize() > consumerConfig.getInstanceId() && stateStore.isAllConsumed(consumerConfig, barrier.getStartRow());
                                }
                            }, queueBarriers.get(0));
                            int groupSize = startBarrier.getGroupConfig().getGroupSize();
                            for (int i = consumerConfig.getInstanceId(); i < groupSize; i += consumerConfig.getGroupSize()) {
                                consumerStates.add(stateStore.getState(groupId, i));
                            }
                            return consumerStates;
                        }
                    });
                }
                List<HBaseQueueConsumer> consumers = Lists.newArrayList();
                for (HBaseConsumerState state : states) {
                    QueueType queueType = (state.getPreviousBarrier() == null) ? QueueType.QUEUE : QueueType.SHARDED_QUEUE;
                    HTable hTable = createHTable(admin.getDataTableId(queueName, queueType));
                    int distributorBuckets = getDistributorBuckets(hTable.getTableDescriptor());
                    HBaseQueueStrategy strategy = (state.getPreviousBarrier() == null) ? new SaltedHBaseQueueStrategy(hBaseTableUtil, distributorBuckets) : new ShardedHBaseQueueStrategy(hBaseTableUtil, distributorBuckets);
                    consumers.add(queueUtil.getQueueConsumer(cConf, hTable, queueName, state, admin.getConsumerStateStore(queueName), strategy));
                }
                return consumers;
            }
        };
        return new SmartQueueConsumer(queueName, consumerConfig, consumerCreator);
    } catch (Exception e) {
        // If there is an exception, nothing much can be done here besides propagating it
        Throwables.propagateIfPossible(e);
        throw new IOException(e);
    }
}
Also used: HTable(org.apache.hadoop.hbase.client.HTable) Callable(java.util.concurrent.Callable) QueueType(co.cask.cdap.data2.transaction.queue.QueueConstants.QueueType) List(java.util.List) TransactionExecutor(org.apache.tephra.TransactionExecutor) IOException(java.io.IOException)
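
The Callable handed to SmartQueueConsumer is the key design choice: it defers consumer creation and lets it be re-run whenever the queue layout changes. Below is a minimal, generic sketch of that deferred-creation pattern; LazyResource is illustrative, not a CDAP class.

// import java.util.concurrent.Callable;
// Illustrative only: the resource is built lazily and can be rebuilt on demand.
final class LazyResource<T> {

    private final Callable<T> creator;
    private T instance;

    LazyResource(Callable<T> creator) {
        this.creator = creator;
    }

    synchronized T get() throws Exception {
        if (instance == null) {
            // Re-runs the creation logic, mirroring how the consumer list
            // above is rebuilt from fresh consumer state.
            instance = creator.call();
        }
        return instance;
    }

    synchronized void invalidate() {
        instance = null;
    }
}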

Aggregations

Classes most often used together with HBaseTableUtil, with occurrence counts:

TableId (co.cask.cdap.data2.util.TableId): 20
HBaseTableUtilFactory (co.cask.cdap.data2.util.hbase.HBaseTableUtilFactory): 20
HBaseTableUtil (co.cask.cdap.data2.util.hbase.HBaseTableUtil): 18
HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor): 18
NamespaceId (co.cask.cdap.proto.id.NamespaceId): 15
HTableDescriptorBuilder (co.cask.cdap.data2.util.hbase.HTableDescriptorBuilder): 14
Test (org.junit.Test): 8
IOException (java.io.IOException): 7
FileSystem (org.apache.hadoop.fs.FileSystem): 7
Path (org.apache.hadoop.fs.Path): 7
HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor): 7
HRegionInfo (org.apache.hadoop.hbase.HRegionInfo): 7
HRegion (org.apache.hadoop.hbase.regionserver.HRegion): 7
HRegionFileSystem (org.apache.hadoop.hbase.regionserver.HRegionFileSystem): 7
HTable (org.apache.hadoop.hbase.client.HTable): 6
WAL (org.apache.hadoop.hbase.wal.WAL): 5
WALFactory (org.apache.hadoop.hbase.wal.WALFactory): 5
HBaseDDLExecutorFactory (co.cask.cdap.data2.util.hbase.HBaseDDLExecutorFactory): 3
ScanBuilder (co.cask.cdap.data2.util.hbase.ScanBuilder): 3
TableName (org.apache.hadoop.hbase.TableName): 3