Use of org.apache.hadoop.hbase.client.ColumnFamilyDescriptor in project hbase by apache.
From the class SchemaResource, method update.
private Response update(final TableName name, final TableSchemaModel model, final UriInfo uriInfo, final Admin admin) {
if (servlet.isReadOnly()) {
return Response.status(Response.Status.FORBIDDEN).type(MIMETYPE_TEXT).entity("Forbidden" + CRLF).build();
}
try {
TableDescriptorBuilder tableDescriptorBuilder = TableDescriptorBuilder.newBuilder(admin.getDescriptor(name));
admin.disableTable(name);
try {
for (ColumnSchemaModel family : model.getColumns()) {
ColumnFamilyDescriptorBuilder columnFamilyDescriptorBuilder = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(family.getName()));
for (Map.Entry<QName, Object> e : family.getAny().entrySet()) {
columnFamilyDescriptorBuilder.setValue(e.getKey().getLocalPart(), e.getValue().toString());
}
TableDescriptor tableDescriptor = tableDescriptorBuilder.build();
ColumnFamilyDescriptor columnFamilyDescriptor = columnFamilyDescriptorBuilder.build();
if (tableDescriptor.hasColumnFamily(columnFamilyDescriptor.getName())) {
admin.modifyColumnFamily(name, columnFamilyDescriptor);
} else {
admin.addColumnFamily(name, columnFamilyDescriptor);
}
}
} catch (IOException e) {
return Response.status(Response.Status.SERVICE_UNAVAILABLE).type(MIMETYPE_TEXT).entity("Unavailable" + CRLF).build();
} finally {
admin.enableTable(TableName.valueOf(tableResource.getName()));
}
servlet.getMetrics().incrementSucessfulPutRequests(1);
return Response.ok().build();
} catch (Exception e) {
servlet.getMetrics().incrementFailedPutRequests(1);
return processException(e);
}
}
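The pattern above builds a ColumnFamilyDescriptor from the model's attributes and then either modifies or adds the family depending on whether the table already has it. Below is a minimal standalone sketch of the same add-or-modify pattern, assuming hypothetical table and family names and a default client configuration; it is an illustration, not part of SchemaResource.

import java.io.IOException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.util.Bytes;

public class AddOrModifyFamilyExample {
  public static void main(String[] args) throws IOException {
    // Hypothetical table and family names, used for illustration only.
    TableName table = TableName.valueOf("example_table");
    byte[] family = Bytes.toBytes("cf1");
    try (Connection conn = ConnectionFactory.createConnection(HBaseConfiguration.create());
         Admin admin = conn.getAdmin()) {
      // Build the descriptor, attaching attributes as string key/value pairs,
      // the same way SchemaResource copies them out of the ColumnSchemaModel.
      ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(family)
          .setValue("VERSIONS", "3")
          .build();
      TableDescriptor td = admin.getDescriptor(table);
      // Modify the family if the table already has it, otherwise add it.
      if (td.hasColumnFamily(cfd.getName())) {
        admin.modifyColumnFamily(table, cfd);
      } else {
        admin.addColumnFamily(table, cfd);
      }
    }
  }
}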
Use of org.apache.hadoop.hbase.client.ColumnFamilyDescriptor in project hbase by apache.
From the class RestoreTool, method incrementalRestoreTable.
/**
* During an incremental backup operation, call WalPlayer to replay the WALs in the backup image.
* Currently tableNames and newTableNames contain only a single table; this will be expanded to
* multiple tables in the future.
* @param conn HBase connection
* @param tableBackupPath backup path
* @param logDirs incremental backup folders, which contain the WALs
* @param tableNames source table names (the tables that were backed up)
* @param newTableNames target table names (the tables to restore to)
* @param incrBackupId incremental backup Id
* @throws IOException exception
*/
public void incrementalRestoreTable(Connection conn, Path tableBackupPath, Path[] logDirs, TableName[] tableNames, TableName[] newTableNames, String incrBackupId) throws IOException {
try (Admin admin = conn.getAdmin()) {
if (tableNames.length != newTableNames.length) {
throw new IOException("Number of source tables and target tables does not match!");
}
FileSystem fileSys = tableBackupPath.getFileSystem(this.conf);
// The target tables are expected to already exist, e.g. created by restoring a prior full backup. Here, check that all new tables exist.
for (TableName tableName : newTableNames) {
if (!admin.tableExists(tableName)) {
throw new IOException("HBase table " + tableName + " does not exist. Create the table first, e.g. by restoring a full backup.");
}
}
// adjust table schema
for (int i = 0; i < tableNames.length; i++) {
TableName tableName = tableNames[i];
TableDescriptor tableDescriptor = getTableDescriptor(fileSys, tableName, incrBackupId);
if (tableDescriptor == null) {
throw new IOException("Can't find " + tableName + "'s descriptor.");
}
LOG.debug("Found descriptor " + tableDescriptor + " through " + incrBackupId);
TableName newTableName = newTableNames[i];
TableDescriptor newTableDescriptor = admin.getDescriptor(newTableName);
List<ColumnFamilyDescriptor> families = Arrays.asList(tableDescriptor.getColumnFamilies());
List<ColumnFamilyDescriptor> existingFamilies = Arrays.asList(newTableDescriptor.getColumnFamilies());
TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(newTableDescriptor);
boolean schemaChangeNeeded = false;
for (ColumnFamilyDescriptor family : families) {
if (!existingFamilies.contains(family)) {
builder.setColumnFamily(family);
schemaChangeNeeded = true;
}
}
for (ColumnFamilyDescriptor family : existingFamilies) {
if (!families.contains(family)) {
builder.removeColumnFamily(family.getName());
schemaChangeNeeded = true;
}
}
if (schemaChangeNeeded) {
modifyTableSync(conn, builder.build());
LOG.info("Changed " + newTableDescriptor.getTableName() + " to: " + newTableDescriptor);
}
}
RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);
restoreService.run(logDirs, tableNames, newTableNames, false);
}
}
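The schema-adjustment loop above diffs the column families of the backup image against the live target table, adding missing families and removing extras before replaying WALs. Below is a hedged standalone sketch of that diff step; reconcileFamilies is a hypothetical helper, not part of RestoreTool, and it only rebuilds the descriptor without calling the Admin API.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;

public final class SchemaDiffExample {
  private SchemaDiffExample() {
  }

  /**
   * Returns a descriptor for the target table whose column families match the backup image,
   * or null when no change is needed. Mirrors the reconcile step in incrementalRestoreTable.
   */
  public static TableDescriptor reconcileFamilies(TableDescriptor backup, TableDescriptor target) {
    List<ColumnFamilyDescriptor> wanted = Arrays.asList(backup.getColumnFamilies());
    List<ColumnFamilyDescriptor> existing = Arrays.asList(target.getColumnFamilies());
    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(target);
    boolean changed = false;
    // Add families present in the backup image but missing from the target table.
    for (ColumnFamilyDescriptor family : wanted) {
      if (!existing.contains(family)) {
        builder.setColumnFamily(family);
        changed = true;
      }
    }
    // Drop families the target table has but the backup image does not.
    for (ColumnFamilyDescriptor family : existing) {
      if (!wanted.contains(family)) {
        builder.removeColumnFamily(family.getName());
        changed = true;
      }
    }
    return changed ? builder.build() : null;
  }
}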
Use of org.apache.hadoop.hbase.client.ColumnFamilyDescriptor in project hbase by apache.
From the class TestVerifyReplication, method testVerifyRepJobWithRawOptions.
/**
* Load a row into a table, make sure the replicated data is really the same, delete the row, make
* sure the delete marker is replicated, then run VerifyReplication with and without the --raw
* option to check the results.
*/
@Test
public void testVerifyRepJobWithRawOptions() throws Exception {
LOG.info(name.getMethodName());
final TableName tableName = TableName.valueOf(name.getMethodName());
byte[] familyname = Bytes.toBytes("fam_raw");
byte[] row = Bytes.toBytes("row_raw");
Table lHtable1 = null;
Table lHtable2 = null;
try {
ColumnFamilyDescriptor fam = ColumnFamilyDescriptorBuilder.newBuilder(familyname).setMaxVersions(100).setScope(HConstants.REPLICATION_SCOPE_GLOBAL).build();
TableDescriptor table = TableDescriptorBuilder.newBuilder(tableName).setColumnFamily(fam).build();
Connection connection1 = ConnectionFactory.createConnection(CONF1);
Connection connection2 = ConnectionFactory.createConnection(CONF2);
try (Admin admin1 = connection1.getAdmin()) {
admin1.createTable(table, HBaseTestingUtil.KEYS_FOR_HBA_CREATE_TABLE);
}
try (Admin admin2 = connection2.getAdmin()) {
admin2.createTable(table, HBaseTestingUtil.KEYS_FOR_HBA_CREATE_TABLE);
}
UTIL1.waitUntilAllRegionsAssigned(tableName);
UTIL2.waitUntilAllRegionsAssigned(tableName);
lHtable1 = UTIL1.getConnection().getTable(tableName);
lHtable2 = UTIL2.getConnection().getTable(tableName);
Put put = new Put(row);
put.addColumn(familyname, row, row);
lHtable1.put(put);
Get get = new Get(row);
for (int i = 0; i < NB_RETRIES; i++) {
if (i == NB_RETRIES - 1) {
fail("Waited too much time for put replication");
}
Result res = lHtable2.get(get);
if (res.isEmpty()) {
LOG.info("Row not available");
Thread.sleep(SLEEP_TIME);
} else {
assertArrayEquals(res.value(), row);
break;
}
}
Delete del = new Delete(row);
lHtable1.delete(del);
get = new Get(row);
for (int i = 0; i < NB_RETRIES; i++) {
if (i == NB_RETRIES - 1) {
fail("Waited too much time for del replication");
}
Result res = lHtable2.get(get);
if (res.size() >= 1) {
LOG.info("Row not deleted");
Thread.sleep(SLEEP_TIME);
} else {
break;
}
}
// Checking verifyReplication for the default behavior.
String[] argsWithoutRaw = new String[] { PEER_ID, tableName.getNameAsString() };
runVerifyReplication(argsWithoutRaw, 0, 0);
// Checking verifyReplication with raw
String[] argsWithRawAsTrue = new String[] { "--raw", PEER_ID, tableName.getNameAsString() };
runVerifyReplication(argsWithRawAsTrue, 1, 0);
} finally {
if (lHtable1 != null) {
lHtable1.close();
}
if (lHtable2 != null) {
lHtable2.close();
}
}
}
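The test builds its table with a ColumnFamilyDescriptor whose replication scope is global, which is what marks the family for cross-cluster replication. A minimal sketch of that table-creation step outside the test harness, assuming a hypothetical table name and a default client configuration:

import java.io.IOException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.util.Bytes;

public class CreateReplicatedTableExample {
  public static void main(String[] args) throws IOException {
    // Hypothetical table and family names; adjust for your cluster.
    TableName name = TableName.valueOf("replicated_table");
    ColumnFamilyDescriptor fam = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("fam_raw"))
        .setMaxVersions(100)
        // REPLICATION_SCOPE_GLOBAL marks the family for cross-cluster replication.
        .setScope(HConstants.REPLICATION_SCOPE_GLOBAL)
        .build();
    TableDescriptor table = TableDescriptorBuilder.newBuilder(name).setColumnFamily(fam).build();
    try (Connection conn = ConnectionFactory.createConnection(HBaseConfiguration.create());
         Admin admin = conn.getAdmin()) {
      admin.createTable(table);
    }
  }
}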
Use of org.apache.hadoop.hbase.client.ColumnFamilyDescriptor in project hbase by apache.
From the class HMaster, method finishActiveMasterInitialization.
/**
* Finish initialization of HMaster after becoming the primary master.
* <p/>
* The startup order is a bit complicated but very important, do not change it unless you know
* what you are doing.
* <ol>
* <li>Initialize file system based components - file system manager, wal manager, table
* descriptors, etc</li>
* <li>Publish cluster id</li>
* <li>Here comes the most complicated part - initialize server manager, assignment manager and
* region server tracker
* <ol type='i'>
* <li>Create server manager</li>
* <li>Create master local region</li>
* <li>Create procedure executor, load the procedures, but do not start workers. We will start it
* later after we finish scheduling SCPs to avoid scheduling duplicated SCPs for the same
* server</li>
* <li>Create assignment manager and start it, load the meta region state, but do not load data
* from meta region</li>
* <li>Start region server tracker, construct the online servers set and find out dead servers and
* schedule SCP for them. The online servers will be constructed by scanning zk, and we will also
* scan the wal directory to find out possible live region servers, and the differences between
* these two sets are the dead servers</li>
* </ol>
* </li>
* <li>If this is a new deploy, schedule an InitMetaProcedure to initialize meta</li>
* <li>Start necessary service threads - balancer, catalog janitor, executor services, and also
* the procedure executor, etc. Notice that the balancer must be created first as assignment
* manager may use it when assigning regions.</li>
* <li>Wait for meta to be initialized if necessary, start table state manager.</li>
* <li>Wait for enough region servers to check-in</li>
* <li>Let assignment manager load data from meta and construct region states</li>
* <li>Start all other things such as chore services, etc</li>
* </ol>
* <p/>
* Notice that now we will not schedule a special procedure to make meta online (unless this is the
* first time and meta has not been created yet); we will rely on SCP to bring meta online.
*/
private void finishActiveMasterInitialization(MonitoredTask status) throws IOException, InterruptedException, KeeperException, ReplicationException {
/*
* We are active master now... go initialize components we need to run.
*/
status.setStatus("Initializing Master file system");
this.masterActiveTime = EnvironmentEdgeManager.currentTime();
// TODO: Do this using Dependency Injection, using PicoContainer, Guice or Spring.
// always initialize the MemStoreLAB as we use a region to store data in master now, see
// localStore.
initializeMemStoreChunkCreator(null);
this.fileSystemManager = new MasterFileSystem(conf);
this.walManager = new MasterWalManager(this);
// warm-up HTDs cache on master initialization
if (preLoadTableDescriptors) {
status.setStatus("Pre-loading table descriptors");
this.tableDescriptors.getAll();
}
// Publish cluster ID; set it in Master too. The superclass RegionServer does this later but
// only after it has checked in with the Master. At least a few tests ask Master for clusterId
// before it has called its run method and before RegionServer has done the reportForDuty.
ClusterId clusterId = fileSystemManager.getClusterId();
status.setStatus("Publishing Cluster ID " + clusterId + " in ZooKeeper");
ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
this.clusterId = clusterId.toString();
// Write the hbck1 lock file to fence out older hbck1 instances; to skip this behavior, set
// hbase.write.hbck1.lock.file to false.
if (this.conf.getBoolean("hbase.write.hbck1.lock.file", true)) {
Pair<Path, FSDataOutputStream> result = null;
try {
result = HBaseFsck.checkAndMarkRunningHbck(this.conf, HBaseFsck.createLockRetryCounterFactory(this.conf).create());
} finally {
if (result != null) {
Closeables.close(result.getSecond(), true);
}
}
}
status.setStatus("Initialize ServerManager and schedule SCP for crash servers");
// The below two managers must be created before loading procedures, as they will be used during
// loading.
this.serverManager = createServerManager(this);
this.syncReplicationReplayWALManager = new SyncReplicationReplayWALManager(this);
if (!conf.getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK, DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK)) {
this.splitWALManager = new SplitWALManager(this);
}
// initialize master local region
masterRegion = MasterRegionFactory.create(this);
tryMigrateMetaLocationsFromZooKeeper();
createProcedureExecutor();
Map<Class<?>, List<Procedure<MasterProcedureEnv>>> procsByType = procedureExecutor.getActiveProceduresNoCopy().stream().collect(Collectors.groupingBy(p -> p.getClass()));
// Create Assignment Manager
this.assignmentManager = createAssignmentManager(this, masterRegion);
this.assignmentManager.start();
// TODO: TRSP can perform as the sub procedure for other procedures, so even if it is marked as
// completed, it could still be in the procedure list. This is a bit strange but is another
// story, need to verify the implementation for ProcedureExecutor and ProcedureStore.
List<TransitRegionStateProcedure> ritList = procsByType.getOrDefault(TransitRegionStateProcedure.class, Collections.emptyList()).stream().filter(p -> !p.isFinished()).map(p -> (TransitRegionStateProcedure) p).collect(Collectors.toList());
this.assignmentManager.setupRIT(ritList);
// Start RegionServerTracker with listing of servers found with existing SCPs -- these should
// be registered in the deadServers set -- and with the list of servernames out on the
// filesystem that COULD BE 'alive' (we'll schedule SCPs for each and let SCP figure it out).
// We also pass dirs that are already 'splitting'... so we can do some checks down in tracker.
// TODO: Generate the splitting and live Set in one pass instead of two as we currently do.
this.regionServerTracker.upgrade(procsByType.getOrDefault(ServerCrashProcedure.class, Collections.emptyList()).stream().map(p -> (ServerCrashProcedure) p).map(p -> p.getServerName()).collect(Collectors.toSet()), walManager.getLiveServersFromWALDir(), walManager.getSplittingServersFromWALDir());
// This manager must be accessed AFTER hbase:meta is confirmed online.
this.tableStateManager = new TableStateManager(this);
status.setStatus("Initializing ZK system trackers");
initializeZKBasedSystemTrackers();
status.setStatus("Loading last flushed sequence id of regions");
try {
this.serverManager.loadLastFlushedSequenceIds();
} catch (IOException e) {
LOG.info("Failed to load last flushed sequence id of regions" + " from file system", e);
}
// Set ourselves as active Master now our claim has succeeded up in zk.
this.activeMaster = true;
// Start the Zombie master detector after setting master as active, see HBASE-21535
Thread zombieDetector = new Thread(new MasterInitializationMonitor(this), "ActiveMasterInitializationMonitor-" + EnvironmentEdgeManager.currentTime());
zombieDetector.setDaemon(true);
zombieDetector.start();
if (!maintenanceMode) {
// Quota support is on by default; skip adding the Observer only if the user specifically asks not to load it.
if (QuotaUtil.isQuotaEnabled(conf)) {
updateConfigurationForQuotasObserver(conf);
}
// initialize master side coprocessors before we start handling requests
status.setStatus("Initializing master coprocessors");
this.cpHost = new MasterCoprocessorHost(this, this.conf);
} else {
// start an in process region server for carrying system regions
maintenanceRegionServer = JVMClusterUtil.createRegionServerThread(getConfiguration(), HRegionServer.class, 0);
maintenanceRegionServer.start();
}
// Checking if meta needs initializing.
status.setStatus("Initializing meta table if this is a new deploy");
InitMetaProcedure initMetaProc = null;
// Print out state of hbase:meta on startup; helps debugging.
if (!this.assignmentManager.getRegionStates().hasTableRegionStates(TableName.META_TABLE_NAME)) {
Optional<InitMetaProcedure> optProc = procedureExecutor.getProcedures().stream().filter(p -> p instanceof InitMetaProcedure).map(o -> (InitMetaProcedure) o).findAny();
initMetaProc = optProc.orElseGet(() -> {
// schedule an init meta procedure if meta has not been deployed yet
InitMetaProcedure temp = new InitMetaProcedure();
procedureExecutor.submitProcedure(temp);
return temp;
});
}
// initialize load balancer
this.balancer.setMasterServices(this);
this.balancer.initialize();
this.balancer.updateClusterMetrics(getClusterMetricsWithoutCoprocessor());
// start up all service threads.
status.setStatus("Initializing master service threads");
startServiceThreads();
// wait meta to be initialized after we start procedure executor
if (initMetaProc != null) {
initMetaProc.await();
}
// Wake up this server to check in
sleeper.skipSleepCycle();
// Wait for region servers to report in.
// With this as part of master initialization, it precludes our being able to start a single
// server that is both Master and RegionServer. Needs more thought. TODO.
String statusStr = "Wait for region servers to report in";
status.setStatus(statusStr);
LOG.info(Objects.toString(status));
waitForRegionServers(status);
// Check if master is shutting down because of an issue initializing region servers or the balancer.
if (isStopped()) {
return;
}
status.setStatus("Starting assignment manager");
// Wait until hbase:meta is online and available; that's what waitForMetaOnline does.
if (!waitForMetaOnline()) {
return;
}
TableDescriptor metaDescriptor = tableDescriptors.get(TableName.META_TABLE_NAME);
final ColumnFamilyDescriptor tableFamilyDesc = metaDescriptor.getColumnFamily(HConstants.TABLE_FAMILY);
final ColumnFamilyDescriptor replBarrierFamilyDesc = metaDescriptor.getColumnFamily(HConstants.REPLICATION_BARRIER_FAMILY);
this.assignmentManager.joinCluster();
// The below depends on hbase:meta being online.
this.assignmentManager.processOfflineRegions();
// this must be called after the above processOfflineRegions to prevent race
this.assignmentManager.wakeMetaLoadedEvent();
// If the configured meta replica count differs from the descriptor, reconcile the descriptor first.
if (conf.get(HConstants.META_REPLICAS_NUM) != null) {
int replicasNumInConf = conf.getInt(HConstants.META_REPLICAS_NUM, HConstants.DEFAULT_META_REPLICA_NUM);
TableDescriptor metaDesc = tableDescriptors.get(TableName.META_TABLE_NAME);
if (metaDesc.getRegionReplication() != replicasNumInConf) {
// it is possible that we already have some replicas before upgrading, so we must set the
// region replication number in meta TableDescriptor directly first, without creating a
// ModifyTableProcedure, otherwise it may cause a double assign for the meta replicas.
int existingReplicasCount = assignmentManager.getRegionStates().getRegionsOfTable(TableName.META_TABLE_NAME).size();
if (existingReplicasCount > metaDesc.getRegionReplication()) {
LOG.info("Update replica count of hbase:meta from {}(in TableDescriptor)" + " to {}(existing ZNodes)", metaDesc.getRegionReplication(), existingReplicasCount);
metaDesc = TableDescriptorBuilder.newBuilder(metaDesc).setRegionReplication(existingReplicasCount).build();
tableDescriptors.update(metaDesc);
}
// check again, and issue a ModifyTableProcedure if needed
if (metaDesc.getRegionReplication() != replicasNumInConf) {
LOG.info("The {} config is {} while the replica count in TableDescriptor is {}" + " for hbase:meta, altering...", HConstants.META_REPLICAS_NUM, replicasNumInConf, metaDesc.getRegionReplication());
procedureExecutor.submitProcedure(new ModifyTableProcedure(procedureExecutor.getEnvironment(), TableDescriptorBuilder.newBuilder(metaDesc).setRegionReplication(replicasNumInConf).build(), null, metaDesc, false));
}
}
}
// Initialize after meta is up as below scans meta
FavoredNodesManager fnm = getFavoredNodesManager();
if (fnm != null) {
fnm.initializeFromMeta();
}
// set cluster status again after user regions are assigned
this.balancer.updateClusterMetrics(getClusterMetricsWithoutCoprocessor());
// Start balancer and meta catalog janitor after meta and regions have been assigned.
status.setStatus("Starting balancer and catalog janitor");
this.clusterStatusChore = new ClusterStatusChore(this, balancer);
getChoreService().scheduleChore(clusterStatusChore);
this.balancerChore = new BalancerChore(this);
if (!disableBalancerChoreForTest) {
getChoreService().scheduleChore(balancerChore);
}
if (regionNormalizerManager != null) {
getChoreService().scheduleChore(regionNormalizerManager.getRegionNormalizerChore());
}
this.catalogJanitorChore = new CatalogJanitor(this);
getChoreService().scheduleChore(catalogJanitorChore);
this.hbckChore = new HbckChore(this);
getChoreService().scheduleChore(hbckChore);
this.serverManager.startChore();
// Only for rolling upgrade, where we need to migrate the data in namespace table to meta table.
if (!waitForNamespaceOnline()) {
return;
}
status.setStatus("Starting cluster schema service");
try {
initClusterSchemaService();
} catch (IllegalStateException e) {
if (e.getCause() != null && e.getCause() instanceof NoSuchColumnFamilyException && tableFamilyDesc == null && replBarrierFamilyDesc == null) {
LOG.info("ClusterSchema service could not be initialized. This is " + "expected during HBase 1 to 2 upgrade", e);
} else {
throw e;
}
}
if (this.cpHost != null) {
try {
this.cpHost.preMasterInitialization();
} catch (IOException e) {
LOG.error("Coprocessor preMasterInitialization() hook failed", e);
}
}
status.markComplete("Initialization successful");
LOG.info(String.format("Master has completed initialization %.3fsec", (EnvironmentEdgeManager.currentTime() - masterActiveTime) / 1000.0f));
this.masterFinishedInitializationTime = EnvironmentEdgeManager.currentTime();
configurationManager.registerObserver(this.balancer);
configurationManager.registerObserver(this.hfileCleanerPool);
configurationManager.registerObserver(this.logCleanerPool);
configurationManager.registerObserver(this.hfileCleaner);
configurationManager.registerObserver(this.logCleaner);
configurationManager.registerObserver(this.regionsRecoveryConfigManager);
// Set master as 'initialized'.
setInitialized(true);
if (tableFamilyDesc == null && replBarrierFamilyDesc == null) {
// create missing CFs in meta table after master is set to 'initialized'.
createMissingCFsInMetaDuringUpgrade(metaDescriptor);
// Abort so the active master restarts and picks up the new CFs; no other services will be started during this master init phase.
throw new PleaseRestartMasterException("Aborting active master after missing" + " CFs are successfully added in meta. Subsequent active master " + "initialization should be uninterrupted");
}
if (maintenanceMode) {
LOG.info("Detected repair mode, skipping final initialization steps.");
return;
}
assignmentManager.checkIfShouldMoveSystemRegionAsync();
status.setStatus("Starting quota manager");
initQuotaManager();
if (QuotaUtil.isQuotaEnabled(conf)) {
// Create the quota snapshot notifier
spaceQuotaSnapshotNotifier = createQuotaSnapshotNotifier();
spaceQuotaSnapshotNotifier.initialize(getConnection());
this.quotaObserverChore = new QuotaObserverChore(this, getMasterMetrics());
// Start the chore to read the region FS space reports and act on them
getChoreService().scheduleChore(quotaObserverChore);
this.snapshotQuotaChore = new SnapshotQuotaObserverChore(this, getMasterMetrics());
// Start the chore to read snapshots and add their usage to table/NS quotas
getChoreService().scheduleChore(snapshotQuotaChore);
}
final SlowLogMasterService slowLogMasterService = new SlowLogMasterService(conf, this);
slowLogMasterService.init();
// clear the dead servers with same host name and port of online server because we are not
// removing dead server with same hostname and port of rs which is trying to check in before
// master initialization. See HBASE-5916.
this.serverManager.clearDeadServersWithSameHostNameAndPortOfOnlineServer();
// Check and set the znode ACLs if needed in case we are overtaking a non-secure configuration
status.setStatus("Checking ZNode ACLs");
zooKeeper.checkAndSetZNodeAcls();
status.setStatus("Initializing MOB Cleaner");
initMobCleaner();
status.setStatus("Calling postStartMaster coprocessors");
if (this.cpHost != null) {
// don't let cp initialization errors kill the master
try {
this.cpHost.postStartMaster();
} catch (IOException ioe) {
LOG.error("Coprocessor postStartMaster() hook failed", ioe);
}
}
zombieDetector.interrupt();
/*
* After master has started up, lets do balancer post startup initialization. Since this runs
* in activeMasterManager thread, it should be fine.
*/
long start = EnvironmentEdgeManager.currentTime();
this.balancer.postMasterStartupInitialize();
if (LOG.isDebugEnabled()) {
LOG.debug("Balancer post startup initialization complete, took " + ((EnvironmentEdgeManager.currentTime() - start) / 1000) + " seconds");
}
this.rollingUpgradeChore = new RollingUpgradeChore(this);
getChoreService().scheduleChore(rollingUpgradeChore);
}
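The initialization code above reads the table and rep_barrier column families from the hbase:meta descriptor and treats a null result as a sign of a pre-2.x meta layout that still needs the missing CFs created. A small sketch of that inspection using the public Admin API, assuming a default client configuration:

import java.io.IOException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.TableDescriptor;

public class InspectMetaFamiliesExample {
  public static void main(String[] args) throws IOException {
    try (Connection conn = ConnectionFactory.createConnection(HBaseConfiguration.create());
         Admin admin = conn.getAdmin()) {
      TableDescriptor meta = admin.getDescriptor(TableName.META_TABLE_NAME);
      // getColumnFamily returns null when the family is absent (e.g. on a pre-upgrade meta).
      ColumnFamilyDescriptor tableFamily = meta.getColumnFamily(HConstants.TABLE_FAMILY);
      ColumnFamilyDescriptor replBarrierFamily = meta.getColumnFamily(HConstants.REPLICATION_BARRIER_FAMILY);
      System.out.println("table family present: " + (tableFamily != null));
      System.out.println("rep_barrier family present: " + (replBarrierFamily != null));
    }
  }
}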
Use of org.apache.hadoop.hbase.client.ColumnFamilyDescriptor in project hbase by apache.
From the class MobRefReporter, method run.
/**
* Main method for the tool.
* @return 0 on success, 1 for bad args, 2 if the job aborted with an exception,
* 3 if the MR job was unsuccessful
*/
public int run(String[] args) throws IOException, InterruptedException {
// TODO make family and table optional
if (args.length != 3) {
printUsage();
return 1;
}
final String output = args[0];
final String tableName = args[1];
final String familyName = args[2];
final long reportStartTime = EnvironmentEdgeManager.currentTime();
Configuration conf = getConf();
try {
FileSystem fs = FileSystem.get(conf);
// check whether the current user is the same as the owner of the hbase root dir
String currentUserName = UserGroupInformation.getCurrentUser().getShortUserName();
FileStatus[] hbaseRootFileStat = fs.listStatus(new Path(conf.get(HConstants.HBASE_DIR)));
if (hbaseRootFileStat.length > 0) {
String owner = hbaseRootFileStat[0].getOwner();
if (!owner.equals(currentUserName)) {
String errorMsg = "The current user[" + currentUserName + "] does not have hbase root credentials." + " If this job fails due to an inability to read HBase's internal directories, " + "you will need to rerun as a user with sufficient permissions. The HBase superuser " + "is a safe choice.";
LOG.warn(errorMsg);
}
} else {
LOG.error("The passed configs point to an HBase dir does not exist: {}", conf.get(HConstants.HBASE_DIR));
throw new IOException("The target HBase does not exist");
}
byte[] family;
int maxVersions;
TableName tn = TableName.valueOf(tableName);
try (Connection connection = ConnectionFactory.createConnection(conf);
Admin admin = connection.getAdmin()) {
TableDescriptor htd = admin.getDescriptor(tn);
ColumnFamilyDescriptor hcd = htd.getColumnFamily(Bytes.toBytes(familyName));
if (hcd == null || !hcd.isMobEnabled()) {
throw new IOException("Column family " + familyName + " is not a MOB column family");
}
family = hcd.getName();
maxVersions = hcd.getMaxVersions();
}
String id = getClass().getSimpleName() + UUID.randomUUID().toString().replace("-", "");
Job job = null;
Scan scan = new Scan();
scan.addFamily(family);
// Do not retrieve the mob data when scanning
scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
scan.setAttribute(MobConstants.MOB_SCAN_REF_ONLY, Bytes.toBytes(Boolean.TRUE));
// If a scanner caching value isn't set, pick a smaller default since we know we're doing
// a full table scan and don't want to impact other clients badly.
scan.setCaching(conf.getInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, 10000));
scan.setCacheBlocks(false);
scan.readVersions(maxVersions);
conf.set(REPORT_JOB_ID, id);
job = Job.getInstance(conf);
job.setJarByClass(getClass());
TableMapReduceUtil.initTableMapperJob(tn, scan, MobRefMapper.class, Text.class, ImmutableBytesWritable.class, job);
job.setReducerClass(MobRefReducer.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, new Path(output));
job.setJobName(getClass().getSimpleName() + "-" + tn + "-" + familyName);
// For use in the reducer; easier than re-parsing it out of the scan string.
job.getConfiguration().set(TableInputFormat.SCAN_COLUMN_FAMILY, familyName);
// Use the time we start this job as the base point for file "recency".
job.getConfiguration().setLong(REPORT_START_DATETIME, reportStartTime);
if (job.waitForCompletion(true)) {
LOG.info("Finished creating report for '{}', family='{}'", tn, familyName);
} else {
System.err.println("Job was not successful");
return 3;
}
return 0;
} catch (ClassNotFoundException | RuntimeException | IOException | InterruptedException e) {
System.err.println("Job aborted due to exception " + e);
// job failed
return 2;
}
}
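The tool validates the target family with ColumnFamilyDescriptor#isMobEnabled before configuring its scan, then reads the family's max versions. A minimal sketch of that check in isolation, with hypothetical table and family names and a default client configuration:

import java.io.IOException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.util.Bytes;

public class MobFamilyCheckExample {
  public static void main(String[] args) throws IOException {
    // Hypothetical table and family names, used for illustration only.
    TableName tn = TableName.valueOf("mob_table");
    String familyName = "mob_cf";
    try (Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create());
         Admin admin = connection.getAdmin()) {
      ColumnFamilyDescriptor hcd =
          admin.getDescriptor(tn).getColumnFamily(Bytes.toBytes(familyName));
      if (hcd == null || !hcd.isMobEnabled()) {
        throw new IOException("Column family " + familyName + " is not a MOB column family");
      }
      // MobRefReporter uses the max versions value to configure its scan.
      System.out.println("MOB threshold: " + hcd.getMobThreshold());
      System.out.println("Max versions: " + hcd.getMaxVersions());
    }
  }
}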