Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.
The class Import, method createSubmittableJob.
/**
 * Sets up the actual job.
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
  TableName tableName = TableName.valueOf(args[0]);
  conf.set(TABLE_NAME, tableName.getNameAsString());
  Path inputDir = new Path(args[1]);
  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJarByClass(Importer.class);
  FileInputFormat.setInputPaths(job, inputDir);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
  // make sure we get the filter in the jars
  try {
    Class<? extends Filter> filter = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
    if (filter != null) {
      TableMapReduceUtil.addDependencyJarsForClasses(conf, filter);
    }
  } catch (Exception e) {
    throw new IOException(e);
  }
  if (hfileOutPath != null && conf.getBoolean(HAS_LARGE_RESULT, false)) {
    LOG.info("Use Large Result!!");
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Table table = conn.getTable(tableName);
        RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
      HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
      job.setMapperClass(KeyValueSortImporter.class);
      job.setReducerClass(KeyValueReducer.class);
      Path outputDir = new Path(hfileOutPath);
      FileOutputFormat.setOutputPath(job, outputDir);
      job.setMapOutputKeyClass(KeyValueWritableComparable.class);
      job.setMapOutputValueClass(KeyValue.class);
      job.getConfiguration().setClass("mapreduce.job.output.key.comparator.class",
          KeyValueWritableComparable.KeyValueWritableComparator.class, RawComparator.class);
      Path partitionsPath = new Path(TotalOrderPartitioner.getPartitionFile(job.getConfiguration()));
      FileSystem fs = FileSystem.get(job.getConfiguration());
      fs.deleteOnExit(partitionsPath);
      job.setPartitionerClass(KeyValueWritableComparablePartitioner.class);
      job.setNumReduceTasks(regionLocator.getStartKeys().length);
      TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
          com.google.common.base.Preconditions.class);
    }
  } else if (hfileOutPath != null) {
    LOG.info("writing to hfiles for bulk load.");
    job.setMapperClass(KeyValueImporter.class);
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Table table = conn.getTable(tableName);
        RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
      job.setReducerClass(KeyValueSortReducer.class);
      Path outputDir = new Path(hfileOutPath);
      FileOutputFormat.setOutputPath(job, outputDir);
      job.setMapOutputKeyClass(ImmutableBytesWritable.class);
      job.setMapOutputValueClass(KeyValue.class);
      HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
      TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
          com.google.common.base.Preconditions.class);
    }
  } else {
    LOG.info("writing directly to table from Mapper.");
    // No reducers. Just write straight to table. Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    job.setMapperClass(Importer.class);
    TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
    job.setNumReduceTasks(0);
  }
  return job;
}
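The method above shows the two roles RegionLocator plays in the bulk-load branches: it supplies region boundaries to HFileOutputFormat2.configureIncrementalLoad, and its start-key count sets the number of reducers (one per region). A minimal sketch of that pattern in isolation, assuming a table named "mytable" already exists and an hbase-site.xml is on the classpath; the table name and output path are illustrative only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class BulkLoadSetupSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    TableName tableName = TableName.valueOf("mytable");   // illustrative table name
    Job job = Job.getInstance(conf, "bulkload-sketch");
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Table table = conn.getTable(tableName);
        RegionLocator locator = conn.getRegionLocator(tableName)) {
      // Region boundaries from the locator drive the incremental-load partitioner setup.
      HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), locator);
      // One reducer per region, mirroring the large-result branch above.
      job.setNumReduceTasks(locator.getStartKeys().length);
    }
    FileOutputFormat.setOutputPath(job, new Path("/tmp/bulkload-out")); // illustrative path
  }
}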
Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.
The class RegionSplitter, method getSplits.
static LinkedList<Pair<byte[], byte[]>> getSplits(final Connection connection, TableName tableName,
    SplitAlgorithm splitAlgo) throws IOException {
  Pair<Path, Path> tableDirAndSplitFile = getTableDirAndSplitFile(connection.getConfiguration(), tableName);
  Path tableDir = tableDirAndSplitFile.getFirst();
  Path splitFile = tableDirAndSplitFile.getSecond();
  FileSystem fs = tableDir.getFileSystem(connection.getConfiguration());
  // Using strings because (new byte[]{0}).equals(new byte[]{0}) == false
  Set<Pair<String, String>> daughterRegions = Sets.newHashSet();
  // Does a split file exist?
  if (!fs.exists(splitFile)) {
    // NO = fresh start. calculate splits to make
    LOG.debug("No " + splitFile.getName() + " file. Calculating splits ");
    // Query meta for all regions in the table
    Set<Pair<byte[], byte[]>> rows = Sets.newHashSet();
    Pair<byte[][], byte[][]> tmp = null;
    try (RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
      tmp = regionLocator.getStartEndKeys();
    }
    Preconditions.checkArgument(tmp.getFirst().length == tmp.getSecond().length,
        "Start and End rows should be equivalent");
    for (int i = 0; i < tmp.getFirst().length; ++i) {
      byte[] start = tmp.getFirst()[i], end = tmp.getSecond()[i];
      if (start.length == 0)
        start = splitAlgo.firstRow();
      if (end.length == 0)
        end = splitAlgo.lastRow();
      rows.add(Pair.newPair(start, end));
    }
    LOG.debug("Table " + tableName + " has " + rows.size() + " regions that will be split.");
    // prepare the split file
    Path tmpFile = new Path(tableDir, "_balancedSplit_prepare");
    FSDataOutputStream tmpOut = fs.create(tmpFile);
    // calculate all the splits == [daughterRegions] = [(start, splitPoint)]
    for (Pair<byte[], byte[]> r : rows) {
      byte[] splitPoint = splitAlgo.split(r.getFirst(), r.getSecond());
      String startStr = splitAlgo.rowToStr(r.getFirst());
      String splitStr = splitAlgo.rowToStr(splitPoint);
      daughterRegions.add(Pair.newPair(startStr, splitStr));
      LOG.debug("Will Split [" + startStr + " , " + splitAlgo.rowToStr(r.getSecond()) + ") at " + splitStr);
      tmpOut.writeChars("+ " + startStr + splitAlgo.separator() + splitStr + "\n");
    }
    tmpOut.close();
    fs.rename(tmpFile, splitFile);
  } else {
    LOG.debug("_balancedSplit file found. Replay log to restore state...");
    FSUtils.getInstance(fs, connection.getConfiguration()).recoverFileLease(fs, splitFile,
        connection.getConfiguration(), null);
    // parse split file and process remaining splits
    FSDataInputStream tmpIn = fs.open(splitFile);
    StringBuilder sb = new StringBuilder(tmpIn.available());
    while (tmpIn.available() > 0) {
      sb.append(tmpIn.readChar());
    }
    tmpIn.close();
    for (String line : sb.toString().split("\n")) {
      String[] cmd = line.split(splitAlgo.separator());
      Preconditions.checkArgument(3 == cmd.length);
      byte[] start = splitAlgo.strToRow(cmd[1]);
      String startStr = splitAlgo.rowToStr(start);
      byte[] splitPoint = splitAlgo.strToRow(cmd[2]);
      String splitStr = splitAlgo.rowToStr(splitPoint);
      Pair<String, String> r = Pair.newPair(startStr, splitStr);
      if (cmd[0].equals("+")) {
        LOG.debug("Adding: " + r);
        daughterRegions.add(r);
      } else {
        LOG.debug("Removing: " + r);
        Preconditions.checkArgument(cmd[0].equals("-"), "Unknown option: " + cmd[0]);
        Preconditions.checkState(daughterRegions.contains(r), "Missing row: " + r);
        daughterRegions.remove(r);
      }
    }
    LOG.debug("Done reading. " + daughterRegions.size() + " regions left.");
  }
  LinkedList<Pair<byte[], byte[]>> ret = Lists.newLinkedList();
  for (Pair<String, String> r : daughterRegions) {
    ret.add(Pair.newPair(splitAlgo.strToRow(r.getFirst()), splitAlgo.strToRow(r.getSecond())));
  }
  return ret;
}
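Here RegionLocator is used only to read the current start/end key of every region via getStartEndKeys() before the split points are computed. A minimal sketch of that lookup on its own, assuming a table named "mytable" exists (the name is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;

public class RegionBoundarySketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    try (Connection conn = ConnectionFactory.createConnection(conf);
        RegionLocator locator = conn.getRegionLocator(TableName.valueOf("mytable"))) {
      // Parallel arrays: start keys and end keys, one entry per region.
      Pair<byte[][], byte[][]> keys = locator.getStartEndKeys();
      for (int i = 0; i < keys.getFirst().length; i++) {
        System.out.println("region " + i + ": ["
            + Bytes.toStringBinary(keys.getFirst()[i]) + ", "
            + Bytes.toStringBinary(keys.getSecond()[i]) + ")");
      }
    }
  }
}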
Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.
The class TestHBaseFsckOneRS, method testLingeringSplitParent.
/**
 * A split parent in meta, in hdfs, and not deployed
 */
@Test(timeout = 180000)
public void testLingeringSplitParent() throws Exception {
  final TableName tableName = TableName.valueOf(name.getMethodName());
  Table meta = null;
  try {
    setupTable(tableName);
    assertEquals(ROWKEYS.length, countRows());
    // make sure data in regions, if in wal only there is no data loss
    admin.flush(tableName);
    HRegionLocation location;
    try (RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
      location = rl.getRegionLocation(Bytes.toBytes("B"));
    }
    // Delete one region from meta, but not hdfs, unassign it.
    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true, true, false);
    // Create a new meta entry to fake it as a split parent.
    meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
    HRegionInfo hri = location.getRegionInfo();
    HRegionInfo a = new HRegionInfo(tbl.getName(), Bytes.toBytes("B"), Bytes.toBytes("BM"));
    HRegionInfo b = new HRegionInfo(tbl.getName(), Bytes.toBytes("BM"), Bytes.toBytes("C"));
    hri.setOffline(true);
    hri.setSplit(true);
    MetaTableAccessor.addRegionToMeta(meta, hri, a, b);
    meta.close();
    admin.flush(TableName.META_TABLE_NAME);
    HBaseFsck hbck = doFsck(conf, false);
    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
        HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT,
        HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
    // regular repair cannot fix lingering split parent
    hbck = doFsck(conf, true);
    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
        HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT,
        HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
    assertFalse(hbck.shouldRerun());
    hbck = doFsck(conf, false);
    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
        HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT,
        HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
    // fix lingering split parent
    hbck = new HBaseFsck(conf, hbfsckExecutorService);
    hbck.connect();
    // i.e. -details
    HBaseFsck.setDisplayFullReport();
    hbck.setTimeLag(0);
    hbck.setFixSplitParents(true);
    hbck.onlineHbck();
    assertTrue(hbck.shouldRerun());
    hbck.close();
    Get get = new Get(hri.getRegionName());
    Result result = meta.get(get);
    assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER).isEmpty());
    assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER).isEmpty());
    admin.flush(TableName.META_TABLE_NAME);
    // fix other issues
    doFsck(conf, true);
    // check that all are fixed
    assertNoErrors(doFsck(conf, false));
    assertEquals(ROWKEYS.length, countRows());
  } finally {
    cleanupTable(tableName);
    IOUtils.closeQuietly(meta);
  }
}
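The only RegionLocator call in this test is getRegionLocation(byte[] row), which resolves a row key to the HRegionLocation (region info plus hosting server) that the rest of the test manipulates. A minimal sketch of that lookup, assuming a table "mytable" with a row key "B" (both illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.util.Bytes;

public class RowToRegionSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    try (Connection conn = ConnectionFactory.createConnection(conf);
        RegionLocator locator = conn.getRegionLocator(TableName.valueOf("mytable"))) {
      // Resolve the region that hosts row "B" from the client's location cache.
      HRegionLocation location = locator.getRegionLocation(Bytes.toBytes("B"));
      System.out.println("region: " + location.getRegionInfo().getRegionNameAsString());
      System.out.println("server: " + location.getServerName());
    }
  }
}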
Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.
The class TestHBaseFsckOneRS, method testHbckAfterRegionMerge.
@Test(timeout = 180000)
public void testHbckAfterRegionMerge() throws Exception {
  final TableName tableName = TableName.valueOf(name.getMethodName());
  Table meta = null;
  try {
    // disable CatalogJanitor
    TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
    setupTable(tableName);
    assertEquals(ROWKEYS.length, countRows());
    try (RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
      // make sure data in regions, if in wal only there is no data loss
      admin.flush(tableName);
      HRegionInfo region1 = rl.getRegionLocation(Bytes.toBytes("A")).getRegionInfo();
      HRegionInfo region2 = rl.getRegionLocation(Bytes.toBytes("B")).getRegionInfo();
      int regionCountBeforeMerge = rl.getAllRegionLocations().size();
      assertNotEquals(region1, region2);
      // do a region merge
      admin.mergeRegionsAsync(region1.getEncodedNameAsBytes(), region2.getEncodedNameAsBytes(), false);
      // wait until region merged
      long timeout = System.currentTimeMillis() + 30 * 1000;
      while (true) {
        if (rl.getAllRegionLocations().size() < regionCountBeforeMerge) {
          break;
        } else if (System.currentTimeMillis() > timeout) {
          fail("Time out waiting on region " + region1.getEncodedName() + " and "
              + region2.getEncodedName() + " be merged");
        }
        Thread.sleep(10);
      }
      assertEquals(ROWKEYS.length, countRows());
      HBaseFsck hbck = doFsck(conf, false);
      // no errors
      assertNoErrors(hbck);
    }
  } finally {
    TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
    cleanupTable(tableName);
    IOUtils.closeQuietly(meta);
  }
}
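This test leans on two further RegionLocator calls: getRegionLocation(row).getRegionInfo() to pick the two regions to merge, and getAllRegionLocations() to count regions and detect that the merge completed. A minimal sketch of listing and counting regions, assuming a table "mytable" (illustrative):

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;

public class RegionCountSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    try (Connection conn = ConnectionFactory.createConnection(conf);
        RegionLocator locator = conn.getRegionLocator(TableName.valueOf("mytable"))) {
      // One HRegionLocation per region; the list size is the current region count.
      List<HRegionLocation> locations = locator.getAllRegionLocations();
      System.out.println(locator.getName() + " has " + locations.size() + " regions");
      for (HRegionLocation loc : locations) {
        System.out.println(loc.getRegionInfo().getEncodedName() + " on " + loc.getServerName());
      }
    }
  }
}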
Use of org.apache.hadoop.hbase.client.RegionLocator in project phoenix by apache.
The class PhoenixInputFormat, method generateSplits.
private List<InputSplit> generateSplits(final JobConf jobConf, final QueryPlan qplan,
    final List<KeyRange> splits, String query) throws IOException {
  Preconditions.checkNotNull(qplan);
  Preconditions.checkNotNull(splits);
  final List<InputSplit> psplits = Lists.newArrayListWithExpectedSize(splits.size());
  Path[] tablePaths = FileInputFormat.getInputPaths(ShimLoader.getHadoopShims().newJobContext(new Job(jobConf)));
  boolean splitByStats = jobConf.getBoolean(PhoenixStorageHandlerConstants.SPLIT_BY_STATS, false);
  setScanCacheSize(jobConf);
  // Adding Localization
  HConnection connection = HConnectionManager.createConnection(PhoenixConnectionUtil.getConfiguration(jobConf));
  RegionLocator regionLocator = connection.getRegionLocator(
      TableName.valueOf(qplan.getTableRef().getTable().getPhysicalName().toString()));
  RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(regionLocator, connection.getAdmin());
  for (List<Scan> scans : qplan.getScans()) {
    PhoenixInputSplit inputSplit;
    HRegionLocation location = regionLocator.getRegionLocation(scans.get(0).getStartRow(), false);
    long regionSize = sizeCalculator.getRegionSize(location.getRegionInfo().getRegionName());
    String regionLocation = PhoenixStorageHandlerUtil.getRegionLocation(location, LOG);
    if (splitByStats) {
      for (Scan aScan : scans) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Split for scan : " + aScan + "with scanAttribute : " + aScan.getAttributesMap()
              + " [scanCache, cacheBlock, scanBatch] : [" + aScan.getCaching() + ", "
              + aScan.getCacheBlocks() + ", " + aScan.getBatch() + "] and regionLocation : "
              + regionLocation);
        }
        inputSplit = new PhoenixInputSplit(Lists.newArrayList(aScan), tablePaths[0], regionLocation, regionSize);
        inputSplit.setQuery(query);
        psplits.add(inputSplit);
      }
    } else {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Scan count[" + scans.size() + "] : " + Bytes.toStringBinary(scans.get(0).getStartRow())
            + " ~ " + Bytes.toStringBinary(scans.get(scans.size() - 1).getStopRow()));
        LOG.debug("First scan : " + scans.get(0) + "with scanAttribute : " + scans.get(0).getAttributesMap()
            + " [scanCache, cacheBlock, scanBatch] : " + "[" + scans.get(0).getCaching() + ", "
            + scans.get(0).getCacheBlocks() + ", " + scans.get(0).getBatch() + "] and regionLocation : "
            + regionLocation);
        for (int i = 0, limit = scans.size(); i < limit; i++) {
          LOG.debug("EXPECTED_UPPER_REGION_KEY[" + i + "] : " + Bytes.toStringBinary(
              scans.get(i).getAttribute(BaseScannerRegionObserver.EXPECTED_UPPER_REGION_KEY)));
        }
      }
      inputSplit = new PhoenixInputSplit(scans, tablePaths[0], regionLocation, regionSize);
      inputSplit.setQuery(query);
      psplits.add(inputSplit);
    }
  }
  return psplits;
}
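In the Phoenix code, RegionLocator does double duty: it feeds a RegionSizeCalculator so each input split can carry the size of its region, and getRegionLocation(startRow, false) maps each scan's start row to a region and server for split locality. A minimal sketch of that sizing lookup, using the non-deprecated ConnectionFactory instead of HConnectionManager; the table name and row key are illustrative, and the RegionSizeCalculator import path varies by HBase version (it moved to org.apache.hadoop.hbase.mapreduce in later releases):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.RegionSizeCalculator;

public class RegionSizeSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Admin admin = conn.getAdmin();
        RegionLocator locator = conn.getRegionLocator(TableName.valueOf("mytable"))) {
      // Snapshot of per-region sizes, keyed by region name, built from server load reports.
      RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(locator, admin);
      HRegionLocation location = locator.getRegionLocation(Bytes.toBytes("row-0001"), false);
      long regionSize = sizeCalculator.getRegionSize(location.getRegionInfo().getRegionName());
      System.out.println("region " + location.getRegionInfo().getRegionNameAsString()
          + " is ~" + regionSize + " bytes");
    }
  }
}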