Use of org.apache.hadoop.hbase.util.RegionSizeCalculator in project hbase by apache.
The class TableInputFormatBase, method getSplits.
/**
* Calculates the splits that will serve as input for the map tasks. The
* number of splits matches the number of regions in a table.
*
* @param context The current job context.
* @return The list of input splits.
* @throws IOException When creating the list of splits fails.
* @see org.apache.hadoop.mapreduce.InputFormat#getSplits(
* org.apache.hadoop.mapreduce.JobContext)
*/
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  boolean closeOnFinish = false;
  // Just in case a subclass is relying on JobConfigurable magic.
  if (table == null) {
    initialize(context);
    closeOnFinish = true;
  }
  // null check in case our child overrides getTable to not throw.
  try {
    if (getTable() == null) {
      // initialize() must not have been implemented in the subclass.
      throw new IOException(INITIALIZATION_ERROR);
    }
  } catch (IllegalStateException exception) {
    throw new IOException(INITIALIZATION_ERROR, exception);
  }
  try {
    RegionSizeCalculator sizeCalculator =
        new RegionSizeCalculator(getRegionLocator(), getAdmin());
    TableName tableName = getTable().getName();
    Pair<byte[][], byte[][]> keys = getStartEndKeys();
    if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) {
      HRegionLocation regLoc =
          getRegionLocator().getRegionLocation(HConstants.EMPTY_BYTE_ARRAY, false);
      if (null == regLoc) {
        throw new IOException("Expecting at least one region.");
      }
      List<InputSplit> splits = new ArrayList<>(1);
      long regionSize = sizeCalculator.getRegionSize(regLoc.getRegionInfo().getRegionName());
      TableSplit split = new TableSplit(tableName, scan,
          HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY,
          regLoc.getHostnamePort().split(Addressing.HOSTNAME_PORT_SEPARATOR)[0],
          regionSize);
      splits.add(split);
      return splits;
    }
    List<InputSplit> splits = new ArrayList<>(keys.getFirst().length);
    for (int i = 0; i < keys.getFirst().length; i++) {
      if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
        continue;
      }
      byte[] startRow = scan.getStartRow();
      byte[] stopRow = scan.getStopRow();
      // determine if the given start and stop keys fall into the region
      if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
           Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
          (stopRow.length == 0 || Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
        byte[] splitStart =
            startRow.length == 0 || Bytes.compareTo(keys.getFirst()[i], startRow) >= 0
                ? keys.getFirst()[i] : startRow;
        byte[] splitStop =
            (stopRow.length == 0 || Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0)
                && keys.getSecond()[i].length > 0
                ? keys.getSecond()[i] : stopRow;
        HRegionLocation location = getRegionLocator().getRegionLocation(keys.getFirst()[i], false);
        // The below InetSocketAddress creation does a name resolution.
        InetSocketAddress isa = new InetSocketAddress(location.getHostname(), location.getPort());
        if (isa.isUnresolved()) {
          LOG.warn("Failed to resolve " + isa);
        }
        InetAddress regionAddress = isa.getAddress();
        String regionLocation = reverseDNS(regionAddress);
        byte[] regionName = location.getRegionInfo().getRegionName();
        String encodedRegionName = location.getRegionInfo().getEncodedName();
        long regionSize = sizeCalculator.getRegionSize(regionName);
        TableSplit split = new TableSplit(tableName, scan, splitStart, splitStop,
            regionLocation, encodedRegionName, regionSize);
        splits.add(split);
        if (LOG.isDebugEnabled()) {
          LOG.debug("getSplits: split -> " + i + " -> " + split);
        }
      }
    }
    // The default value of "hbase.mapreduce.input.autobalance" is false, i.e. disabled.
    boolean enableAutoBalance =
        context.getConfiguration().getBoolean(MAPREDUCE_INPUT_AUTOBALANCE, false);
    if (enableAutoBalance) {
      long totalRegionSize = 0;
      for (int i = 0; i < splits.size(); i++) {
        TableSplit ts = (TableSplit) splits.get(i);
        totalRegionSize += ts.getLength();
      }
      long averageRegionSize = totalRegionSize / splits.size();
      // the averageRegionSize must be positive.
      if (averageRegionSize <= 0) {
        LOG.warn("The averageRegionSize is not positive: " + averageRegionSize + ", set it to 1.");
        averageRegionSize = 1;
      }
      return calculateRebalancedSplits(splits, context, averageRegionSize);
    } else {
      return splits;
    }
  } finally {
    if (closeOnFinish) {
      closeTable();
    }
  }
}
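For context, here is a minimal driver sketch (not part of the excerpt above) showing how this getSplits() implementation is typically exercised through TableInputFormat, HBase's concrete subclass of TableInputFormatBase. The table name "my_table" is a placeholder, and the flag being set is the "hbase.mapreduce.input.autobalance" key read near the end of the method.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.IdentityTableMapper;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class SingleTableScanDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Opt in to the rebalancing branch at the end of getSplits() above
    // ("hbase.mapreduce.input.autobalance" defaults to false).
    conf.setBoolean("hbase.mapreduce.input.autobalance", true);
    Job job = Job.getInstance(conf, "scan-my-table");
    Scan scan = new Scan();
    scan.setCaching(500);        // fewer RPC round trips per mapper
    scan.setCacheBlocks(false);  // usual advice for full-table MapReduce scans
    // "my_table" is a placeholder; IdentityTableMapper ships with HBase.
    TableMapReduceUtil.initTableMapperJob("my_table", scan, IdentityTableMapper.class,
        ImmutableBytesWritable.class, Result.class, job);
    job.setOutputFormatClass(NullOutputFormat.class);  // this sketch only exercises the scan
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

With auto-balance off, the job gets exactly one split per region; with it on, calculateRebalancedSplits() merges small regions and divides large ones around the computed average region size.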
Use of org.apache.hadoop.hbase.util.RegionSizeCalculator in project phoenix by apache.
The class PhoenixInputFormat (Hive storage handler variant, taking a JobConf), method generateSplits.
private List<InputSplit> generateSplits(final JobConf jobConf, final QueryPlan qplan,
    final List<KeyRange> splits, String query) throws IOException {
  Preconditions.checkNotNull(qplan);
  Preconditions.checkNotNull(splits);
  final List<InputSplit> psplits = Lists.newArrayListWithExpectedSize(splits.size());
  Path[] tablePaths = FileInputFormat.getInputPaths(
      ShimLoader.getHadoopShims().newJobContext(new Job(jobConf)));
  boolean splitByStats = jobConf.getBoolean(PhoenixStorageHandlerConstants.SPLIT_BY_STATS, false);
  setScanCacheSize(jobConf);
  // Adding localization
  HConnection connection =
      HConnectionManager.createConnection(PhoenixConnectionUtil.getConfiguration(jobConf));
  RegionLocator regionLocator = connection.getRegionLocator(
      TableName.valueOf(qplan.getTableRef().getTable().getPhysicalName().toString()));
  RegionSizeCalculator sizeCalculator =
      new RegionSizeCalculator(regionLocator, connection.getAdmin());
  for (List<Scan> scans : qplan.getScans()) {
    PhoenixInputSplit inputSplit;
    HRegionLocation location = regionLocator.getRegionLocation(scans.get(0).getStartRow(), false);
    long regionSize = sizeCalculator.getRegionSize(location.getRegionInfo().getRegionName());
    String regionLocation = PhoenixStorageHandlerUtil.getRegionLocation(location, LOG);
    if (splitByStats) {
      for (Scan aScan : scans) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Split for scan : " + aScan + " with scanAttribute : "
              + aScan.getAttributesMap() + " [scanCache, cacheBlock, scanBatch] : ["
              + aScan.getCaching() + ", " + aScan.getCacheBlocks() + ", " + aScan.getBatch()
              + "] and regionLocation : " + regionLocation);
        }
        inputSplit = new PhoenixInputSplit(Lists.newArrayList(aScan), tablePaths[0],
            regionLocation, regionSize);
        inputSplit.setQuery(query);
        psplits.add(inputSplit);
      }
    } else {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Scan count[" + scans.size() + "] : "
            + Bytes.toStringBinary(scans.get(0).getStartRow()) + " ~ "
            + Bytes.toStringBinary(scans.get(scans.size() - 1).getStopRow()));
        LOG.debug("First scan : " + scans.get(0) + " with scanAttribute : "
            + scans.get(0).getAttributesMap() + " [scanCache, cacheBlock, scanBatch] : ["
            + scans.get(0).getCaching() + ", " + scans.get(0).getCacheBlocks() + ", "
            + scans.get(0).getBatch() + "] and regionLocation : " + regionLocation);
        for (int i = 0, limit = scans.size(); i < limit; i++) {
          LOG.debug("EXPECTED_UPPER_REGION_KEY[" + i + "] : " + Bytes.toStringBinary(
              scans.get(i).getAttribute(BaseScannerRegionObserver.EXPECTED_UPPER_REGION_KEY)));
        }
      }
      inputSplit = new PhoenixInputSplit(scans, tablePaths[0], regionLocation, regionSize);
      inputSplit.setQuery(query);
      psplits.add(inputSplit);
    }
  }
  return psplits;
}
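Across both the HBase and Phoenix excerpts, RegionSizeCalculator is driven the same way: construct it once from a RegionLocator plus an Admin, then query per-region sizes by region name. A standalone sketch of that pattern follows, using only the two calls visible above; the table name is a placeholder, and note that RegionSizeCalculator is an HBase-internal utility, so its API may shift between versions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.util.RegionSizeCalculator;

public class RegionSizes {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Admin admin = conn.getAdmin();
        RegionLocator locator = conn.getRegionLocator(TableName.valueOf("my_table"))) {
      // Region sizes are snapshotted once, at construction time; the
      // getRegionSize() lookups afterwards are cheap map reads.
      RegionSizeCalculator calc = new RegionSizeCalculator(locator, admin);
      for (HRegionLocation loc : locator.getAllRegionLocations()) {
        long bytes = calc.getRegionSize(loc.getRegionInfo().getRegionName());
        System.out.println(loc.getRegionInfo().getEncodedName() + " -> " + bytes + " bytes");
      }
    }
  }
}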
Use of org.apache.hadoop.hbase.util.RegionSizeCalculator in project phoenix by apache.
The class PhoenixInputFormat (MapReduce variant, taking a Configuration), method generateSplits.
private List<InputSplit> generateSplits(final QueryPlan qplan, final List<KeyRange> splits,
    Configuration config) throws IOException {
  Preconditions.checkNotNull(qplan);
  Preconditions.checkNotNull(splits);
  // Get the RegionSizeCalculator
  org.apache.hadoop.hbase.client.Connection connection = ConnectionFactory.createConnection(config);
  RegionLocator regionLocator = connection.getRegionLocator(
      TableName.valueOf(qplan.getTableRef().getTable().getPhysicalName().toString()));
  RegionSizeCalculator sizeCalculator =
      new RegionSizeCalculator(regionLocator, connection.getAdmin());
  final List<InputSplit> psplits = Lists.newArrayListWithExpectedSize(splits.size());
  for (List<Scan> scans : qplan.getScans()) {
    // Get the region location
    HRegionLocation location = regionLocator.getRegionLocation(scans.get(0).getStartRow(), false);
    String regionLocation = location.getHostname();
    // Get the region size
    long regionSize = sizeCalculator.getRegionSize(location.getRegionInfo().getRegionName());
    // Generate splits based off statistics, or just region splits?
    boolean splitByStats = PhoenixConfigurationUtil.getSplitByStats(config);
    if (splitByStats) {
      for (Scan aScan : scans) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Split for scan : " + aScan + " with scanAttribute : "
              + aScan.getAttributesMap() + " [scanCache, cacheBlock, scanBatch] : ["
              + aScan.getCaching() + ", " + aScan.getCacheBlocks() + ", " + aScan.getBatch()
              + "] and regionLocation : " + regionLocation);
        }
        psplits.add(new PhoenixInputSplit(Collections.singletonList(aScan), regionSize,
            regionLocation));
      }
    } else {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Scan count[" + scans.size() + "] : "
            + Bytes.toStringBinary(scans.get(0).getStartRow()) + " ~ "
            + Bytes.toStringBinary(scans.get(scans.size() - 1).getStopRow()));
        LOG.debug("First scan : " + scans.get(0) + " with scanAttribute : "
            + scans.get(0).getAttributesMap() + " [scanCache, cacheBlock, scanBatch] : ["
            + scans.get(0).getCaching() + ", " + scans.get(0).getCacheBlocks() + ", "
            + scans.get(0).getBatch() + "] and regionLocation : " + regionLocation);
        for (int i = 0, limit = scans.size(); i < limit; i++) {
          LOG.debug("EXPECTED_UPPER_REGION_KEY[" + i + "] : " + Bytes.toStringBinary(
              scans.get(i).getAttribute(BaseScannerRegionObserver.EXPECTED_UPPER_REGION_KEY)));
        }
      }
      psplits.add(new PhoenixInputSplit(scans, regionSize, regionLocation));
    }
  }
  return psplits;
}
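This variant is invoked from PhoenixInputFormat.getSplits() once Phoenix has compiled the job's SELECT into a QueryPlan. Below is a hedged driver sketch of how such a job is usually wired up, assuming the PhoenixMapReduceUtil.setInput overload that takes a table name and a select query; the STOCK table and the StockWritable class are illustrative placeholders, not part of the excerpt.

import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil;

public class PhoenixScanDriver {

  // Hypothetical value class mapping the query's columns; a real job
  // would usually also implement Writable for the shuffle.
  public static class StockWritable implements DBWritable {
    String stockName;
    int recordingYear;

    @Override
    public void readFields(ResultSet rs) throws SQLException {
      stockName = rs.getString("STOCK_NAME");
      recordingYear = rs.getInt("RECORDING_YEAR");
    }

    @Override
    public void write(PreparedStatement ps) throws SQLException {
      ps.setString(1, stockName);
      ps.setInt(2, recordingYear);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "phoenix-mr-job");
    // Registers PhoenixInputFormat and records the query; getSplits() later
    // compiles it into the QueryPlan consumed by generateSplits() above.
    PhoenixMapReduceUtil.setInput(job, StockWritable.class, "STOCK",
        "SELECT STOCK_NAME, RECORDING_YEAR FROM STOCK");
    job.setMapperClass(Mapper.class);  // identity mapper, map-only job
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}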
Use of org.apache.hadoop.hbase.util.RegionSizeCalculator in project hbase by apache.
The class MultiTableInputFormatBase, method getSplits.
/**
* Calculates the splits that will serve as input for the map tasks. The
* number of splits matches the number of regions in a table.
*
* @param context The current job context.
* @return The list of input splits.
* @throws IOException When creating the list of splits fails.
* @see org.apache.hadoop.mapreduce.InputFormat#getSplits(org.apache.hadoop.mapreduce.JobContext)
*/
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  if (scans.isEmpty()) {
    throw new IOException("No scans were provided.");
  }
  Map<TableName, List<Scan>> tableMaps = new HashMap<>();
  for (Scan scan : scans) {
    byte[] tableNameBytes = scan.getAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME);
    if (tableNameBytes == null) {
      throw new IOException("A scan object did not have a table name");
    }
    TableName tableName = TableName.valueOf(tableNameBytes);
    List<Scan> scanList = tableMaps.get(tableName);
    if (scanList == null) {
      scanList = new ArrayList<>();
      tableMaps.put(tableName, scanList);
    }
    scanList.add(scan);
  }
  List<InputSplit> splits = new ArrayList<>();
  for (Map.Entry<TableName, List<Scan>> entry : tableMaps.entrySet()) {
    TableName tableName = entry.getKey();
    List<Scan> scanList = entry.getValue();
    try (Connection conn = ConnectionFactory.createConnection(context.getConfiguration());
        Table table = conn.getTable(tableName);
        RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
      RegionSizeCalculator sizeCalculator =
          new RegionSizeCalculator(regionLocator, conn.getAdmin());
      Pair<byte[][], byte[][]> keys = regionLocator.getStartEndKeys();
      for (Scan scan : scanList) {
        if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) {
          throw new IOException("Expecting at least one region for table : "
              + tableName.getNameAsString());
        }
        int count = 0;
        byte[] startRow = scan.getStartRow();
        byte[] stopRow = scan.getStopRow();
        for (int i = 0; i < keys.getFirst().length; i++) {
          if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
            continue;
          }
          if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
               Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
              (stopRow.length == 0 || Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
            byte[] splitStart =
                startRow.length == 0 || Bytes.compareTo(keys.getFirst()[i], startRow) >= 0
                    ? keys.getFirst()[i] : startRow;
            byte[] splitStop =
                (stopRow.length == 0 || Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0)
                    && keys.getSecond()[i].length > 0
                    ? keys.getSecond()[i] : stopRow;
            HRegionLocation hregionLocation =
                regionLocator.getRegionLocation(keys.getFirst()[i], false);
            String regionHostname = hregionLocation.getHostname();
            HRegionInfo regionInfo = hregionLocation.getRegionInfo();
            String encodedRegionName = regionInfo.getEncodedName();
            long regionSize = sizeCalculator.getRegionSize(regionInfo.getRegionName());
            TableSplit split = new TableSplit(table.getName(), scan, splitStart, splitStop,
                regionHostname, encodedRegionName, regionSize);
            splits.add(split);
            if (LOG.isDebugEnabled()) {
              LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
            }
          }
        }
      }
    }
  }
  return splits;
}
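A short sketch of how these per-table splits are typically requested: each Scan must carry the Scan.SCAN_ATTRIBUTES_TABLE_NAME attribute that the grouping loop above reads, otherwise getSplits() throws "A scan object did not have a table name". The table names below are placeholders.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.IdentityTableMapper;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class MultiTableScanDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "multi-table-scan");
    List<Scan> scans = new ArrayList<>();
    for (String name : new String[] { "table_a", "table_b" }) {  // placeholder names
      Scan scan = new Scan();
      // This attribute is what getSplits() above uses to group scans by table.
      scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, TableName.valueOf(name).getName());
      scans.add(scan);
    }
    TableMapReduceUtil.initTableMapperJob(scans, IdentityTableMapper.class,
        ImmutableBytesWritable.class, Result.class, job);
    job.setOutputFormatClass(NullOutputFormat.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}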