Use of org.apache.accumulo.core.client.mapred.RangeInputSplit in project hive by apache.
Class HiveAccumuloTableInputFormat, method getSplits.
/**
 * Computes the input splits for the query by delegating to the wrapped
 * {@code AccumuloInputFormat}, after pushing the connection parameters, column
 * mappings, iterators and predicate-derived ranges into the job configuration.
 * Each Accumulo {@code RangeInputSplit} is wrapped in a {@link HiveAccumuloSplit}
 * so Hive sees a split anchored at the table's input path.
 *
 * @param jobConf
 *          Job configuration carrying the Accumulo connection and SerDe properties
 * @param numSplits
 *          Hint for the number of splits, forwarded to the delegate input format
 * @return the wrapped splits, or an empty array when the predicate ranges prove
 *         no rows can match
 * @throws IOException
 *           if the column mapping is invalid or the Accumulo interaction fails
 */
@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
  final AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(jobConf);
  final Instance instance = accumuloParams.getInstance();
  final ColumnMapper columnMapper;
  try {
    columnMapper = getColumnMapper(jobConf);
  } catch (TooManyAccumuloColumnsException e) {
    throw new IOException(e);
  }
  JobContext context = ShimLoader.getHadoopShims().newJobContext(Job.getInstance(jobConf));
  Path[] tablePaths = FileInputFormat.getInputPaths(context);
  try {
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    final Connector connector;
    // Need to get a Connector so we look up the user's authorizations if not otherwise specified
    if (accumuloParams.useSasl() && !ugi.hasKerberosCredentials()) {
      // In a YARN/Tez job, don't have the Kerberos credentials anymore, use the delegation token
      AuthenticationToken token =
          ConfiguratorBase.getAuthenticationToken(AccumuloInputFormat.class, jobConf);
      // Convert the stub from the configuration back into a normal Token
      // More reflection to support 1.6
      token = helper.unwrapAuthenticationToken(jobConf, token);
      connector = instance.getConnector(accumuloParams.getAccumuloUserName(), token);
    } else {
      // Still in the local JVM, use the username+password or Kerberos credentials
      connector = accumuloParams.getConnector(instance);
    }
    final List<ColumnMapping> columnMappings = columnMapper.getColumnMappings();
    final List<IteratorSetting> iterators = predicateHandler.getIterators(jobConf, columnMapper);
    final Collection<Range> ranges = predicateHandler.getRanges(jobConf, columnMapper);
    // An empty (non-null) range collection means the predicate can match no rows at all,
    // so short-circuit instead of asking Accumulo for splits. We don't want that.
    if (null != ranges && ranges.isEmpty()) {
      return new InputSplit[0];
    }
    // Set the relevant information in the Configuration for the AccumuloInputFormat
    configure(jobConf, instance, connector, accumuloParams, columnMapper, iterators, ranges);

    int numColumns = columnMappings.size();
    List<Integer> readColIds = ColumnProjectionUtils.getReadColumnIDs(jobConf);
    // Sanity check: every projected Hive column must have an Accumulo column mapping
    if (numColumns < readColIds.size()) {
      throw new IOException("Number of column mappings (" + numColumns
          + ") is less than the number of Hive table columns to read (" + readColIds.size() + ")");
    }

    // get splits from Accumulo
    InputSplit[] splits = accumuloInputFormat.getSplits(jobConf, numSplits);
    HiveAccumuloSplit[] hiveSplits = new HiveAccumuloSplit[splits.length];
    for (int i = 0; i < splits.length; i++) {
      RangeInputSplit ris = (RangeInputSplit) splits[i];
      hiveSplits[i] = new HiveAccumuloSplit(ris, tablePaths[0]);
    }
    return hiveSplits;
  } catch (AccumuloException | AccumuloSecurityException | SerDeException e) {
    // Preserve the cause so callers get the full chain, not just the stringified trace
    log.error("Could not configure AccumuloInputFormat", e);
    throw new IOException(StringUtils.stringifyException(e), e);
  }
}
Use of org.apache.accumulo.core.client.mapred.RangeInputSplit in project hive by apache.
Class HiveAccumuloTableInputFormat, method setTableName.
/**
 * Sets the table name on a RangeInputSplit, accounting for change in method name. Any reflection
 * related exception is wrapped in an {@link IOException}.
 *
 * <p>ACCUMULO-3017 renamed {@code setTable(String)} to {@code setTableName(String)} without a
 * deprecation cycle, so we probe for the new name first and fall back to the old one.
 *
 * @param split
 *          The RangeInputSplit to operate on
 * @param tableName
 *          The name of the table to set
 * @throws IOException
 *           If neither setter can be found or invoked
 */
protected void setTableName(RangeInputSplit split, String tableName) throws IOException {
  // ACCUMULO-3017 shenanigans with method names changing without deprecation
  Method setTableName = null;
  try {
    setTableName = RangeInputSplit.class.getMethod("setTableName", String.class);
  } catch (SecurityException | NoSuchMethodException e) {
    // Fixed log message: this is the setter, not getTableName
    log.debug("Could not get setTableName method from RangeInputSplit", e);
  }

  if (null != setTableName) {
    try {
      setTableName.invoke(split, tableName);
      return;
    } catch (IllegalArgumentException | IllegalAccessException | InvocationTargetException e) {
      // Invocation failed; fall through and try the pre-rename method
      log.debug("Could not invoke setTableName method from RangeInputSplit", e);
    }
  }

  // Fall back to the Accumulo 1.5-era name; at this point failure is fatal
  Method setTable;
  try {
    setTable = RangeInputSplit.class.getMethod("setTable", String.class);
  } catch (SecurityException | NoSuchMethodException e) {
    throw new IOException("Could not set table name from RangeInputSplit", e);
  }
  try {
    setTable.invoke(split, tableName);
  } catch (IllegalArgumentException | IllegalAccessException | InvocationTargetException e) {
    throw new IOException("Could not set table name from RangeInputSplit", e);
  }
}
Use of org.apache.accumulo.core.client.mapred.RangeInputSplit in project hive by apache.
Class HiveAccumuloTableInputFormat, method getTableName.
/**
 * Reads the table name from a RangeInputSplit via reflection, papering over the
 * Accumulo 1.5/1.6 accessor rename. Any reflection related exception on the
 * fallback path is wrapped in an {@link IOException}.
 *
 * @param split
 *          A RangeInputSplit
 * @return The name of the table from the split
 * @throws IOException
 *           If the table name cannot be read by either accessor
 */
protected String getTableName(RangeInputSplit split) throws IOException {
  // ACCUMULO-3017: the accessor was renamed without deprecation, so probe for
  // the new-style method first and quietly fall back on any failure.
  Method newStyleGetter = null;
  try {
    newStyleGetter = RangeInputSplit.class.getMethod("getTableName");
  } catch (SecurityException | NoSuchMethodException e) {
    log.debug("Could not get getTableName method from RangeInputSplit", e);
  }

  if (newStyleGetter != null) {
    try {
      return (String) newStyleGetter.invoke(split);
    } catch (IllegalArgumentException | IllegalAccessException | InvocationTargetException e) {
      log.debug("Could not invoke getTableName method from RangeInputSplit", e);
    }
  }

  // Old-style (pre-rename) accessor; failures here are fatal.
  final Method oldStyleGetter;
  try {
    oldStyleGetter = RangeInputSplit.class.getMethod("getTable");
  } catch (SecurityException | NoSuchMethodException e) {
    throw new IOException("Could not get table name from RangeInputSplit", e);
  }
  try {
    return (String) oldStyleGetter.invoke(split);
  } catch (IllegalArgumentException | IllegalAccessException | InvocationTargetException e) {
    throw new IOException("Could not get table name from RangeInputSplit", e);
  }
}
Use of org.apache.accumulo.core.client.mapred.RangeInputSplit in project hive by apache.
Class HiveAccumuloTableInputFormat, method getRecordReader.
/**
 * Setup accumulo input format from conf properties. Delegates to final RecordReader from mapred
 * package, patching up the split for known Accumulo serialization bugs first.
 *
 * @param inputSplit
 *          The HiveAccumuloSplit wrapping an Accumulo RangeInputSplit
 * @param jobConf
 *          Job configuration carrying the Accumulo connection and SerDe properties
 * @param reporter
 *          Progress reporter, forwarded to the delegate
 * @return RecordReader producing {@code AccumuloHiveRow}s
 * @throws IOException
 *           If the column mapping is invalid or the delegate reader cannot be created
 */
@Override
public RecordReader<Text, AccumuloHiveRow> getRecordReader(InputSplit inputSplit,
    final JobConf jobConf, final Reporter reporter) throws IOException {
  final ColumnMapper columnMapper;
  try {
    columnMapper = getColumnMapper(jobConf);
  } catch (TooManyAccumuloColumnsException e) {
    throw new IOException(e);
  }

  try {
    final List<IteratorSetting> iterators = predicateHandler.getIterators(jobConf, columnMapper);

    HiveAccumuloSplit hiveSplit = (HiveAccumuloSplit) inputSplit;
    RangeInputSplit rangeSplit = hiveSplit.getSplit();
    log.info("Split: " + rangeSplit);

    // The split may arrive without its iterators due to an Accumulo serialization bug.
    // Should be fixed in Accumulo 1.5.2 and 1.6.1
    if (null == rangeSplit.getIterators()
        || (rangeSplit.getIterators().isEmpty() && !iterators.isEmpty())) {
      log.debug("Re-setting iterators on InputSplit due to Accumulo bug.");
      rangeSplit.setIterators(iterators);
    }

    // The table name may also be lost in transit;
    // but we want it to, so just re-set it if it's null.
    if (null == getTableName(rangeSplit)) {
      final AccumuloConnectionParameters accumuloParams =
          new AccumuloConnectionParameters(jobConf);
      log.debug("Re-setting table name on InputSplit due to Accumulo bug.");
      setTableName(rangeSplit, accumuloParams.getAccumuloTableName());
    }

    final RecordReader<Text, PeekingIterator<Map.Entry<Key, Value>>> recordReader =
        accumuloInputFormat.getRecordReader(rangeSplit, jobConf, reporter);
    return new HiveAccumuloRecordReader(recordReader, iterators.size());
  } catch (SerDeException e) {
    // Preserve the cause so callers get the full chain, not just the stringified trace
    throw new IOException(StringUtils.stringifyException(e), e);
  }
}
Aggregations