Use of org.apache.hadoop.mapred.InputSplit in project hive by apache.
The class TestOrcSplitElimination, method testSplitEliminationLargeMaxSplit.
@Test
public void testSplitEliminationLargeMaxSplit() throws Exception {
ObjectInspector inspector = createIO();
Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 100000, CompressionKind.NONE, 10000, 10000);
writeData(writer);
writer.close();
HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 1000);
HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 150000);
InputFormat<?, ?> in = new OrcInputFormat();
FileInputFormat.setInputPaths(conf, testFilePath.toString());
GenericUDF udf = new GenericUDFOPEqualOrLessThan();
List<ExprNodeDesc> childExpr = Lists.newArrayList();
ExprNodeConstantDesc con;
ExprNodeGenericFuncDesc en;
String sargStr;
createTestSarg(inspector, udf, childExpr);
InputSplit[] splits = in.getSplits(conf, 1);
assertEquals(2, splits.length);
con = new ExprNodeConstantDesc(0);
childExpr.set(1, con);
en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
sargStr = SerializationUtilities.serializeExpression(en);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
// no stripe satisfies the condition
assertEquals(0, splits.length);
con = new ExprNodeConstantDesc(2);
childExpr.set(1, con);
en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
sargStr = SerializationUtilities.serializeExpression(en);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
// only the first stripe satisfies the condition, hence a single split
assertEquals(1, splits.length);
con = new ExprNodeConstantDesc(5);
childExpr.set(1, con);
en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
sargStr = SerializationUtilities.serializeExpression(en);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
// the first stripe satisfies the predicate and forms a single split; the last stripe is a
// separate split
assertEquals(2, splits.length);
con = new ExprNodeConstantDesc(13);
childExpr.set(1, con);
en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
sargStr = SerializationUtilities.serializeExpression(en);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
// the first 2 stripes satisfy the predicate and are merged into a single split; the last stripe
// is a separate split
assertEquals(2, splits.length);
con = new ExprNodeConstantDesc(29);
childExpr.set(1, con);
en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
sargStr = SerializationUtilities.serializeExpression(en);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
// the first 3 stripes satisfy the predicate and are merged into a single split; the last stripe
// is a separate split
assertEquals(2, splits.length);
con = new ExprNodeConstantDesc(70);
childExpr.set(1, con);
en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
sargStr = SerializationUtilities.serializeExpression(en);
conf.set("hive.io.filter.expr.serialized", sargStr);
splits = in.getSplits(conf, 1);
// the first 2 stripes satisfy the predicate and are merged into a single split; the last two
// stripes form a separate split
assertEquals(2, splits.length);
}
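The test relies on a helper, createTestSarg, that is not shown here. Below is a minimal sketch of what such a helper could look like, assuming an illustrative long column named "userid" and an initial threshold of 100 (both assumptions, not taken from the test); it builds the (column <= constant) predicate whose second child the test later swaps via childExpr.set(1, con):
private void createTestSarg(ObjectInspector inspector, GenericUDF udf, List<ExprNodeDesc> childExpr) {
  // hypothetical column name and initial constant; only the predicate shape matters here
  childExpr.add(new ExprNodeColumnDesc(Long.class, "userid", "T", false));
  childExpr.add(new ExprNodeConstantDesc(100));
  ExprNodeGenericFuncDesc en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  String sargStr = SerializationUtilities.serializeExpression(en);
  conf.set("hive.io.filter.expr.serialized", sargStr);
}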
Use of org.apache.hadoop.mapred.InputSplit in project hive by apache.
The class TestHiveAccumuloTypes, method testUtf8Types.
@Test
public void testUtf8Types() throws Exception {
final String tableName = test.getMethodName(), user = "root", pass = "";
MockInstance mockInstance = new MockInstance(test.getMethodName());
Connector conn = mockInstance.getConnector(user, new PasswordToken(pass));
HiveAccumuloTableInputFormat inputformat = new HiveAccumuloTableInputFormat();
JobConf conf = new JobConf();
conf.set(AccumuloSerDeParameters.TABLE_NAME, tableName);
conf.set(AccumuloSerDeParameters.USE_MOCK_INSTANCE, "true");
conf.set(AccumuloSerDeParameters.INSTANCE_NAME, test.getMethodName());
conf.set(AccumuloSerDeParameters.USER_NAME, user);
conf.set(AccumuloSerDeParameters.USER_PASS, pass);
// not used for mock, but required by the input format
conf.set(AccumuloSerDeParameters.ZOOKEEPERS, "localhost:2181");
conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, AccumuloHiveConstants.ROWID + ",cf:string,cf:boolean,cf:tinyint,cf:smallint,cf:int,cf:bigint" + ",cf:float,cf:double,cf:decimal,cf:date,cf:timestamp,cf:char,cf:varchar");
conf.set(serdeConstants.LIST_COLUMNS, "string,string,boolean,tinyint,smallint,int,bigint,float,double,decimal,date,timestamp,char(4),varchar(7)");
conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,string,boolean,tinyint,smallint,int,bigint,float,double,decimal,date,timestamp,char(4),varchar(7)");
conn.tableOperations().create(tableName);
BatchWriterConfig writerConf = new BatchWriterConfig();
BatchWriter writer = conn.createBatchWriter(tableName, writerConf);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
String cf = "cf";
byte[] cfBytes = cf.getBytes();
ByteArrayRef byteRef = new ByteArrayRef();
Mutation m = new Mutation("row1");
// string
String stringValue = "string";
baos.reset();
JavaStringObjectInspector stringOI = (JavaStringObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME));
LazyUtils.writePrimitiveUTF8(baos, stringOI.create(stringValue), stringOI, false, (byte) 0, null);
m.put(cfBytes, "string".getBytes(), baos.toByteArray());
// boolean
boolean booleanValue = true;
baos.reset();
JavaBooleanObjectInspector booleanOI = (JavaBooleanObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME));
LazyUtils.writePrimitiveUTF8(baos, booleanOI.create(booleanValue), booleanOI, false, (byte) 0, null);
m.put(cfBytes, "boolean".getBytes(), baos.toByteArray());
// tinyint
byte tinyintValue = -127;
baos.reset();
JavaByteObjectInspector byteOI = (JavaByteObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME));
LazyUtils.writePrimitiveUTF8(baos, tinyintValue, byteOI, false, (byte) 0, null);
m.put(cfBytes, "tinyint".getBytes(), baos.toByteArray());
// smallint
short smallintValue = Short.MAX_VALUE;
baos.reset();
JavaShortObjectInspector shortOI = (JavaShortObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME));
LazyUtils.writePrimitiveUTF8(baos, smallintValue, shortOI, false, (byte) 0, null);
m.put(cfBytes, "smallint".getBytes(), baos.toByteArray());
// int
int intValue = Integer.MAX_VALUE;
baos.reset();
JavaIntObjectInspector intOI = (JavaIntObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME));
LazyUtils.writePrimitiveUTF8(baos, intValue, intOI, false, (byte) 0, null);
m.put(cfBytes, "int".getBytes(), baos.toByteArray());
// bigint
long bigintValue = Long.MAX_VALUE;
baos.reset();
JavaLongObjectInspector longOI = (JavaLongObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BIGINT_TYPE_NAME));
LazyUtils.writePrimitiveUTF8(baos, bigintValue, longOI, false, (byte) 0, null);
m.put(cfBytes, "bigint".getBytes(), baos.toByteArray());
// float
float floatValue = Float.MAX_VALUE;
baos.reset();
JavaFloatObjectInspector floatOI = (JavaFloatObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.FLOAT_TYPE_NAME));
LazyUtils.writePrimitiveUTF8(baos, floatValue, floatOI, false, (byte) 0, null);
m.put(cfBytes, "float".getBytes(), baos.toByteArray());
// double
double doubleValue = Double.MAX_VALUE;
baos.reset();
JavaDoubleObjectInspector doubleOI = (JavaDoubleObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME));
LazyUtils.writePrimitiveUTF8(baos, doubleValue, doubleOI, false, (byte) 0, null);
m.put(cfBytes, "double".getBytes(), baos.toByteArray());
// decimal
HiveDecimal decimalValue = HiveDecimal.create("1.23");
baos.reset();
JavaHiveDecimalObjectInspector decimalOI = (JavaHiveDecimalObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(new DecimalTypeInfo(5, 2));
LazyUtils.writePrimitiveUTF8(baos, decimalOI.create(decimalValue), decimalOI, false, (byte) 0, null);
m.put(cfBytes, "decimal".getBytes(), baos.toByteArray());
// date
Date now = new Date(System.currentTimeMillis());
DateWritable dateWritable = new DateWritable(now);
Date dateValue = dateWritable.get();
baos.reset();
JavaDateObjectInspector dateOI = (JavaDateObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.DATE_TYPE_NAME));
LazyUtils.writePrimitiveUTF8(baos, dateOI.create(dateValue), dateOI, false, (byte) 0, null);
m.put(cfBytes, "date".getBytes(), baos.toByteArray());
// timestamp
Timestamp timestampValue = new Timestamp(now.getTime());
baos.reset();
JavaTimestampObjectInspector timestampOI = (JavaTimestampObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.TIMESTAMP_TYPE_NAME));
LazyUtils.writePrimitiveUTF8(baos, timestampOI.create(timestampValue), timestampOI, false, (byte) 0, null);
m.put(cfBytes, "timestamp".getBytes(), baos.toByteArray());
// char
baos.reset();
HiveChar charValue = new HiveChar("char", 4);
JavaHiveCharObjectInspector charOI = (JavaHiveCharObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(new CharTypeInfo(4));
LazyUtils.writePrimitiveUTF8(baos, charOI.create(charValue), charOI, false, (byte) 0, null);
m.put(cfBytes, "char".getBytes(), baos.toByteArray());
// varchar
baos.reset();
HiveVarchar varcharValue = new HiveVarchar("varchar", 7);
JavaHiveVarcharObjectInspector varcharOI = (JavaHiveVarcharObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(new VarcharTypeInfo(7));
LazyUtils.writePrimitiveUTF8(baos, varcharOI.create(varcharValue), varcharOI, false, (byte) 0, null);
m.put(cfBytes, "varchar".getBytes(), baos.toByteArray());
writer.addMutation(m);
writer.close();
for (Entry<Key, Value> e : conn.createScanner(tableName, new Authorizations())) {
System.out.println(e);
}
// Create the RecordReader
FileInputFormat.addInputPath(conf, new Path("unused"));
InputSplit[] splits = inputformat.getSplits(conf, 0);
assertEquals(splits.length, 1);
RecordReader<Text, AccumuloHiveRow> reader = inputformat.getRecordReader(splits[0], conf, null);
Text key = reader.createKey();
AccumuloHiveRow value = reader.createValue();
reader.next(key, value);
Assert.assertEquals(13, value.getTuples().size());
// string
Text cfText = new Text(cf), cqHolder = new Text();
cqHolder.set("string");
byte[] valueBytes = value.getValue(cfText, cqHolder);
Assert.assertNotNull(valueBytes);
byteRef.setData(valueBytes);
LazyStringObjectInspector lazyStringOI = LazyPrimitiveObjectInspectorFactory.getLazyStringObjectInspector(false, (byte) 0);
LazyString lazyString = (LazyString) LazyFactory.createLazyObject(lazyStringOI);
lazyString.init(byteRef, 0, valueBytes.length);
Assert.assertEquals(new Text(stringValue), lazyString.getWritableObject());
// boolean
cqHolder.set("boolean");
valueBytes = value.getValue(cfText, cqHolder);
Assert.assertNotNull(valueBytes);
byteRef.setData(valueBytes);
LazyBooleanObjectInspector lazyBooleanOI = (LazyBooleanObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME));
LazyBoolean lazyBoolean = (LazyBoolean) LazyFactory.createLazyObject(lazyBooleanOI);
lazyBoolean.init(byteRef, 0, valueBytes.length);
Assert.assertEquals(booleanValue, lazyBoolean.getWritableObject().get());
// tinyint
cqHolder.set("tinyint");
valueBytes = value.getValue(cfText, cqHolder);
Assert.assertNotNull(valueBytes);
byteRef.setData(valueBytes);
LazyByteObjectInspector lazyByteOI = (LazyByteObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME));
LazyByte lazyByte = (LazyByte) LazyFactory.createLazyObject(lazyByteOI);
lazyByte.init(byteRef, 0, valueBytes.length);
Assert.assertEquals(tinyintValue, lazyByte.getWritableObject().get());
// smallint
cqHolder.set("smallint");
valueBytes = value.getValue(cfText, cqHolder);
Assert.assertNotNull(valueBytes);
byteRef.setData(valueBytes);
LazyShortObjectInspector lazyShortOI = (LazyShortObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME));
LazyShort lazyShort = (LazyShort) LazyFactory.createLazyObject(lazyShortOI);
lazyShort.init(byteRef, 0, valueBytes.length);
Assert.assertEquals(smallintValue, lazyShort.getWritableObject().get());
// int
cqHolder.set("int");
valueBytes = value.getValue(cfText, cqHolder);
Assert.assertNotNull(valueBytes);
byteRef.setData(valueBytes);
LazyIntObjectInspector lazyIntOI = (LazyIntObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME));
LazyInteger lazyInt = (LazyInteger) LazyFactory.createLazyObject(lazyIntOI);
lazyInt.init(byteRef, 0, valueBytes.length);
Assert.assertEquals(intValue, lazyInt.getWritableObject().get());
// bigint
cqHolder.set("bigint");
valueBytes = value.getValue(cfText, cqHolder);
Assert.assertNotNull(valueBytes);
byteRef.setData(valueBytes);
LazyLongObjectInspector lazyLongOI = (LazyLongObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BIGINT_TYPE_NAME));
LazyLong lazyLong = (LazyLong) LazyFactory.createLazyObject(lazyLongOI);
lazyLong.init(byteRef, 0, valueBytes.length);
Assert.assertEquals(bigintValue, lazyLong.getWritableObject().get());
// float
cqHolder.set("float");
valueBytes = value.getValue(cfText, cqHolder);
Assert.assertNotNull(valueBytes);
byteRef.setData(valueBytes);
LazyFloatObjectInspector lazyFloatOI = (LazyFloatObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.FLOAT_TYPE_NAME));
LazyFloat lazyFloat = (LazyFloat) LazyFactory.createLazyObject(lazyFloatOI);
lazyFloat.init(byteRef, 0, valueBytes.length);
Assert.assertEquals(floatValue, lazyFloat.getWritableObject().get(), 0);
// double
cqHolder.set("double");
valueBytes = value.getValue(cfText, cqHolder);
Assert.assertNotNull(valueBytes);
byteRef.setData(valueBytes);
LazyDoubleObjectInspector lazyDoubleOI = (LazyDoubleObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME));
LazyDouble lazyDouble = (LazyDouble) LazyFactory.createLazyObject(lazyDoubleOI);
lazyDouble.init(byteRef, 0, valueBytes.length);
Assert.assertEquals(doubleValue, lazyDouble.getWritableObject().get(), 0);
// decimal
cqHolder.set("decimal");
valueBytes = value.getValue(cfText, cqHolder);
Assert.assertNotNull(valueBytes);
byteRef.setData(valueBytes);
LazyHiveDecimalObjectInspector lazyDecimalOI = (LazyHiveDecimalObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(new DecimalTypeInfo(5, 2));
LazyHiveDecimal lazyDecimal = (LazyHiveDecimal) LazyFactory.createLazyObject(lazyDecimalOI);
lazyDecimal.init(byteRef, 0, valueBytes.length);
Assert.assertEquals(decimalValue, lazyDecimal.getWritableObject().getHiveDecimal());
// date
cqHolder.set("date");
valueBytes = value.getValue(cfText, cqHolder);
Assert.assertNotNull(valueBytes);
byteRef.setData(valueBytes);
LazyDateObjectInspector lazyDateOI = (LazyDateObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.DATE_TYPE_NAME));
LazyDate lazyDate = (LazyDate) LazyFactory.createLazyObject(lazyDateOI);
lazyDate.init(byteRef, 0, valueBytes.length);
Assert.assertEquals(dateValue, lazyDate.getWritableObject().get());
// timestamp
cqHolder.set("timestamp");
valueBytes = value.getValue(cfText, cqHolder);
Assert.assertNotNull(valueBytes);
byteRef.setData(valueBytes);
LazyTimestampObjectInspector lazyTimestampOI = (LazyTimestampObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.TIMESTAMP_TYPE_NAME));
LazyTimestamp lazyTimestamp = (LazyTimestamp) LazyFactory.createLazyObject(lazyTimestampOI);
lazyTimestamp.init(byteRef, 0, valueBytes.length);
Assert.assertEquals(timestampValue, lazyTimestamp.getWritableObject().getTimestamp());
// char
cqHolder.set("char");
valueBytes = value.getValue(cfText, cqHolder);
Assert.assertNotNull(valueBytes);
byteRef.setData(valueBytes);
LazyHiveCharObjectInspector lazyCharOI = (LazyHiveCharObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(new CharTypeInfo(4));
LazyHiveChar lazyChar = (LazyHiveChar) LazyFactory.createLazyObject(lazyCharOI);
lazyChar.init(byteRef, 0, valueBytes.length);
Assert.assertEquals(charValue, lazyChar.getWritableObject().getHiveChar());
// varchar
cqHolder.set("varchar");
valueBytes = value.getValue(cfText, cqHolder);
Assert.assertNotNull(valueBytes);
byteRef.setData(valueBytes);
LazyHiveVarcharObjectInspector lazyVarcharOI = (LazyHiveVarcharObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(new VarcharTypeInfo(7));
LazyHiveVarchar lazyVarchar = (LazyHiveVarchar) LazyFactory.createLazyObject(lazyVarcharOI);
lazyVarchar.init(byteRef, 0, valueBytes.length);
Assert.assertEquals(varcharValue.toString(), lazyVarchar.getWritableObject().getHiveVarchar().toString());
}
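Every column above follows the same write/read round trip; only the ObjectInspector and the expected value change. A condensed sketch of that pattern using two hypothetical helpers (writeUtf8 and readLazy are not part of the test):
private byte[] writeUtf8(Object javaValue, PrimitiveObjectInspector oi) throws IOException {
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  // same arguments as used throughout the test: no escaping, escape byte 0, no escape map
  LazyUtils.writePrimitiveUTF8(baos, javaValue, oi, false, (byte) 0, null);
  return baos.toByteArray();
}

private LazyObject<? extends ObjectInspector> readLazy(byte[] bytes, ObjectInspector lazyOI) {
  ByteArrayRef ref = new ByteArrayRef();
  ref.setData(bytes);
  LazyObject<? extends ObjectInspector> lazy = LazyFactory.createLazyObject(lazyOI);
  lazy.init(ref, 0, bytes.length);
  return lazy;
}
With these, each per-type block reduces to writing the Java value through its Java ObjectInspector, reading it back through the corresponding Lazy ObjectInspector, and asserting equality on the resulting writable.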
Use of org.apache.hadoop.mapred.InputSplit in project hive by apache.
The class HiveSplitGenerator, method pruneBuckets.
private InputSplit[] pruneBuckets(MapWork work, InputSplit[] splits) {
final BitSet buckets = work.getIncludedBuckets();
final String bucketIn = buckets.toString();
List<InputSplit> filteredSplits = new ArrayList<InputSplit>(splits.length / 2);
for (InputSplit split : splits) {
final int bucket = Utilities.parseSplitBucket(split);
if (bucket < 0 || buckets.get(bucket)) {
// match or UNKNOWN
filteredSplits.add(split);
} else {
LOG.info("Pruning with IN ({}) - removing {}", bucketIn, split);
}
}
if (filteredSplits.size() < splits.length) {
// reallocate only if any filters pruned
splits = filteredSplits.toArray(new InputSplit[filteredSplits.size()]);
}
return splits;
}
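The keep-or-prune decision is just the condition on the parsed bucket: a split whose bucket cannot be parsed (a negative value) is always kept, otherwise it is kept only when its bit is set in the included-buckets BitSet. A self-contained illustration with made-up bucket numbers, using only java.util.BitSet:
BitSet buckets = new BitSet();
buckets.set(0);
buckets.set(2);
for (int bucket : new int[] { 0, 1, 2, -1 }) {
  boolean keep = bucket < 0 || buckets.get(bucket);
  System.out.println(bucket + " -> " + (keep ? "kept" : "pruned")); // 0 kept, 1 pruned, 2 kept, -1 kept
}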
Use of org.apache.hadoop.mapred.InputSplit in project hive by apache.
The class CustomPartitionVertex, method getBucketSplitMapForPath.
/*
* This method generates the map of bucket to file splits.
*/
private Multimap<Integer, InputSplit> getBucketSplitMapForPath(Map<String, Set<FileSplit>> pathFileSplitsMap) {
int bucketNum = 0;
Multimap<Integer, InputSplit> bucketToInitialSplitMap = ArrayListMultimap.<Integer, InputSplit>create();
for (Map.Entry<String, Set<FileSplit>> entry : pathFileSplitsMap.entrySet()) {
int bucketId = bucketNum % numBuckets;
for (FileSplit fsplit : entry.getValue()) {
bucketToInitialSplitMap.put(bucketId, fsplit);
}
bucketNum++;
}
// If there are fewer distinct paths than buckets (e.g. an SMB join where the small table has
// fewer buckets than the big table), wrap around and reuse the earlier buckets' splits for the
// remaining bucket ids.
if (bucketNum < numBuckets) {
int loopedBucketId = 0;
for (; bucketNum < numBuckets; bucketNum++) {
for (InputSplit fsplit : bucketToInitialSplitMap.get(loopedBucketId)) {
bucketToInitialSplitMap.put(bucketNum, fsplit);
}
loopedBucketId++;
}
}
return bucketToInitialSplitMap;
}
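A worked example of the wrap-around with illustrative values, using strings in place of FileSplits: four buckets but only two paths, so the last two bucket ids reuse the splits already assigned to buckets 0 and 1:
Multimap<Integer, String> m = ArrayListMultimap.create();
m.put(0, "path0-split0"); // first path's split goes to bucket 0
m.put(1, "path1-split0"); // second path's split goes to bucket 1
int numBuckets = 4;
int loopedBucketId = 0;
for (int bucketNum = 2; bucketNum < numBuckets; bucketNum++) {
  for (String split : m.get(loopedBucketId)) {
    m.put(bucketNum, split);
  }
  loopedBucketId++;
}
// m now maps 0 -> [path0-split0], 1 -> [path1-split0], 2 -> [path0-split0], 3 -> [path1-split0]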
Use of org.apache.hadoop.mapred.InputSplit in project hive by apache.
The class StatsNoJobTask, method aggregateStats.
private int aggregateStats(ExecutorService threadPool, Hive db) {
int ret = 0;
try {
Collection<Partition> partitions = null;
if (work.getPrunedPartitionList() == null) {
partitions = getPartitionsList();
} else {
partitions = work.getPrunedPartitionList().getPartitions();
}
// non-partitioned table
if (partitions == null) {
org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
Map<String, String> parameters = tTable.getParameters();
try {
Path dir = new Path(tTable.getSd().getLocation());
long numRows = 0;
long rawDataSize = 0;
long fileSize = 0;
long numFiles = 0;
FileSystem fs = dir.getFileSystem(conf);
FileStatus[] fileList = HiveStatsUtils.getFileStatusRecurse(dir, -1, fs);
boolean statsAvailable = false;
for (FileStatus file : fileList) {
if (!file.isDir()) {
InputFormat<?, ?> inputFormat = ReflectionUtil.newInstance(table.getInputFormatClass(), jc);
InputSplit dummySplit = new FileSplit(file.getPath(), 0, 0, new String[] { table.getDataLocation().toString() });
if (file.getLen() == 0) {
numFiles += 1;
statsAvailable = true;
} else {
org.apache.hadoop.mapred.RecordReader<?, ?> recordReader = inputFormat.getRecordReader(dummySplit, jc, Reporter.NULL);
StatsProvidingRecordReader statsRR;
if (recordReader instanceof StatsProvidingRecordReader) {
statsRR = (StatsProvidingRecordReader) recordReader;
numRows += statsRR.getStats().getRowCount();
rawDataSize += statsRR.getStats().getRawDataSize();
fileSize += file.getLen();
numFiles += 1;
statsAvailable = true;
}
recordReader.close();
}
}
}
if (statsAvailable) {
parameters.put(StatsSetupConst.ROW_COUNT, String.valueOf(numRows));
parameters.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(rawDataSize));
parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(fileSize));
parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles));
EnvironmentContext environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
db.alterTable(tableFullName, new Table(tTable), environmentContext);
String msg = "Table " + tableFullName + " stats: [" + toString(parameters) + ']';
LOG.debug(msg);
console.printInfo(msg);
} else {
String msg = "Table " + tableFullName + " does not provide stats.";
LOG.debug(msg);
}
} catch (Exception e) {
console.printInfo("[Warning] could not update stats for " + tableFullName + ".", "Failed with exception " + e.getMessage() + "\n" + StringUtils.stringifyException(e));
}
} else {
// Partitioned table
for (Partition partn : partitions) {
threadPool.execute(new StatsCollection(partn));
}
LOG.debug("Stats collection waiting for threadpool to shutdown..");
shutdownAndAwaitTermination(threadPool);
LOG.debug("Stats collection threadpool shutdown successful.");
ret = updatePartitions(db);
}
} catch (Exception e) {
// Fail the query if the stats are supposed to be reliable
if (work.isStatsReliable()) {
ret = -1;
}
}
// a return value of 0 indicates success; anything else indicates failure
return ret;
}
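The core of the non-partitioned branch is the footer-stats probe: open a record reader over a zero-length dummy split and, if it implements StatsProvidingRecordReader, take row count and raw data size from the file footer instead of scanning rows. A minimal sketch of that probe in isolation, assuming an ORC file at an illustrative path and reusing the JobConf jc from the method above (ORC's record reader does implement StatsProvidingRecordReader):
InputFormat<?, ?> inputFormat = ReflectionUtil.newInstance(OrcInputFormat.class, jc);
// zero-length split: the reader only needs the path to locate the footer
InputSplit dummySplit = new FileSplit(new Path("/warehouse/t/000000_0"), 0, 0, new String[0]);
org.apache.hadoop.mapred.RecordReader<?, ?> rr = inputFormat.getRecordReader(dummySplit, jc, Reporter.NULL);
if (rr instanceof StatsProvidingRecordReader) {
  SerDeStats stats = ((StatsProvidingRecordReader) rr).getStats();
  System.out.println(stats.getRowCount() + " rows, " + stats.getRawDataSize() + " raw bytes");
}
rr.close();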