Use of org.apache.hadoop.hive.common.ValidWriteIdList in project hive by apache.
The class TestStreaming, method checkDataWritten2.
/**
 * @param validationQuery query to read from the table to compare data against {@code records}
 * @param records expected data; each row is a CSV list of values
 */
private void checkDataWritten2(Path partitionPath, long minTxn, long maxTxn, int numExpectedFiles,
    String validationQuery, boolean vectorize, String... records) throws Exception {
  ValidWriteIdList txns = msClient.getValidWriteIds(AcidUtils.getFullTableName(dbName, tblName));
  AcidUtils.Directory dir = AcidUtils.getAcidState(partitionPath, conf, txns);
  Assert.assertEquals(0, dir.getObsolete().size());
  Assert.assertEquals(0, dir.getOriginalFiles().size());
  List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories();
  System.out.println("Files found: ");
  for (AcidUtils.ParsedDelta pd : current) {
    System.out.println(pd.getPath().toString());
  }
  Assert.assertEquals(numExpectedFiles, current.size());
  // find the absolute minimum and maximum write IDs across all delta directories
  long min = Long.MAX_VALUE;
  long max = Long.MIN_VALUE;
  for (AcidUtils.ParsedDelta pd : current) {
    if (pd.getMaxWriteId() > max) {
      max = pd.getMaxWriteId();
    }
    if (pd.getMinWriteId() < min) {
      min = pd.getMinWriteId();
    }
  }
  Assert.assertEquals(minTxn, min);
  Assert.assertEquals(maxTxn, max);
  boolean isVectorizationEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
  if (vectorize) {
    conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
  }
  String currStrategy = conf.getVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY);
  for (String strategy : ((Validator.StringSet) HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.getValidator()).getExpected()) {
    // run the query with each split strategy - the results must be the same in every case
    conf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, strategy.toUpperCase());
    List<String> actualResult = queryTable(driver, validationQuery);
    for (int i = 0; i < actualResult.size(); i++) {
      Assert.assertEquals("diff at [" + i + "]. actual=" + actualResult + " expected=" + Arrays.toString(records),
          records[i], actualResult.get(i));
    }
  }
  conf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, currStrategy);
  conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, isVectorizationEnabled);
}
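
For orientation, a hypothetical call site for this helper; the partition path variable, expected file count, and row values below are made up for illustration:

// Hypothetical invocation: after two committed transactions we expect two
// delta directories, and both rows must come back under every split strategy.
checkDataWritten2(partLoc, 1, 2, 2,
    "select a, b from " + tblName + " order by a",
    true,
    "1,Hello streaming",
    "2,Welcome to streaming");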
Use of org.apache.hadoop.hive.common.ValidWriteIdList in project hive by apache.
The class AcidUtils, method setValidWriteIdList.
/**
 * Set the valid write ID list for the current table scan.
 */
public static void setValidWriteIdList(Configuration conf, TableScanDesc tsDesc) {
  if (tsDesc.isTranscationalTable()) {
    String dbName = tsDesc.getDatabaseName();
    String tableName = tsDesc.getTableName();
    ValidWriteIdList validWriteIdList = getTableValidWriteIdList(conf, AcidUtils.getFullTableName(dbName, tableName));
    if (validWriteIdList != null) {
      setValidWriteIdList(conf, validWriteIdList);
    } else {
      // Log an error if the ACID table is missing from the ValidWriteIdList conf
      LOG.error("setValidWriteIdList on table: " + AcidUtils.getFullTableName(dbName, tableName)
          + " isAcidTable: " + true
          + " acidProperty: " + getAcidOperationalProperties(conf)
          + " couldn't find the ValidWriteId list from ValidTxnWriteIdList: "
          + conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
      throw new IllegalStateException("ACID table: " + AcidUtils.getFullTableName(dbName, tableName)
          + " is missing from the ValidWriteIdList config: "
          + conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
    }
  }
}
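
To make the failure mode concrete, here is a consumer-side sketch of the same lookup, using only the APIs visible in these snippets; the table name and probed write ID are placeholders:

ValidWriteIdList writeIds = AcidUtils.getTableValidWriteIdList(conf, "db.tbl");
if (writeIds == null) {
  // the same situation the method above turns into an IllegalStateException
  throw new IllegalStateException("db.tbl is missing from the ValidWriteIdList config");
}
long hwm = writeIds.getHighWatermark();
// a single write ID, or a whole delta range, can be probed for visibility
boolean visible = writeIds.isWriteIdValid(42L);
ValidWriteIdList.RangeResponse range = writeIds.isWriteIdRangeValid(1L, hwm);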
Use of org.apache.hadoop.hive.common.ValidWriteIdList in project hive by apache.
The class HiveInputFormat, method addSplitsForGroup.
/*
 * addSplitsForGroup collects separate calls to setInputPaths into one where possible.
 * The reason is that this is faster on some InputFormats; e.g. ORC will start
 * a thread pool to do the work, and calling it multiple times would needlessly
 * create a lot of thread pools.
 */
private void addSplitsForGroup(List<Path> dirs, TableScanOperator tableScan, JobConf conf, InputFormat inputFormat,
    Class<? extends InputFormat> inputFormatClass, int splits, TableDesc table, List<InputSplit> result) throws IOException {
  ValidWriteIdList validWriteIdList = AcidUtils.getTableValidWriteIdList(conf, table.getTableName());
  ValidWriteIdList validMmWriteIdList;
  if (AcidUtils.isInsertOnlyTable(table.getProperties())) {
    if (validWriteIdList == null) {
      throw new IOException("Insert-Only table: " + table.getTableName()
          + " is missing from the ValidWriteIdList config: " + conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
    }
    validMmWriteIdList = validWriteIdList;
  } else {
    // not an MM (insert-only) table
    validMmWriteIdList = null;
  }
  try {
    Utilities.copyTablePropertiesToConf(table, conf);
    if (tableScan != null) {
      AcidUtils.setAcidOperationalProperties(conf, tableScan.getConf().isTranscationalTable(),
          tableScan.getConf().getAcidOperationalProperties());
      if (tableScan.getConf().isTranscationalTable() && (validWriteIdList == null)) {
        throw new IOException("Acid table: " + table.getTableName()
            + " is missing from the ValidWriteIdList config: " + conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
      }
      if (validWriteIdList != null) {
        AcidUtils.setValidWriteIdList(conf, validWriteIdList);
      }
    }
  } catch (HiveException e) {
    throw new IOException(e);
  }
  if (tableScan != null) {
    pushFilters(conf, tableScan, this.mrwork);
  }
  Path[] finalDirs = processPathsForMmRead(dirs, conf, validMmWriteIdList);
  if (finalDirs == null) {
    // No valid inputs.
    return;
  }
  FileInputFormat.setInputPaths(conf, finalDirs);
  conf.setInputFormat(inputFormat.getClass());
  int headerCount = 0;
  int footerCount = 0;
  if (table != null) {
    headerCount = Utilities.getHeaderCount(table);
    footerCount = Utilities.getFooterCount(table, conf);
    if (headerCount != 0 || footerCount != 0) {
      // The input files have a header or footer, so they cannot be split;
      // raising the minimum split size forces whole-file splits.
      HiveConf.setLongVar(conf, ConfVars.MAPREDMINSPLITSIZE, Long.MAX_VALUE);
    }
  }
  InputSplit[] iss = inputFormat.getSplits(conf, splits);
  for (InputSplit is : iss) {
    result.add(new HiveInputSplit(is, inputFormatClass.getName()));
  }
  if (iss.length == 0 && finalDirs.length > 0 && conf.getBoolean(Utilities.ENSURE_OPERATORS_EXECUTED, false)) {
    // If there are no inputs, the execution engine skips the operator tree.
    // To prevent that, an opaque ZeroRows input is added here when needed.
    result.add(new HiveInputSplit(new NullRowsInputFormat.DummyInputSplit(finalDirs[0].toString()),
        ZeroRowsInputFormat.class.getName()));
  }
}
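
Both error branches above print the raw conf value stored under ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY. A hedged sketch of reading that value back on the consumer side; the String-parsing constructor is an assumption here, and the exact serialization format is internal to Hive:

String txnWriteIds = conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY);
if (txnWriteIds != null) {
  // assumption: ValidTxnWriteIdList can be rebuilt from its serialized string form
  ValidTxnWriteIdList allTables = new ValidTxnWriteIdList(txnWriteIds);
  ValidWriteIdList forTable = allTables.getTableValidWriteIdList(table.getTableName());
}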
Use of org.apache.hadoop.hive.common.ValidWriteIdList in project hive by apache.
The class TestDbTxnManager2, method testValidWriteIdListSnapshot.
@Test
public void testValidWriteIdListSnapshot() throws Exception {
  // Create a transactional table.
  dropTable(new String[] { "temp.T7" });
  CommandProcessorResponse cpr = driver.run("create database if not exists temp");
  checkCmdOnDriver(cpr);
  cpr = driver.run("create table if not exists temp.T7(a int, b int) clustered by(b) into 2 buckets stored as orc "
      + "TBLPROPERTIES ('transactional'='true')");
  checkCmdOnDriver(cpr);
  // Open a base txn which allocates a write ID, then commit it.
  long baseTxnId = txnMgr.openTxn(ctx, "u0");
  long baseWriteId = txnMgr.getTableWriteId("temp", "T7");
  Assert.assertEquals(1, baseWriteId);
  // Commit baseTxnId.
  txnMgr.commitTxn();
  // Open a txn with no writes.
  HiveTxnManager txnMgr1 = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf);
  long underHwmOpenTxnId = txnMgr1.openTxn(ctx, "u1");
  Assert.assertTrue("Invalid txn ID", underHwmOpenTxnId > baseTxnId);
  // Open the txn under test, capturing its ValidTxnList snapshot at open time.
  // Verify the ValidWriteIdList with no open/aborted write txns on this table:
  // the write ID of the committed txn should be valid.
  HiveTxnManager txnMgr2 = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf);
  long testTxnId = txnMgr2.openTxn(ctx, "u2");
  Assert.assertTrue("Invalid txn ID", testTxnId > underHwmOpenTxnId);
  String testValidTxns = txnMgr2.getValidTxns().toString();
  ValidWriteIdList testValidWriteIds = txnMgr2.getValidWriteIds(Collections.singletonList("temp.t7"), testValidTxns)
      .getTableValidWriteIdList("temp.t7");
  Assert.assertEquals(baseWriteId, testValidWriteIds.getHighWatermark());
  Assert.assertTrue("Invalid write ID list", testValidWriteIds.isWriteIdValid(baseWriteId));
  // Open a txn which allocates a write ID and stays open.
  HiveTxnManager txnMgr3 = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf);
  long aboveHwmOpenTxnId = txnMgr3.openTxn(ctx, "u3");
  Assert.assertTrue("Invalid txn ID", aboveHwmOpenTxnId > testTxnId);
  long aboveHwmOpenWriteId = txnMgr3.getTableWriteId("temp", "T7");
  Assert.assertEquals(2, aboveHwmOpenWriteId);
  // Allocate a write ID to the txn under the HWM. It gets an ID greater than
  // that of the txn above the HWM, since write IDs are assigned in request order.
  long underHwmOpenWriteId = txnMgr1.getTableWriteId("temp", "T7");
  Assert.assertEquals(3, underHwmOpenWriteId);
  // Verify the ValidWriteIdList with one open txn on this table: the write ID of the open txn should be invalid.
  testValidWriteIds = txnMgr2.getValidWriteIds(Collections.singletonList("temp.t7"), testValidTxns)
      .getTableValidWriteIdList("temp.t7");
  Assert.assertEquals(underHwmOpenWriteId, testValidWriteIds.getHighWatermark());
  Assert.assertTrue("Invalid write ID list", testValidWriteIds.isWriteIdValid(baseWriteId));
  Assert.assertFalse("Invalid write ID list", testValidWriteIds.isWriteIdValid(underHwmOpenWriteId));
  Assert.assertFalse("Invalid write ID list", testValidWriteIds.isWriteIdValid(aboveHwmOpenWriteId));
  // Commit the txn under the HWM.
  // Its write ID should still be invalid for the test txn's snapshot.
  txnMgr1.commitTxn();
  testValidWriteIds = txnMgr2.getValidWriteIds(Collections.singletonList("temp.t7"), testValidTxns)
      .getTableValidWriteIdList("temp.t7");
  Assert.assertEquals(underHwmOpenWriteId, testValidWriteIds.getHighWatermark());
  Assert.assertTrue("Invalid write ID list", testValidWriteIds.isWriteIdValid(baseWriteId));
  Assert.assertFalse("Invalid write ID list", testValidWriteIds.isWriteIdValid(underHwmOpenWriteId));
  Assert.assertFalse("Invalid write ID list", testValidWriteIds.isWriteIdValid(aboveHwmOpenWriteId));
  // Allocate a write ID from the test txn, then verify the ValidWriteIdList.
  // Write IDs of the committed base txn and of the test txn itself should be valid,
  // while the write ID of the still-open txn should be invalid. The write ID of the
  // recently committed txn, which was open when the ValidTxnList snapshot was taken,
  // should be invalid as well.
  long testWriteId = txnMgr2.getTableWriteId("temp", "T7");
  Assert.assertEquals(4, testWriteId);
  testValidWriteIds = txnMgr2.getValidWriteIds(Collections.singletonList("temp.t7"), testValidTxns)
      .getTableValidWriteIdList("temp.t7");
  Assert.assertEquals(testWriteId, testValidWriteIds.getHighWatermark());
  Assert.assertTrue("Invalid write ID list", testValidWriteIds.isWriteIdValid(baseWriteId));
  Assert.assertTrue("Invalid write ID list", testValidWriteIds.isWriteIdValid(testWriteId));
  Assert.assertFalse("Invalid write ID list", testValidWriteIds.isWriteIdValid(underHwmOpenWriteId));
  Assert.assertFalse("Invalid write ID list", testValidWriteIds.isWriteIdValid(aboveHwmOpenWriteId));
  txnMgr2.commitTxn();
  txnMgr3.commitTxn();
  cpr = driver.run("drop database if exists temp cascade");
  checkCmdOnDriver(cpr);
}
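
The snapshot-isolation property this test walks through can be condensed to a few lines: a write ID allocated and committed after a reader captured its ValidTxnList must remain invisible to that reader. A hypothetical restatement using the same APIs, where readerTxnMgr and writerTxnMgr stand in for the txnMgr1/txnMgr2/txnMgr3 instances above:

// The reader captures its txn snapshot first.
String snapshot = readerTxnMgr.getValidTxns().toString();
// A writer then allocates a write ID for the table and commits.
long writeId = writerTxnMgr.getTableWriteId("temp", "T7");
writerTxnMgr.commitTxn();
// The reader's ValidWriteIdList derives from the old snapshot, so the
// newly committed write ID is still invalid (invisible) for the reader.
ValidWriteIdList ids = readerTxnMgr
    .getValidWriteIds(Collections.singletonList("temp.t7"), snapshot)
    .getTableValidWriteIdList("temp.t7");
Assert.assertFalse(ids.isWriteIdValid(writeId));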
Use of org.apache.hadoop.hive.common.ValidWriteIdList in project hive by apache.
The class TestCompactor, method checkExpectedTxnsPresent.
private void checkExpectedTxnsPresent(Path base, Path[] deltas, String columnNamesProperty, String columnTypesProperty,
    int bucket, long min, long max, int numBuckets) throws IOException {
  // A permissive ValidWriteIdList stub that treats every write ID and range as valid.
  ValidWriteIdList writeIdList = new ValidWriteIdList() {
    @Override
    public String getTableName() {
      return "AcidTable";
    }

    @Override
    public boolean isWriteIdValid(long writeid) {
      return true;
    }

    @Override
    public RangeResponse isWriteIdRangeValid(long minWriteId, long maxWriteId) {
      return RangeResponse.ALL;
    }

    @Override
    public String writeToString() {
      return "";
    }

    @Override
    public void readFromString(String src) {
    }

    @Override
    public Long getMinOpenWriteId() {
      return null;
    }

    @Override
    public long getHighWatermark() {
      return Long.MAX_VALUE;
    }

    @Override
    public long[] getInvalidWriteIds() {
      return new long[0];
    }

    @Override
    public boolean isValidBase(long writeid) {
      return true;
    }

    @Override
    public boolean isWriteIdAborted(long writeid) {
      return true;
    }

    @Override
    public RangeResponse isWriteIdRangeAborted(long minWriteId, long maxWriteId) {
      return RangeResponse.ALL;
    }
  };
  OrcInputFormat aif = new OrcInputFormat();
  Configuration conf = new Configuration();
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, columnNamesProperty);
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, columnTypesProperty);
  conf.set(hive_metastoreConstants.BUCKET_COUNT, Integer.toString(numBuckets));
  HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);
  AcidInputFormat.RawReader<OrcStruct> reader = aif.getRawReader(conf, true, bucket, writeIdList, base, deltas);
  RecordIdentifier identifier = reader.createKey();
  OrcStruct value = reader.createValue();
  long currentTxn = min;
  boolean seenCurrentTxn = false;
  while (reader.next(identifier, value)) {
    if (!seenCurrentTxn) {
      // the first row read must carry the minimum expected write ID
      Assert.assertEquals(currentTxn, identifier.getWriteId());
      seenCurrentTxn = true;
    }
    if (currentTxn != identifier.getWriteId()) {
      // write IDs must advance one at a time, with no gaps
      Assert.assertEquals(currentTxn + 1, identifier.getWriteId());
      currentTxn++;
    }
  }
  Assert.assertEquals(max, currentTxn);
}
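
A hypothetical call site, e.g. from a compaction test verifying that a major compaction folded write IDs 1 through 4 into a single base directory; all paths, schema strings, and counts below are illustrative:

// Hypothetical: after a major compaction, all rows for bucket 0 should live
// in the base directory and carry consecutive write IDs 1 through 4.
checkExpectedTxnsPresent(
    new Path(tablePath, "base_0000004"), // compacted base; directory name illustrative
    new Path[0],                         // no delta directories remain after major compaction
    "a,b", "int:string",                 // column names / colon-separated types (assumed format)
    0,                                   // bucket to read
    1L, 4L,                              // expected min/max write IDs
    1);                                  // number of buckets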