Use of org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable in the Apache Phoenix project.
The setup method of the IndexScrutinyMapper class.
/**
 * Prepares this mapper for a scrutiny run.
 * <p>
 * Opens a connection pinned to the SCN stored in the job configuration, reads the scrutiny
 * settings (batch size, output options, execute timestamp), resolves the data and index
 * tables, derives the source/target column names from the configured {@code SourceTable},
 * and builds the base query that is later issued against the target table. When invalid
 * rows are to be written in {@code OutputFormat.TABLE} format, a second connection and the
 * upsert statement for the output table are prepared as well.
 * <p>
 * If initialisation fails, every JDBC resource opened so far is closed before the failure
 * is rethrown, so a failed setup does not leak connections.
 *
 * @param context the task context supplying the job configuration
 * @throws IOException if the superclass setup fails
 * @throws InterruptedException if the superclass setup is interrupted
 * @throws RuntimeException wrapping any {@link SQLException} or
 *         {@link NoSuchAlgorithmException} raised during initialisation
 */
@Override
protected void setup(final Context context) throws IOException, InterruptedException {
    super.setup(context);
    final Configuration configuration = context.getConfiguration();
    try {
        // get a connection with correct CURRENT_SCN (so incoming writes don't throw off the
        // scrutiny)
        final Properties overrideProps = new Properties();
        String scn = configuration.get(PhoenixConfigurationUtil.CURRENT_SCN_VALUE);
        overrideProps.put(PhoenixRuntime.CURRENT_SCN_ATTRIB, scn);
        connection = ConnectionUtil.getOutputConnection(configuration, overrideProps);
        connection.setAutoCommit(false);

        batchSize = PhoenixConfigurationUtil.getScrutinyBatchSize(configuration);
        outputInvalidRows = PhoenixConfigurationUtil.getScrutinyOutputInvalidRows(configuration);
        outputFormat = PhoenixConfigurationUtil.getScrutinyOutputFormat(configuration);
        executeTimestamp = PhoenixConfigurationUtil.getScrutinyExecuteTimestamp(configuration);

        // get the index table and column names
        String qDataTable = PhoenixConfigurationUtil.getScrutinyDataTableName(configuration);
        final PTable pdataTable = PhoenixRuntime.getTable(connection, qDataTable);
        final String qIndexTable = PhoenixConfigurationUtil.getScrutinyIndexTableName(configuration);
        final PTable pindexTable = PhoenixRuntime.getTable(connection, qIndexTable);

        // set the target table based on whether we're running the MR over the data or index
        // table
        SourceTable sourceTable = PhoenixConfigurationUtil.getScrutinySourceTable(configuration);
        SourceTargetColumnNames columnNames =
                SourceTable.DATA_TABLE_SOURCE.equals(sourceTable)
                        ? new SourceTargetColumnNames.DataSourceColNames(pdataTable, pindexTable)
                        : new SourceTargetColumnNames.IndexSourceColNames(pdataTable, pindexTable);
        qSourceTable = columnNames.getQualifiedSourceTableName();
        qTargetTable = columnNames.getQualifiedTargetTableName();
        List<String> targetColNames = columnNames.getTargetColNames();
        List<String> sourceColNames = columnNames.getSourceColNames();
        List<String> targetPkColNames = columnNames.getTargetPkColNames();
        String targetPksCsv =
                Joiner.on(",").join(SchemaUtil.getEscapedFullColumnNames(targetPkColNames));
        numSourcePkCols = columnNames.getSourcePkColNames().size();
        numTargetPkCols = targetPkColNames.size();

        // Invalid rows only go through a dedicated connection/statement when the output
        // format is a table; that connection is opened without the SCN override.
        if (outputInvalidRows && OutputFormat.TABLE.equals(outputFormat)) {
            outputConn = ConnectionUtil.getOutputConnection(configuration, new Properties());
            String upsertQuery = PhoenixConfigurationUtil.getUpsertStatement(configuration);
            this.outputUpsertStmt = outputConn.prepareStatement(upsertQuery);
        }
        outputMaxRows = PhoenixConfigurationUtil.getScrutinyOutputMax(configuration);

        // Create the query against the target table.
        // Our query projection should be all the index column names (or their data table
        // equivalent name). The statement deliberately ends with " IN " so the list of
        // primary-key values can be appended to it later.
        targetTableQuery =
                QueryUtil.constructSelectStatement(qTargetTable,
                        columnNames.getCastedTargetColNames(), targetPksCsv, Hint.NO_INDEX, false)
                        + " IN ";
        targetTblColumnMetadata =
                PhoenixRuntime.generateColumnInfo(connection, qTargetTable, targetColNames);
        sourceTblColumnMetadata =
                PhoenixRuntime.generateColumnInfo(connection, qSourceTable, sourceColNames);
        LOG.info("Target table base query: " + targetTableQuery);
        md5 = MessageDigest.getInstance("MD5");
    } catch (SQLException | NoSuchAlgorithmException e) {
        // Release anything opened so far: do not rely on cleanup() being run after a failed
        // setup. The statement is closed before the connection that created it. Failures
        // while closing are attached to the original error instead of replacing it.
        for (AutoCloseable resource : new AutoCloseable[] {outputUpsertStmt, outputConn, connection}) {
            if (resource == null) {
                continue;
            }
            try {
                resource.close();
            } catch (Exception closeFailure) {
                e.addSuppressed(closeFailure);
            }
        }
        throw new RuntimeException(e);
    }
}
Use of org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable in the Apache Phoenix project.
The writeJobResults method of the IndexScrutinyTableOutput class.
/**
 * Writes the results of the given jobs to the metadata table.
 * <p>
 * One row is added to a batch for every completed job; the batch is executed and the
 * connection committed once all jobs have been processed. The prepared statement used for
 * the upsert is always closed before this method returns, whether it succeeds or fails.
 * @param conn connection to use; it is committed by this method but not closed
 * @param cmdLineArgs arguments the {@code IndexScrutinyTool} was run with
 * @param completedJobs completed MR jobs
 * @throws IOException propagated from the calls made while reading the job results
 * @throws SQLException if table metadata cannot be resolved, or if preparing, executing or
 *             committing the upsert fails
 */
public static void writeJobResults(Connection conn, String[] cmdLineArgs, List<Job> completedJobs) throws IOException, SQLException {
    // try-with-resources guarantees the statement is released even when a job lookup or the
    // batch execution throws; the commit stays inside so the statement outlives it.
    try (PreparedStatement pStmt = conn.prepareStatement(UPSERT_METADATA_SQL)) {
        for (Job job : completedJobs) {
            Configuration conf = job.getConfiguration();
            String qDataTable = PhoenixConfigurationUtil.getScrutinyDataTableName(conf);
            final PTable pdataTable = PhoenixRuntime.getTable(conn, qDataTable);
            final String qIndexTable = PhoenixConfigurationUtil.getScrutinyIndexTableName(conf);
            final PTable pindexTable = PhoenixRuntime.getTable(conn, qIndexTable);
            SourceTable sourceTable = PhoenixConfigurationUtil.getScrutinySourceTable(conf);
            long scrutinyExecuteTime = PhoenixConfigurationUtil.getScrutinyExecuteTimestamp(conf);
            // Source/target roles depend on which table the MR job was run over.
            SourceTargetColumnNames columnNames =
                    SourceTable.DATA_TABLE_SOURCE.equals(sourceTable)
                            ? new DataSourceColNames(pdataTable, pindexTable)
                            : new IndexSourceColNames(pdataTable, pindexTable);
            Counters counters = job.getCounters();

            // Bind parameters positionally, in the order the setters appear below.
            int index = 1;
            pStmt.setString(index++, columnNames.getQualifiedSourceTableName());
            pStmt.setString(index++, columnNames.getQualifiedTargetTableName());
            pStmt.setLong(index++, scrutinyExecuteTime);
            pStmt.setString(index++, sourceTable.name());
            pStmt.setString(index++, Arrays.toString(cmdLineArgs));
            pStmt.setLong(index++, counters.findCounter(PhoenixJobCounters.INPUT_RECORDS).getValue());
            pStmt.setLong(index++, counters.findCounter(PhoenixJobCounters.FAILED_RECORDS).getValue());
            pStmt.setLong(index++, counters.findCounter(PhoenixScrutinyJobCounters.VALID_ROW_COUNT).getValue());
            pStmt.setLong(index++, counters.findCounter(PhoenixScrutinyJobCounters.INVALID_ROW_COUNT).getValue());
            pStmt.setLong(index++, counters.findCounter(PhoenixScrutinyJobCounters.BAD_COVERED_COL_VAL_COUNT).getValue());
            pStmt.setLong(index++, counters.findCounter(PhoenixScrutinyJobCounters.BATCHES_PROCESSED_COUNT).getValue());
            pStmt.setString(index++, Arrays.toString(columnNames.getSourceDynamicCols().toArray()));
            pStmt.setString(index++, Arrays.toString(columnNames.getTargetDynamicCols().toArray()));
            // Store ready-made SQL for inspecting this run's invalid rows.
            pStmt.setString(index++, getSqlQueryAllInvalidRows(conn, columnNames, scrutinyExecuteTime));
            pStmt.setString(index++, getSqlQueryMissingTargetRows(conn, columnNames, scrutinyExecuteTime));
            pStmt.setString(index++, getSqlQueryBadCoveredColVal(conn, columnNames, scrutinyExecuteTime));
            pStmt.addBatch();
        }
        pStmt.executeBatch();
        conn.commit();
    }
}
Aggregations