Search in sources:

Example 1 with SourceTable

Use of org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable in project phoenix by apache.

In the class IndexScrutinyMapper, the method setup:

@Override
protected void setup(final Context context) throws IOException, InterruptedException {
    super.setup(context);
    final Configuration configuration = context.getConfiguration();
    try {
        // get a connection with correct CURRENT_SCN (so incoming writes don't throw off the
        // scrutiny)
        final Properties overrideProps = new Properties();
        String scn = configuration.get(PhoenixConfigurationUtil.CURRENT_SCN_VALUE);
        overrideProps.put(PhoenixRuntime.CURRENT_SCN_ATTRIB, scn);
        connection = ConnectionUtil.getOutputConnection(configuration, overrideProps);
        connection.setAutoCommit(false);
        batchSize = PhoenixConfigurationUtil.getScrutinyBatchSize(configuration);
        outputInvalidRows = PhoenixConfigurationUtil.getScrutinyOutputInvalidRows(configuration);
        outputFormat = PhoenixConfigurationUtil.getScrutinyOutputFormat(configuration);
        executeTimestamp = PhoenixConfigurationUtil.getScrutinyExecuteTimestamp(configuration);
        // get the index table and column names
        String qDataTable = PhoenixConfigurationUtil.getScrutinyDataTableName(configuration);
        final PTable pdataTable = PhoenixRuntime.getTable(connection, qDataTable);
        final String qIndexTable = PhoenixConfigurationUtil.getScrutinyIndexTableName(configuration);
        final PTable pindexTable = PhoenixRuntime.getTable(connection, qIndexTable);
        // set the target table based on whether we're running the MR over the data or index
        // table
        SourceTable sourceTable = PhoenixConfigurationUtil.getScrutinySourceTable(configuration);
        SourceTargetColumnNames columnNames =
                SourceTable.DATA_TABLE_SOURCE.equals(sourceTable)
                        ? new SourceTargetColumnNames.DataSourceColNames(pdataTable, pindexTable)
                        : new SourceTargetColumnNames.IndexSourceColNames(pdataTable, pindexTable);
        qSourceTable = columnNames.getQualifiedSourceTableName();
        qTargetTable = columnNames.getQualifiedTargetTableName();
        List<String> targetColNames = columnNames.getTargetColNames();
        List<String> sourceColNames = columnNames.getSourceColNames();
        List<String> targetPkColNames = columnNames.getTargetPkColNames();
        String targetPksCsv = Joiner.on(",").join(SchemaUtil.getEscapedFullColumnNames(targetPkColNames));
        numSourcePkCols = columnNames.getSourcePkColNames().size();
        numTargetPkCols = targetPkColNames.size();
        if (outputInvalidRows && OutputFormat.TABLE.equals(outputFormat)) {
            outputConn = ConnectionUtil.getOutputConnection(configuration, new Properties());
            String upsertQuery = PhoenixConfigurationUtil.getUpsertStatement(configuration);
            this.outputUpsertStmt = outputConn.prepareStatement(upsertQuery);
        }
        outputMaxRows = PhoenixConfigurationUtil.getScrutinyOutputMax(configuration);
        // Create the query against the target table.
        // The query projection should be all the index column names (or their data table
        // equivalent names).
        targetTableQuery = QueryUtil.constructSelectStatement(qTargetTable,
                columnNames.getCastedTargetColNames(), targetPksCsv, Hint.NO_INDEX, false) + " IN ";
        targetTblColumnMetadata = PhoenixRuntime.generateColumnInfo(connection, qTargetTable, targetColNames);
        sourceTblColumnMetadata = PhoenixRuntime.generateColumnInfo(connection, qSourceTable, sourceColNames);
        LOG.info("Target table base query: " + targetTableQuery);
        md5 = MessageDigest.getInstance("MD5");
    } catch (SQLException | NoSuchAlgorithmException e) {
        throw new RuntimeException(e);
    }
}
Also used: Configuration (org.apache.hadoop.conf.Configuration) SQLException (java.sql.SQLException) SourceTable (org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable) NoSuchAlgorithmException (java.security.NoSuchAlgorithmException) Properties (java.util.Properties) PTable (org.apache.phoenix.schema.PTable)
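The important detail in this setup is the CURRENT_SCN override: the mapper pins its reads to a fixed timestamp so that rows written while the scrutiny is running do not skew the comparison between the data table and the index table. A minimal sketch of that idea with a plain Phoenix JDBC connection is shown below; the JDBC URL and the helper class name are illustrative assumptions, not part of the tool.

import java.sql.Connection;
import java.sql.DriverManager;
import java.util.Properties;
import org.apache.phoenix.util.PhoenixRuntime;

public class ScnConnectionSketch {

    public static Connection openAsOf(String jdbcUrl, long scnMillis) throws Exception {
        // Pin all reads on this connection to a fixed point in time (CurrentSCN),
        // mirroring the override properties built in IndexScrutinyMapper.setup.
        Properties props = new Properties();
        props.setProperty(PhoenixRuntime.CURRENT_SCN_ATTRIB, Long.toString(scnMillis));
        Connection conn = DriverManager.getConnection(jdbcUrl, props);
        conn.setAutoCommit(false);
        return conn;
    }

    public static void main(String[] args) throws Exception {
        // "jdbc:phoenix:localhost" is an assumed local quorum; adjust to your cluster.
        try (Connection conn = openAsOf("jdbc:phoenix:localhost", System.currentTimeMillis())) {
            // Queries issued here see the tables as of the supplied timestamp.
        }
    }
}

In the mapper itself the connection is obtained through ConnectionUtil.getOutputConnection with these override properties, so any output-cluster settings in the job configuration are honored as well.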

Example 2 with SourceTable

Use of org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable in project phoenix by apache.

In the class IndexScrutinyTableOutput, the method writeJobResults:

/**
 * Writes the results of the given jobs to the metadata table
 * @param conn connection to use
 * @param cmdLineArgs arguments the {@code IndexScrutinyTool} was run with
 * @param completedJobs completed MR jobs
 * @throws IOException
 * @throws SQLException
 */
public static void writeJobResults(Connection conn, String[] cmdLineArgs, List<Job> completedJobs) throws IOException, SQLException {
    PreparedStatement pStmt = conn.prepareStatement(UPSERT_METADATA_SQL);
    for (Job job : completedJobs) {
        Configuration conf = job.getConfiguration();
        String qDataTable = PhoenixConfigurationUtil.getScrutinyDataTableName(conf);
        final PTable pdataTable = PhoenixRuntime.getTable(conn, qDataTable);
        final String qIndexTable = PhoenixConfigurationUtil.getScrutinyIndexTableName(conf);
        final PTable pindexTable = PhoenixRuntime.getTable(conn, qIndexTable);
        SourceTable sourceTable = PhoenixConfigurationUtil.getScrutinySourceTable(conf);
        long scrutinyExecuteTime = PhoenixConfigurationUtil.getScrutinyExecuteTimestamp(conf);
        SourceTargetColumnNames columnNames =
                SourceTable.DATA_TABLE_SOURCE.equals(sourceTable)
                        ? new DataSourceColNames(pdataTable, pindexTable)
                        : new IndexSourceColNames(pdataTable, pindexTable);
        Counters counters = job.getCounters();
        int index = 1;
        pStmt.setString(index++, columnNames.getQualifiedSourceTableName());
        pStmt.setString(index++, columnNames.getQualifiedTargetTableName());
        pStmt.setLong(index++, scrutinyExecuteTime);
        pStmt.setString(index++, sourceTable.name());
        pStmt.setString(index++, Arrays.toString(cmdLineArgs));
        pStmt.setLong(index++, counters.findCounter(PhoenixJobCounters.INPUT_RECORDS).getValue());
        pStmt.setLong(index++, counters.findCounter(PhoenixJobCounters.FAILED_RECORDS).getValue());
        pStmt.setLong(index++, counters.findCounter(PhoenixScrutinyJobCounters.VALID_ROW_COUNT).getValue());
        pStmt.setLong(index++, counters.findCounter(PhoenixScrutinyJobCounters.INVALID_ROW_COUNT).getValue());
        pStmt.setLong(index++, counters.findCounter(PhoenixScrutinyJobCounters.BAD_COVERED_COL_VAL_COUNT).getValue());
        pStmt.setLong(index++, counters.findCounter(PhoenixScrutinyJobCounters.BATCHES_PROCESSED_COUNT).getValue());
        pStmt.setString(index++, Arrays.toString(columnNames.getSourceDynamicCols().toArray()));
        pStmt.setString(index++, Arrays.toString(columnNames.getTargetDynamicCols().toArray()));
        pStmt.setString(index++, getSqlQueryAllInvalidRows(conn, columnNames, scrutinyExecuteTime));
        pStmt.setString(index++, getSqlQueryMissingTargetRows(conn, columnNames, scrutinyExecuteTime));
        pStmt.setString(index++, getSqlQueryBadCoveredColVal(conn, columnNames, scrutinyExecuteTime));
        pStmt.addBatch();
    }
    pStmt.executeBatch();
    conn.commit();
}
Also used: Configuration (org.apache.hadoop.conf.Configuration) IndexSourceColNames (org.apache.phoenix.mapreduce.index.SourceTargetColumnNames.IndexSourceColNames) SourceTable (org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable) PreparedStatement (java.sql.PreparedStatement) PhoenixJobCounters (org.apache.phoenix.mapreduce.PhoenixJobCounters) Counters (org.apache.hadoop.mapreduce.Counters) DataSourceColNames (org.apache.phoenix.mapreduce.index.SourceTargetColumnNames.DataSourceColNames) Job (org.apache.hadoop.mapreduce.Job) PTable (org.apache.phoenix.schema.PTable)
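The method follows a standard JDBC batching pattern: one PreparedStatement for the metadata upsert, one addBatch() per completed job, then a single executeBatch() and commit. A minimal sketch of the same pattern is shown below; the table name, column list, and JobResult holder are hypothetical stand-ins for UPSERT_METADATA_SQL and the MR counters, not the tool's actual schema.

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;

public class BatchedMetadataWriteSketch {

    // Hypothetical reduced column set; the real UPSERT_METADATA_SQL covers many more columns.
    private static final String UPSERT_SQL =
            "UPSERT INTO SCRUTINY_METADATA "
                    + "(SOURCE_TABLE, TARGET_TABLE, SCRUTINY_EXECUTE_TIME, INVALID_ROW_COUNT) "
                    + "VALUES (?, ?, ?, ?)";

    public static void write(Connection conn, List<JobResult> results) throws SQLException {
        // One PreparedStatement, one addBatch() per job, a single executeBatch() and commit,
        // mirroring the structure of IndexScrutinyTableOutput.writeJobResults.
        // Assumes auto-commit is disabled on the connection, as in the tool.
        try (PreparedStatement pStmt = conn.prepareStatement(UPSERT_SQL)) {
            for (JobResult r : results) {
                int index = 1;
                pStmt.setString(index++, r.sourceTable);
                pStmt.setString(index++, r.targetTable);
                pStmt.setLong(index++, r.executeTime);
                pStmt.setLong(index++, r.invalidRows);
                pStmt.addBatch();
            }
            pStmt.executeBatch();
            conn.commit();
        }
    }

    // Simple value holder standing in for the counters pulled off each completed MR job.
    public static class JobResult {
        String sourceTable;
        String targetTable;
        long executeTime;
        long invalidRows;
    }
}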

Aggregations

Configuration (org.apache.hadoop.conf.Configuration)2 SourceTable (org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable)2 PTable (org.apache.phoenix.schema.PTable)2 NoSuchAlgorithmException (java.security.NoSuchAlgorithmException)1 PreparedStatement (java.sql.PreparedStatement)1 SQLException (java.sql.SQLException)1 Properties (java.util.Properties)1 Counters (org.apache.hadoop.mapreduce.Counters)1 Job (org.apache.hadoop.mapreduce.Job)1 PhoenixJobCounters (org.apache.phoenix.mapreduce.PhoenixJobCounters)1 DataSourceColNames (org.apache.phoenix.mapreduce.index.SourceTargetColumnNames.DataSourceColNames)1 IndexSourceColNames (org.apache.phoenix.mapreduce.index.SourceTargetColumnNames.IndexSourceColNames)1