Use of org.apache.phoenix.mapreduce.bulkload.TargetTableRef in project phoenix by apache.
The class AbstractBulkLoadTool, method completebulkload.
private void completebulkload(Configuration conf, Path outputPath, List<TargetTableRef> tablesToBeLoaded) throws Exception {
    Set<String> tableNames = new HashSet<>(tablesToBeLoaded.size());
    for (TargetTableRef table : tablesToBeLoaded) {
        // Several target tables may share one physical HBase table; load each physical table only once.
        if (tableNames.contains(table.getPhysicalName())) {
            continue;
        }
        tableNames.add(table.getPhysicalName());
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        String tableName = table.getPhysicalName();
        Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputPath, tableName);
        HTable htable = new HTable(conf, tableName);
        LOG.info("Loading HFiles for {} from {}", tableName, tableOutputPath);
        loader.doBulkLoad(tableOutputPath, htable);
        LOG.info("Incremental load complete for table=" + tableName);
    }
}
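For comparison, here is a minimal standalone sketch of the same HFile handoff for a single table, assuming the HBase 1.x client APIs used in the snippet above (the HTable(Configuration, String) constructor and doBulkLoad(Path, HTable) are deprecated in later HBase releases). The table name and output path are placeholders, and unlike the snippet the HTable is closed explicitly.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

public class SingleTableBulkLoadSketch {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Hypothetical per-table HFile directory; completebulkload derives this via
        // CsvBulkImportUtil.getOutputPath(outputPath, tableName).
        Path tableOutputPath = new Path("/tmp/bulkload-output/MY_TABLE");
        String physicalTableName = "MY_TABLE";

        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        // Close the HTable once the bulk load has completed.
        try (HTable htable = new HTable(conf, physicalTableName)) {
            loader.doBulkLoad(tableOutputPath, htable);
        }
    }
}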
Use of org.apache.phoenix.mapreduce.bulkload.TargetTableRef in project phoenix by apache.
The class MultiHfileOutputFormat, method configureIncrementalLoad.
/**
 * Configures the job for MultiHfileOutputFormat.
 * @param job the MapReduce job to configure
 * @param tablesToBeLoaded the target tables whose HFiles the job will produce
 * @throws IOException if table descriptors or region boundaries cannot be read
 */
public static void configureIncrementalLoad(Job job, List<TargetTableRef> tablesToBeLoaded) throws IOException {
    Configuration conf = job.getConfiguration();
    job.setOutputFormatClass(MultiHfileOutputFormat.class);
    conf.setStrings("io.serializations", conf.get("io.serializations"),
            MutationSerialization.class.getName(), ResultSerialization.class.getName(),
            KeyValueSerialization.class.getName());
    // Region start keys for all tables.
    Set<TableRowkeyPair> tablesStartKeys = Sets.newTreeSet();
    for (TargetTableRef table : tablesToBeLoaded) {
        final String tableName = table.getPhysicalName();
        try (HTable htable = new HTable(conf, tableName)) {
            Set<TableRowkeyPair> startKeys = getRegionStartKeys(tableName, htable.getRegionLocator());
            tablesStartKeys.addAll(startKeys);
            String compressionConfig = configureCompression(htable.getTableDescriptor());
            String bloomTypeConfig = configureBloomType(htable.getTableDescriptor());
            String blockSizeConfig = configureBlockSize(htable.getTableDescriptor());
            String blockEncodingConfig = configureDataBlockEncoding(htable.getTableDescriptor());
            Map<String, String> tableConfigs = Maps.newHashMap();
            if (StringUtils.isNotBlank(compressionConfig)) {
                tableConfigs.put(COMPRESSION_FAMILIES_CONF_KEY, compressionConfig);
            }
            if (StringUtils.isNotBlank(bloomTypeConfig)) {
                tableConfigs.put(BLOOM_TYPE_FAMILIES_CONF_KEY, bloomTypeConfig);
            }
            if (StringUtils.isNotBlank(blockSizeConfig)) {
                tableConfigs.put(BLOCK_SIZE_FAMILIES_CONF_KEY, blockSizeConfig);
            }
            if (StringUtils.isNotBlank(blockEncodingConfig)) {
                tableConfigs.put(DATABLOCK_ENCODING_FAMILIES_CONF_KEY, blockEncodingConfig);
            }
            table.setConfiguration(tableConfigs);
            final String tableDefns = TargetTableRefFunctions.TO_JSON.apply(table);
            // Set the table definition in the config, to be read back by the RecordWriter.
            conf.set(tableName, tableDefns);
            TargetTableRef tbl = TargetTableRefFunctions.FROM_JSON.apply(tableDefns);
            LOG.info(" the table logical name is " + tbl.getLogicalName());
        }
    }
    LOG.info("Configuring " + tablesStartKeys.size() + " reduce partitions to match current region count");
    job.setNumReduceTasks(tablesStartKeys.size());
    configurePartitioner(job, tablesStartKeys);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
}
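The method above is the job-setup half of the pipeline; the hedged sketch below shows roughly how a driver might wire it into a MapReduce job. The table names and output path are placeholders, the target tables must already exist in HBase (their region boundaries and descriptors are read during configuration), and the mapper/input side that AbstractBulkLoadTool.submitJob normally handles is omitted.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.phoenix.mapreduce.MultiHfileOutputFormat;
import org.apache.phoenix.mapreduce.bulkload.TargetTableRef;

public class ConfigureIncrementalLoadSketch {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "phoenix-bulkload-sketch");

        // Hypothetical data table plus one index table: logical name first, physical name second,
        // mirroring how loadData() builds this list.
        List<TargetTableRef> tablesToBeLoaded = new ArrayList<TargetTableRef>();
        tablesToBeLoaded.add(new TargetTableRef("MY_SCHEMA.MY_TABLE", "MY_SCHEMA.MY_TABLE"));
        tablesToBeLoaded.add(new TargetTableRef("MY_SCHEMA.MY_INDEX", "MY_SCHEMA.MY_INDEX"));

        // Sets the output format and serializations, stores the per-table descriptors in the config,
        // and sizes the reduce phase to one partition per region across all target tables.
        MultiHfileOutputFormat.configureIncrementalLoad(job, tablesToBeLoaded);
        FileOutputFormat.setOutputPath(job, new Path("/tmp/bulkload-output"));

        // Input format, mapper, and job submission are intentionally left out of this sketch.
    }
}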
Use of org.apache.phoenix.mapreduce.bulkload.TargetTableRef in project phoenix by apache.
The class AbstractBulkLoadTool, method loadData.
private int loadData(Configuration conf, CommandLine cmdLine) throws Exception {
    String tableName = cmdLine.getOptionValue(TABLE_NAME_OPT.getOpt());
    String schemaName = cmdLine.getOptionValue(SCHEMA_NAME_OPT.getOpt());
    String indexTableName = cmdLine.getOptionValue(INDEX_TABLE_NAME_OPT.getOpt());
    String qualifiedTableName = SchemaUtil.getQualifiedTableName(schemaName, tableName);
    String qualifiedIndexTableName = null;
    if (indexTableName != null) {
        qualifiedIndexTableName = SchemaUtil.getQualifiedTableName(schemaName, indexTableName);
    }
    if (cmdLine.hasOption(ZK_QUORUM_OPT.getOpt())) {
        // ZK_QUORUM_OPT is optional, but if it's there, use it for both the conn and the job.
        String zkQuorum = cmdLine.getOptionValue(ZK_QUORUM_OPT.getOpt());
        PhoenixDriver.ConnectionInfo info = PhoenixDriver.ConnectionInfo.create(zkQuorum);
        LOG.info("Configuring HBase connection to {}", info);
        for (Map.Entry<String, String> entry : info.asProps()) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Setting {} = {}", entry.getKey(), entry.getValue());
            }
            conf.set(entry.getKey(), entry.getValue());
        }
    }
    final Connection conn = QueryUtil.getConnection(conf);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Reading columns from {} :: {}", ((PhoenixConnection) conn).getURL(), qualifiedTableName);
    }
    List<ColumnInfo> importColumns = buildImportColumns(conn, cmdLine, qualifiedTableName);
    Preconditions.checkNotNull(importColumns);
    Preconditions.checkArgument(!importColumns.isEmpty(), "Column info list is empty");
    FormatToBytesWritableMapper.configureColumnInfoList(conf, importColumns);
    boolean ignoreInvalidRows = cmdLine.hasOption(IGNORE_ERRORS_OPT.getOpt());
    conf.setBoolean(FormatToBytesWritableMapper.IGNORE_INVALID_ROW_CONFKEY, ignoreInvalidRows);
    conf.set(FormatToBytesWritableMapper.TABLE_NAME_CONFKEY, qualifiedTableName);
    // give subclasses their hook
    configureOptions(cmdLine, importColumns, conf);
    try {
        validateTable(conn, schemaName, tableName);
    } finally {
        conn.close();
    }
    final String inputPaths = cmdLine.getOptionValue(INPUT_PATH_OPT.getOpt());
    final Path outputPath;
    if (cmdLine.hasOption(OUTPUT_PATH_OPT.getOpt())) {
        outputPath = new Path(cmdLine.getOptionValue(OUTPUT_PATH_OPT.getOpt()));
    } else {
        outputPath = new Path("/tmp/" + UUID.randomUUID());
    }
    List<TargetTableRef> tablesToBeLoaded = new ArrayList<TargetTableRef>();
    PTable table = PhoenixRuntime.getTable(conn, qualifiedTableName);
    tablesToBeLoaded.add(new TargetTableRef(qualifiedTableName, table.getPhysicalName().getString()));
    boolean hasLocalIndexes = false;
    for (PTable index : table.getIndexes()) {
        if (index.getIndexType() == IndexType.LOCAL) {
            hasLocalIndexes = qualifiedIndexTableName == null ? true : index.getTableName().getString().equals(qualifiedIndexTableName);
            if (hasLocalIndexes) {
                break;
            }
        }
    }
    // using conn after it's been closed... o.O
    tablesToBeLoaded.addAll(getIndexTables(conn, schemaName, qualifiedTableName));
    // When loading a single index table, check index table name is correct
    if (qualifiedIndexTableName != null) {
        TargetTableRef targetIndexRef = null;
        for (TargetTableRef tmpTable : tablesToBeLoaded) {
            if (tmpTable.getLogicalName().compareToIgnoreCase(qualifiedIndexTableName) == 0) {
                targetIndexRef = tmpTable;
                break;
            }
        }
        if (targetIndexRef == null) {
            throw new IllegalStateException("Bulk Loader error: index table " + qualifiedIndexTableName + " doesn't exist");
        }
        tablesToBeLoaded.clear();
        tablesToBeLoaded.add(targetIndexRef);
    }
    return submitJob(conf, tableName, inputPaths, outputPath, tablesToBeLoaded, hasLocalIndexes);
}
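loadData itself is driven by a parsed CommandLine. The hedged sketch below shows one way a driver could invoke the concrete CsvBulkLoadTool through ToolRunner, which is a path that eventually reaches this method. The long option names (--table, --schema, --input, --zookeeper) and all values are illustrative assumptions; consult the tool's --help output for the authoritative flags.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.util.ToolRunner;
import org.apache.phoenix.mapreduce.CsvBulkLoadTool;

public class BulkLoadDriverSketch {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Placeholder option values; loadData() reads these from the parsed CommandLine.
        String[] toolArgs = new String[] {
                "--table", "MY_TABLE",
                "--schema", "MY_SCHEMA",
                "--input", "/data/my_table.csv",
                "--zookeeper", "zk1,zk2,zk3:2181"
        };
        int exitCode = ToolRunner.run(conf, new CsvBulkLoadTool(), toolArgs);
        System.exit(exitCode);
    }
}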
Use of org.apache.phoenix.mapreduce.bulkload.TargetTableRef in project phoenix by apache.
The class MultiHfileOutputFormat, method getTableConfigurations.
/**
 * Returns the set of configurations that have been configured for the table during job initialization.
 * @param conf the job configuration
 * @param tableName the physical table name under which the table definition was stored
 * @return the per-table configuration map, or null if nothing was configured for this table
 */
private static Map<String, String> getTableConfigurations(Configuration conf, final String tableName) {
    String tableDefn = conf.get(tableName);
    if (StringUtils.isEmpty(tableDefn)) {
        return null;
    }
    TargetTableRef table = TargetTableRefFunctions.FROM_JSON.apply(tableDefn);
    Map<String, String> tableConfigs = table.getConfiguration();
    return tableConfigs;
}
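Since getTableConfigurations is private, the round trip it relies on can be illustrated directly with TargetTableRefFunctions. In the sketch below the configuration key and value are placeholders standing in for the *_FAMILIES_CONF_KEY entries that configureIncrementalLoad actually writes.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.phoenix.mapreduce.bulkload.TargetTableRef;
import org.apache.phoenix.mapreduce.bulkload.TargetTableRefFunctions;

public class TableConfigRoundTripSketch {

    public static void main(String[] args) {
        // Hypothetical table; the key/value below are placeholders for illustration only.
        TargetTableRef table = new TargetTableRef("MY_SCHEMA.MY_TABLE", "MY_SCHEMA.MY_TABLE");
        Map<String, String> tableConfigs = new HashMap<String, String>();
        tableConfigs.put("example.compression.config", "SNAPPY");
        table.setConfiguration(tableConfigs);

        // Writer side: serialize into the job configuration under the physical table name...
        Configuration conf = new Configuration(false);
        conf.set(table.getPhysicalName(), TargetTableRefFunctions.TO_JSON.apply(table));

        // ...and read it back, which is what getTableConfigurations(conf, tableName) does.
        TargetTableRef restored =
                TargetTableRefFunctions.FROM_JSON.apply(conf.get(table.getPhysicalName()));
        System.out.println(restored.getLogicalName() + " -> " + restored.getConfiguration());
    }
}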
Use of org.apache.phoenix.mapreduce.bulkload.TargetTableRef in project phoenix by apache.
The class AbstractBulkLoadTool, method getIndexTables.
/**
 * Gets the index tables of the current data table.
 * @throws java.sql.SQLException
 */
private List<TargetTableRef> getIndexTables(Connection conn, String schemaName, String qualifiedTableName) throws SQLException {
    PTable table = PhoenixRuntime.getTable(conn, qualifiedTableName);
    List<TargetTableRef> indexTables = new ArrayList<TargetTableRef>();
    for (PTable indexTable : table.getIndexes()) {
        indexTables.add(new TargetTableRef(indexTable.getName().getString(), indexTable.getPhysicalName().getString()));
    }
    return indexTables;
}
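Essentially the same enumeration can be run standalone against a live Phoenix connection; in this hedged sketch the JDBC URL and table name are placeholders.

import java.sql.Connection;
import java.sql.DriverManager;
import java.util.ArrayList;
import java.util.List;

import org.apache.phoenix.mapreduce.bulkload.TargetTableRef;
import org.apache.phoenix.schema.PTable;
import org.apache.phoenix.util.PhoenixRuntime;

public class ListIndexTablesSketch {

    public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection("jdbc:phoenix:localhost:2181")) {
            PTable table = PhoenixRuntime.getTable(conn, "MY_SCHEMA.MY_TABLE");
            List<TargetTableRef> indexTables = new ArrayList<TargetTableRef>();
            for (PTable indexTable : table.getIndexes()) {
                // Same logical-name/physical-name pairing as getIndexTables() above.
                indexTables.add(new TargetTableRef(indexTable.getName().getString(),
                        indexTable.getPhysicalName().getString()));
            }
            for (TargetTableRef ref : indexTables) {
                System.out.println(ref.getLogicalName() + " -> " + ref.getPhysicalName());
            }
        }
    }
}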