Use of org.apache.phoenix.mapreduce.PhoenixInputSplit in project DataX by alibaba.
The class HbaseSQLHelper, method split.
public static List<Configuration> split(HbaseSQLReaderConfig readerConfig) {
    PhoenixInputFormat inputFormat = new PhoenixInputFormat<PhoenixRecordWritable>();
    org.apache.hadoop.conf.Configuration conf = generatePhoenixConf(readerConfig);
    JobID jobId = new JobID(Key.MOCK_JOBID_IDENTIFIER, Key.MOCK_JOBID);
    JobContextImpl jobContext = new JobContextImpl(conf, jobId);
    List<Configuration> resultConfigurations = new ArrayList<Configuration>();
    List<InputSplit> rawSplits = null;
    try {
        rawSplits = inputFormat.getSplits(jobContext);
        LOG.info("split size is " + rawSplits.size());
        for (InputSplit split : rawSplits) {
            // Serialize each PhoenixInputSplit and store it Base64-encoded in a per-task Configuration.
            Configuration cfg = readerConfig.getOriginalConfig().clone();
            byte[] splitSer = HadoopSerializationUtil.serialize((PhoenixInputSplit) split);
            String splitBase64Str = org.apache.commons.codec.binary.Base64.encodeBase64String(splitSer);
            cfg.set(Key.SPLIT_KEY, splitBase64Str);
            resultConfigurations.add(cfg);
        }
    } catch (IOException e) {
        // Message: "An exception occurred while fetching the table's split information; please check whether the HBase cluster is healthy."
        throw DataXException.asDataXException(HbaseSQLReaderErrorCode.GET_PHOENIX_SPLITS_ERROR, "获取表的split信息时出现了异常,请检查hbase集群服务是否正常," + e.getMessage(), e);
    } catch (InterruptedException e) {
        // Message: "Interrupted while fetching the table's split information; please retry, and contact the DataX administrator if the problem persists."
        throw DataXException.asDataXException(HbaseSQLReaderErrorCode.GET_PHOENIX_SPLITS_ERROR, "获取表的split信息时被中断,请重试,若还有问题请联系datax管理员," + e.getMessage(), e);
    }
    return resultConfigurations;
}
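HadoopSerializationUtil is a small DataX helper around the Writable-style write/readFields contract that PhoenixInputSplit follows. A minimal sketch of what such serialize/deserialize helpers typically look like, assuming only the standard org.apache.hadoop.io.Writable API; the class and method names mirror the calls above, but the body is an assumption, not DataX's actual source.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.Writable;

public class HadoopSerializationUtil {

    // Writable.write(...) streams the split's scans and key boundaries into a byte array.
    public static byte[] serialize(Writable writable) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (DataOutputStream out = new DataOutputStream(bytes)) {
            writable.write(out);
        }
        return bytes.toByteArray();
    }

    // Writable.readFields(...) repopulates an empty instance (e.g. new PhoenixInputSplit()).
    public static void deserialize(Writable writable, byte[] data) throws IOException {
        try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(data))) {
            writable.readFields(in);
        }
    }
}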
Use of org.apache.phoenix.mapreduce.PhoenixInputSplit in project Datax by n-kong.
The class HbaseSQLHelper, method split (identical to the DataX by alibaba version shown above).
Use of org.apache.phoenix.mapreduce.PhoenixInputSplit in project Datax by n-kong.
The class HbaseSQLReaderTask, method init.
public void init() {
    LOG.info("reader table info: " + this.readerConfig.toString());
    try {
        this.getPColumns();
    } catch (SQLException e) {
        // Message: "Failed to fetch the table's columns; please retry, and check the HBase cluster state if the problem persists."
        throw DataXException.asDataXException(HbaseSQLReaderErrorCode.GET_PHOENIX_CONNECTIONINFO_ERROR, "获取表的列出问题,重试,若还有问题请检查hbase集群状态," + e.getMessage());
    }
    this.phoenixInputFormat = new PhoenixInputFormat<PhoenixRecordWritable>();
    // Decode the Base64 string written by HbaseSQLHelper.split(...) back into the serialized split bytes.
    String splitBase64Str = this.readerConfig.getOriginalConfig().getString(Key.SPLIT_KEY);
    byte[] splitBytes = org.apache.commons.codec.binary.Base64.decodeBase64(splitBase64Str);
    TaskAttemptID attemptId = new TaskAttemptID();
    org.apache.hadoop.conf.Configuration conf = HbaseSQLHelper.generatePhoenixConf(this.readerConfig);
    this.hadoopAttemptContext = new TaskAttemptContextImpl(conf, attemptId);
    this.phoenixInputSplit = new PhoenixInputSplit();
    try {
        HadoopSerializationUtil.deserialize(phoenixInputSplit, splitBytes);
        this.phoenixRecordReader = (PhoenixRecordReader) phoenixInputFormat.createRecordReader(phoenixInputSplit, hadoopAttemptContext);
    } catch (Exception e) {
        // Message: "Failed to create the Phoenix reader; please retry, and check the HBase cluster state if the problem persists."
        throw DataXException.asDataXException(HbaseSQLReaderErrorCode.PHOENIX_CREATEREADER_ERROR, "创建phoenix的reader出现问题,请重试,若还有问题请检查hbase集群状态," + e.getMessage());
    }
}
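Once createRecordReader(...) has returned, rows are pulled through the standard Hadoop RecordReader contract (initialize, nextKeyValue, getCurrentValue, close). A minimal sketch of such a read loop, assuming the fields set up in init() above; the method name startRead and the sendToWriter(...) sink are hypothetical, not DataX's actual code.

public void startRead() throws IOException, InterruptedException {
    // initialize(...) opens the Phoenix query for the scans carried by this split.
    phoenixRecordReader.initialize(phoenixInputSplit, hadoopAttemptContext);
    while (phoenixRecordReader.nextKeyValue()) {
        // Each value is one Phoenix row wrapped in a PhoenixRecordWritable.
        PhoenixRecordWritable record = (PhoenixRecordWritable) phoenixRecordReader.getCurrentValue();
        sendToWriter(record); // hypothetical sink; DataX would convert the row into a Record for the writer channel
    }
    phoenixRecordReader.close();
}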
Use of org.apache.phoenix.mapreduce.PhoenixInputSplit in project trino by trinodb.
The class TestPhoenixSplit, method testPhoenixSplitJsonRoundtrip.
@Test
public void testPhoenixSplitJsonRoundtrip() throws Exception {
    List<HostAddress> addresses = ImmutableList.of(HostAddress.fromString("host:9000"));
    List<Scan> scans = ImmutableList.of(new Scan().withStartRow(Bytes.toBytes("A")).withStopRow(Bytes.toBytes("Z")));
    PhoenixInputSplit phoenixInputSplit = new PhoenixInputSplit(scans);
    PhoenixSplit expected = new PhoenixSplit(addresses, SerializedPhoenixInputSplit.serialize(phoenixInputSplit));
    assertTrue(objectMapper.canSerialize(PhoenixSplit.class));
    String json = objectMapper.writeValueAsString(expected);
    PhoenixSplit actual = objectMapper.readValue(json, PhoenixSplit.class);
    assertEquals(actual.getPhoenixInputSplit(), expected.getPhoenixInputSplit());
    assertEquals(actual.getAddresses(), expected.getAddresses());
}
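SerializedPhoenixInputSplit is what lets the Hadoop Writable survive the JSON round trip above. A minimal sketch of how such a wrapper can be built, assuming Jackson annotations and the Writable write/readFields contract; the class SerializedWritableSplit here is an illustrative reimplementation, not Trino's actual source.

public final class SerializedWritableSplit {
    private final byte[] bytes;

    @JsonCreator
    public SerializedWritableSplit(@JsonProperty("bytes") byte[] bytes) {
        this.bytes = bytes;
    }

    // Capture the split's Writable form so Jackson only ever sees a byte array (Base64 in JSON).
    public static SerializedWritableSplit serialize(PhoenixInputSplit split) {
        try (ByteArrayOutputStream out = new ByteArrayOutputStream();
                DataOutputStream dataOut = new DataOutputStream(out)) {
            split.write(dataOut);
            dataOut.flush();
            return new SerializedWritableSplit(out.toByteArray());
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    @JsonProperty
    public byte[] getBytes() {
        return bytes;
    }

    // Rebuild the split by feeding the stored bytes to readFields(...) on a fresh instance.
    public PhoenixInputSplit deserialize() {
        PhoenixInputSplit split = new PhoenixInputSplit();
        try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes))) {
            split.readFields(in);
            return split;
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}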
Use of org.apache.phoenix.mapreduce.PhoenixInputSplit in project trino by trinodb.
The class PhoenixSplitManager, method generateSplits.
// mostly copied from PhoenixInputFormat, but without the region size calculations
private List<InputSplit> generateSplits(QueryPlan queryPlan, List<KeyRange> splits, int maxScansPerSplit) throws IOException {
    requireNonNull(queryPlan, "queryPlan is null");
    requireNonNull(splits, "splits is null");
    try (org.apache.hadoop.hbase.client.Connection connection = phoenixClient.getHConnection()) {
        RegionLocator regionLocator = connection.getRegionLocator(TableName.valueOf(queryPlan.getTableRef().getTable().getPhysicalName().toString()));
        long regionSize = -1;
        List<InputSplit> inputSplits = new ArrayList<>(splits.size());
        for (List<Scan> scans : queryPlan.getScans()) {
            HRegionLocation location = regionLocator.getRegionLocation(scans.get(0).getStartRow(), false);
            String regionLocation = location.getHostname();
            if (log.isDebugEnabled()) {
                log.debug("Scan count[%d] : %s ~ %s", scans.size(), Bytes.toStringBinary(scans.get(0).getStartRow()), Bytes.toStringBinary(scans.get(scans.size() - 1).getStopRow()));
                log.debug("First scan : %s with scanAttribute : %s [scanCache, cacheBlock, scanBatch] : [%d, %s, %d] and regionLocation : %s", scans.get(0), scans.get(0).getAttributesMap(), scans.get(0).getCaching(), scans.get(0).getCacheBlocks(), scans.get(0).getBatch(), regionLocation);
                for (int i = 0, limit = scans.size(); i < limit; i++) {
                    log.debug("EXPECTED_UPPER_REGION_KEY[%d] : %s", i, Bytes.toStringBinary(scans.get(i).getAttribute(EXPECTED_UPPER_REGION_KEY)));
                }
            }
            /*
             * Handle parallel execution explicitly in Trino rather than internally in Phoenix.
             * Each split is handled by a single ConcatResultIterator
             * (See PhoenixClient.getResultSet(...))
             */
            for (List<Scan> splitScans : Lists.partition(scans, maxScansPerSplit)) {
                inputSplits.add(new PhoenixInputSplit(splitScans, regionSize, regionLocation));
            }
        }
        return inputSplits;
    }
}
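The Hadoop-level InputSplits returned here are typically wrapped into connector splits much like the test above constructs its expected value. A minimal sketch of that wrapping, assuming the PhoenixSplit and SerializedPhoenixInputSplit types from the test; the helper name toConnectorSplits is hypothetical, not Trino's actual method.

// Hypothetical helper: wrap each PhoenixInputSplit into a Trino PhoenixSplit,
// carrying the region hosts chosen in generateSplits(...) as split addresses.
private static List<PhoenixSplit> toConnectorSplits(List<InputSplit> inputSplits) throws IOException, InterruptedException {
    List<PhoenixSplit> connectorSplits = new ArrayList<>(inputSplits.size());
    for (InputSplit split : inputSplits) {
        List<HostAddress> addresses = new ArrayList<>();
        for (String host : split.getLocations()) {
            addresses.add(HostAddress.fromString(host));
        }
        connectorSplits.add(new PhoenixSplit(addresses, SerializedPhoenixInputSplit.serialize((PhoenixInputSplit) split)));
    }
    return connectorSplits;
}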