Use of org.apache.flink.types.RowKind in project flink by apache.
The class UpdatingTopCityExample, method main.
public static void main(String[] args) throws Exception {
    // prepare the session
    final EnvironmentSettings settings = EnvironmentSettings.newInstance().inStreamingMode().build();
    final TableEnvironment env = TableEnvironment.create(settings);
    // create an empty temporary CSV directory for this example
    final String populationDirPath = createTemporaryDirectory();
    // register a table in the catalog that points to the CSV directory
    env.executeSql(
            "CREATE TABLE PopulationUpdates ("
                    + " city STRING,"
                    + " state STRING,"
                    + " update_year INT,"
                    + " population_diff INT"
                    + ") WITH ("
                    + " 'connector' = 'filesystem',"
                    + " 'path' = '" + populationDirPath + "',"
                    + " 'format' = 'csv'"
                    + ")");
    // insert some example data into the table
    final TableResult insertionResult = env.executeSql(
            "INSERT INTO PopulationUpdates VALUES"
                    + " ('Los Angeles', 'CA', 2013, 13106100), "
                    + " ('Los Angeles', 'CA', 2014, 72600), "
                    + " ('Los Angeles', 'CA', 2015, 72300), "
                    + " ('Chicago', 'IL', 2013, 9553270), "
                    + " ('Chicago', 'IL', 2014, 11340), "
                    + " ('Chicago', 'IL', 2015, -6730), "
                    + " ('Houston', 'TX', 2013, 6330660), "
                    + " ('Houston', 'TX', 2014, 172960), "
                    + " ('Houston', 'TX', 2015, 172940), "
                    + " ('Phoenix', 'AZ', 2013, 4404680), "
                    + " ('Phoenix', 'AZ', 2014, 86740), "
                    + " ('Phoenix', 'AZ', 2015, 89700), "
                    + " ('San Antonio', 'TX', 2013, 2280580), "
                    + " ('San Antonio', 'TX', 2014, 49180), "
                    + " ('San Antonio', 'TX', 2015, 50870), "
                    + " ('San Francisco', 'CA', 2013, 4521310), "
                    + " ('San Francisco', 'CA', 2014, 65940), "
                    + " ('San Francisco', 'CA', 2015, 62290), "
                    + " ('Dallas', 'TX', 2013, 6817520), "
                    + " ('Dallas', 'TX', 2014, 137740), "
                    + " ('Dallas', 'TX', 2015, 154020)");
    // since all cluster operations of the Table API are executed asynchronously,
    // we need to wait until the insertion has completed;
    // an exception is thrown in case of an error
    insertionResult.await();
    // read from the table and aggregate the total population per city
    final Table currentPopulation = env.sqlQuery(
            "SELECT city, state, MAX(update_year) AS latest_year, SUM(population_diff) AS population "
                    + "FROM PopulationUpdates "
                    + "GROUP BY city, state");
    // either define a nested SQL statement with sub-queries
    // or divide the problem into sub-views which will be optimized
    // as a whole during planning
    env.createTemporaryView("CurrentPopulation", currentPopulation);
    // find the top 2 cities with the highest population per state;
    // we use a sub-query that is correlated with every unique state,
    // and for every state we rank by population and return the top 2 cities
    final Table topCitiesPerState = env.sqlQuery(
            "SELECT state, city, latest_year, population "
                    + "FROM "
                    + " (SELECT DISTINCT state FROM CurrentPopulation) States,"
                    + " LATERAL ("
                    + " SELECT city, latest_year, population"
                    + " FROM CurrentPopulation"
                    + " WHERE state = States.state"
                    + " ORDER BY population DESC, latest_year DESC"
                    + " LIMIT 2"
                    + " )");
    // use execute().collect() and a List in which we maintain the materialized updates
    try (CloseableIterator<Row> iterator = topCitiesPerState.execute().collect()) {
        final List<Row> materializedUpdates = new ArrayList<>();
        iterator.forEachRemaining(row -> {
            final RowKind kind = row.getKind();
            switch (kind) {
                case INSERT:
                case UPDATE_AFTER:
                    // normalize the kind to INSERT for full equality during add/remove
                    row.setKind(RowKind.INSERT);
                    materializedUpdates.add(row);
                    break;
                case UPDATE_BEFORE:
                case DELETE:
                    // normalize the kind to INSERT for full equality during add/remove
                    row.setKind(RowKind.INSERT);
                    materializedUpdates.remove(row);
                    break;
            }
        });
        // show the final output table if the result is bounded;
        // the output should exclude San Antonio because it has a smaller population than
        // Houston or Dallas in Texas (TX)
        materializedUpdates.forEach(System.out::println);
    }
}
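To see the changelog contract in isolation, here is a minimal, self-contained sketch of the same materialization pattern; the class name and the rows are invented for illustration, using Row.ofKind to create rows with an explicit RowKind:

import java.util.ArrayList;
import java.util.List;
import org.apache.flink.types.Row;
import org.apache.flink.types.RowKind;

public class RowKindMaterializationSketch {

    public static void main(String[] args) {
        final List<Row> materialized = new ArrayList<>();
        // an update travels as a retraction of the old row followed by the new row
        applyChange(materialized, Row.ofKind(RowKind.INSERT, "Houston", 6676560));
        applyChange(materialized, Row.ofKind(RowKind.UPDATE_BEFORE, "Houston", 6676560));
        applyChange(materialized, Row.ofKind(RowKind.UPDATE_AFTER, "Houston", 6700000));
        // prints only the final version of the row
        materialized.forEach(System.out::println);
    }

    private static void applyChange(List<Row> materialized, Row row) {
        final RowKind kind = row.getKind();
        // normalize the kind so that add/remove compare rows by value only
        row.setKind(RowKind.INSERT);
        if (kind == RowKind.INSERT || kind == RowKind.UPDATE_AFTER) {
            materialized.add(row);
        } else {
            materialized.remove(row);
        }
    }
}

Because an update arrives as an UPDATE_BEFORE/UPDATE_AFTER pair, the list ends up holding exactly one final version of each key.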
Use of org.apache.flink.types.RowKind in project flink by apache.
The class ChangelogCsvDeserializer, method deserialize.
@Override
public RowData deserialize(byte[] message) {
    // parse the columns, including a changelog flag
    final String[] columns = new String(message).split(Pattern.quote(columnDelimiter));
    final RowKind kind = RowKind.valueOf(columns[0]);
    final Row row = new Row(kind, parsingTypes.size());
    for (int i = 0; i < parsingTypes.size(); i++) {
        row.setField(i, parse(parsingTypes.get(i).getTypeRoot(), columns[i + 1]));
    }
    // convert to the internal data structure
    return (RowData) converter.toInternal(row);
}
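The changelog flag is simply the RowKind enum name serialized as the first column. Assuming '|' as the columnDelimiter and a STRING/INT schema (both assumptions for illustration), an input line would be parsed as in this minimal sketch:

import org.apache.flink.types.RowKind;

public class ChangelogFlagSketch {

    public static void main(String[] args) {
        // hypothetical line for a changelog CSV with '|' as the column delimiter
        final String line = "UPDATE_AFTER|Alice|18";
        final String[] columns = line.split("\\|");
        // the first column is the changelog flag; the remaining columns are the payload
        final RowKind kind = RowKind.valueOf(columns[0]);
        System.out.println(kind + " -> " + columns[1] + ", " + columns[2]);
    }
}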
Use of org.apache.flink.types.RowKind in project flink by apache.
The class DynamicKafkaRecordSerializationSchema, method serialize.
@Override
public ProducerRecord<byte[], byte[]> serialize(RowData consumedRow, KafkaSinkContext context, Long timestamp) {
    // shortcut in case no input projection is required
    if (keySerialization == null && !hasMetadata) {
        final byte[] valueSerialized = valueSerialization.serialize(consumedRow);
        return new ProducerRecord<>(
                topic,
                extractPartition(consumedRow, null, valueSerialized, context.getPartitionsForTopic(topic)),
                null,
                valueSerialized);
    }
    final byte[] keySerialized;
    if (keySerialization == null) {
        keySerialized = null;
    } else {
        final RowData keyRow = createProjectedRow(consumedRow, RowKind.INSERT, keyFieldGetters);
        keySerialized = keySerialization.serialize(keyRow);
    }
    final byte[] valueSerialized;
    final RowKind kind = consumedRow.getRowKind();
    if (upsertMode) {
        if (kind == RowKind.DELETE || kind == RowKind.UPDATE_BEFORE) {
            // transform the message into a tombstone message
            valueSerialized = null;
        } else {
            // set the kind to INSERT to be compliant with the insert-only format
            final RowData valueRow = DynamicKafkaRecordSerializationSchema.createProjectedRow(consumedRow, kind, valueFieldGetters);
            valueRow.setRowKind(RowKind.INSERT);
            valueSerialized = valueSerialization.serialize(valueRow);
        }
    } else {
        final RowData valueRow = DynamicKafkaRecordSerializationSchema.createProjectedRow(consumedRow, kind, valueFieldGetters);
        valueSerialized = valueSerialization.serialize(valueRow);
    }
    return new ProducerRecord<>(
            topic,
            extractPartition(consumedRow, keySerialized, valueSerialized, context.getPartitionsForTopic(topic)),
            readMetadata(consumedRow, KafkaDynamicSink.WritableMetadata.TIMESTAMP),
            keySerialized,
            valueSerialized,
            readMetadata(consumedRow, KafkaDynamicSink.WritableMetadata.HEADERS));
}
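In upsert mode, a DELETE or UPDATE_BEFORE row becomes a Kafka tombstone: a record with a key but a null value, which log compaction interprets as a deletion. A minimal sketch of that decision, detached from the Kafka-specific classes (the helper name is invented):

import org.apache.flink.types.RowKind;

public class UpsertTombstoneSketch {

    // decide whether a changelog row becomes a tombstone in upsert mode;
    // mirrors the branch in the method above
    static boolean isTombstone(RowKind kind, boolean upsertMode) {
        return upsertMode && (kind == RowKind.DELETE || kind == RowKind.UPDATE_BEFORE);
    }

    public static void main(String[] args) {
        for (RowKind kind : RowKind.values()) {
            System.out.println(kind + " -> tombstone: " + isTombstone(kind, true));
        }
    }
}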
Use of org.apache.flink.types.RowKind in project flink by apache.
The class StreamingJoinOperator, method processElement.
/**
 * Process an input element and output incremental joined records; retraction messages are sent
 * in some scenarios.
 *
 * <p>The following pseudo code describes the core logic of this method. Because the logic is
 * complex, the pseudo code helps in understanding it; it must be kept in sync with the actual
 * implementation.
 *
 * <p>Note: "+I" represents "INSERT", "-D" represents "DELETE", "+U" represents "UPDATE_AFTER",
 * "-U" represents "UPDATE_BEFORE". For an inner join we forward the input RowKind; otherwise,
 * we always send INSERT and DELETE for simplification. This could be optimized to send -U & +U
 * instead of -D & +I in the future (see FLINK-17337); they are equivalent in this join case,
 * but the change may need some refactoring, so we keep -D & +I for now. See {@code
 * FlinkChangelogModeInferenceProgram.SatisfyModifyKindSetTraitVisitor}.
 *
 * <pre>
 * if input record is accumulate
 * | if input side is outer
 * | | if there are no matched rows on the other side, send +I[record+null], state.add(record, 0)
 * | | if there are matched rows on the other side
 * | | | if other side is outer
 * | | | | if the matched num in the matched rows == 0, send -D[null+other]
 * | | | | if the matched num in the matched rows > 0, skip
 * | | | | otherState.update(other, old + 1)
 * | | | endif
 * | | | send +I[record+other]s, state.add(record, other.size)
 * | | endif
 * | endif
 * | if input side is not outer
 * | | state.add(record)
 * | | if there are no matched rows on the other side, skip
 * | | if there are matched rows on the other side
 * | | | if other side is outer
 * | | | | if the matched num in the matched rows == 0, send -D[null+other]
 * | | | | if the matched num in the matched rows > 0, skip
 * | | | | otherState.update(other, old + 1)
 * | | | | send +I[record+other]s
 * | | | else
 * | | | | send +I/+U[record+other]s (using input RowKind)
 * | | | endif
 * | | endif
 * | endif
 * endif
 *
 * if input record is retract
 * | state.retract(record)
 * | if there are no matched rows on the other side
 * | | if input side is outer, send -D[record+null]
 * | endif
 * | if there are matched rows on the other side
 * | | if input side is outer, send -D[record+other]s
 * | | if input side is not outer, send -D/-U[record+other]s (using input RowKind)
 * | | if other side is outer
 * | | | if the matched num in the matched rows == 0, this should never happen!
 * | | | if the matched num in the matched rows == 1, send +I[null+other]
 * | | | if the matched num in the matched rows > 1, skip
 * | | | otherState.update(other, old - 1)
 * | | endif
 * | endif
 * endif
 * </pre>
 *
 * @param input the input element
 * @param inputSideStateView state of the input side
 * @param otherSideStateView state of the other side
 * @param inputIsLeft whether the input side is the left side
 */
private void processElement(RowData input, JoinRecordStateView inputSideStateView, JoinRecordStateView otherSideStateView, boolean inputIsLeft) throws Exception {
    boolean inputIsOuter = inputIsLeft ? leftIsOuter : rightIsOuter;
    boolean otherIsOuter = inputIsLeft ? rightIsOuter : leftIsOuter;
    boolean isAccumulateMsg = RowDataUtil.isAccumulateMsg(input);
    RowKind inputRowKind = input.getRowKind();
    // erase RowKind for later state updating
    input.setRowKind(RowKind.INSERT);
    AssociatedRecords associatedRecords = AssociatedRecords.of(input, inputIsLeft, otherSideStateView, joinCondition);
    if (isAccumulateMsg) {
        // record is accumulate
        if (inputIsOuter) {
            // input side is outer
            OuterJoinRecordStateView inputSideOuterStateView = (OuterJoinRecordStateView) inputSideStateView;
            if (associatedRecords.isEmpty()) {
                // there are no matched rows on the other side
                // send +I[record+null]
                outRow.setRowKind(RowKind.INSERT);
                outputNullPadding(input, inputIsLeft);
                // state.add(record, 0)
                inputSideOuterStateView.addRecord(input, 0);
            } else {
                // there are matched rows on the other side
                if (otherIsOuter) {
                    // other side is outer
                    OuterJoinRecordStateView otherSideOuterStateView = (OuterJoinRecordStateView) otherSideStateView;
                    for (OuterRecord outerRecord : associatedRecords.getOuterRecords()) {
                        RowData other = outerRecord.record;
                        // if the matched num in the matched rows == 0
                        if (outerRecord.numOfAssociations == 0) {
                            // send -D[null+other]
                            outRow.setRowKind(RowKind.DELETE);
                            outputNullPadding(other, !inputIsLeft);
                        }
                        // ignore when the matched number > 0
                        // otherState.update(other, old + 1)
                        otherSideOuterStateView.updateNumOfAssociations(other, outerRecord.numOfAssociations + 1);
                    }
                }
                // send +I[record+other]s
                outRow.setRowKind(RowKind.INSERT);
                for (RowData other : associatedRecords.getRecords()) {
                    output(input, other, inputIsLeft);
                }
                // state.add(record, other.size)
                inputSideOuterStateView.addRecord(input, associatedRecords.size());
            }
        } else {
            // input side is not outer
            // state.add(record)
            inputSideStateView.addRecord(input);
            if (!associatedRecords.isEmpty()) {
                // if there are matched rows on the other side
                if (otherIsOuter) {
                    // if other side is outer
                    OuterJoinRecordStateView otherSideOuterStateView = (OuterJoinRecordStateView) otherSideStateView;
                    for (OuterRecord outerRecord : associatedRecords.getOuterRecords()) {
                        if (outerRecord.numOfAssociations == 0) {
                            // if the matched num in the matched rows == 0
                            // send -D[null+other]
                            outRow.setRowKind(RowKind.DELETE);
                            outputNullPadding(outerRecord.record, !inputIsLeft);
                        }
                        // otherState.update(other, old + 1)
                        otherSideOuterStateView.updateNumOfAssociations(outerRecord.record, outerRecord.numOfAssociations + 1);
                    }
                    // send +I[record+other]s
                    outRow.setRowKind(RowKind.INSERT);
                } else {
                    // send +I/+U[record+other]s (using input RowKind)
                    outRow.setRowKind(inputRowKind);
                }
                for (RowData other : associatedRecords.getRecords()) {
                    output(input, other, inputIsLeft);
                }
            }
            // skip when there are no matched rows on the other side
        }
    } else {
        // input record is retract
        // state.retract(record)
        inputSideStateView.retractRecord(input);
        if (associatedRecords.isEmpty()) {
            // there are no matched rows on the other side
            if (inputIsOuter) {
                // input side is outer
                // send -D[record+null]
                outRow.setRowKind(RowKind.DELETE);
                outputNullPadding(input, inputIsLeft);
            }
            // nothing to do when the input side is not outer
        } else {
            // there are matched rows on the other side
            if (inputIsOuter) {
                // send -D[record+other]s
                outRow.setRowKind(RowKind.DELETE);
            } else {
                // send -D/-U[record+other]s (using input RowKind)
                outRow.setRowKind(inputRowKind);
            }
            for (RowData other : associatedRecords.getRecords()) {
                output(input, other, inputIsLeft);
            }
            // if other side is outer
            if (otherIsOuter) {
                OuterJoinRecordStateView otherSideOuterStateView = (OuterJoinRecordStateView) otherSideStateView;
                for (OuterRecord outerRecord : associatedRecords.getOuterRecords()) {
                    if (outerRecord.numOfAssociations == 1) {
                        // send +I[null+other]
                        outRow.setRowKind(RowKind.INSERT);
                        outputNullPadding(outerRecord.record, !inputIsLeft);
                    }
                    // nothing else to do when the number of associations > 1
                    // otherState.update(other, old - 1)
                    otherSideOuterStateView.updateNumOfAssociations(outerRecord.record, outerRecord.numOfAssociations - 1);
                }
            }
        }
    }
}
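The first branch hinges on splitting the four row kinds into accumulate and retract messages. A standalone sketch of that split (assuming RowDataUtil.isAccumulateMsg treats INSERT and UPDATE_AFTER as accumulate, which matches how the pseudo code above uses it; the class name is invented):

import org.apache.flink.types.RowKind;

public class AccumulateRetractSketch {

    // INSERT and UPDATE_AFTER add to state; UPDATE_BEFORE and DELETE retract from it
    static boolean isAccumulate(RowKind kind) {
        return kind == RowKind.INSERT || kind == RowKind.UPDATE_AFTER;
    }

    public static void main(String[] args) {
        for (RowKind kind : RowKind.values()) {
            System.out.println(kind.shortString() + " accumulates: " + isAccumulate(kind));
        }
    }
}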
Use of org.apache.flink.types.RowKind in project flink by apache.
The class GenericRowRecordSortComparator, method compare.
@Override
public int compare(GenericRowData row1, GenericRowData row2) {
    RowKind kind1 = row1.getRowKind();
    RowKind kind2 = row2.getRowKind();
    if (kind1 != kind2) {
        return kind1.toByteValue() - kind2.toByteValue();
    } else {
        Object key1 = sortKeyGetter.getFieldOrNull(row1);
        Object key2 = sortKeyGetter.getFieldOrNull(row2);
        if (key1 instanceof Comparable) {
            return ((Comparable) key1).compareTo(key2);
        } else {
            throw new UnsupportedOperationException();
        }
    }
}
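The comparator orders rows of different kinds by RowKind.toByteValue(), the stable byte each kind carries for serialization. A small sketch printing that order (the class name is invented; the expected output reflects the byte values documented on RowKind):

import org.apache.flink.types.RowKind;

public class RowKindOrderSketch {

    public static void main(String[] args) {
        // toByteValue() assigns each kind a stable byte, which the comparator
        // above reuses as a total order across kinds
        for (RowKind kind : RowKind.values()) {
            System.out.println(kind.shortString() + " -> " + kind.toByteValue());
        }
        // expected: +I -> 0, -U -> 1, +U -> 2, -D -> 3
    }
}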