Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -183,13 +183,14 @@ public static SemanticNodeProcessor getColumnProcessor() {
return new ColumnExprProcessor();
}

private static boolean findSourceColumn(
private static boolean findSourceColumn(Operator<? extends OperatorDesc> inpOp,
LineageCtx lctx, Predicate cond, String tabAlias, String alias) {
for (Map.Entry<String, TableScanOperator> topOpMap: lctx.getParseCtx().getTopOps().entrySet()) {
TableScanOperator tableScanOp = topOpMap.getValue();
Table tbl = tableScanOp.getConf().getTableMetadata();
if (tbl.getTableName().equals(tabAlias)
|| tabAlias.equals(tableScanOp.getConf().getAlias())) {
if (inpOp.getOperatorId().equals(tableScanOp.getOperatorId())
&& (tbl.getTableName().equals(tabAlias)
|| tabAlias.equals(tableScanOp.getConf().getAlias()))) {
for (FieldSchema column: tbl.getCols()) {
if (column.getName().equals(alias)) {
TableAliasInfo table = new TableAliasInfo();
Expand Down Expand Up @@ -241,7 +242,7 @@ public static String getExprString(RowSchema rs, ExprNodeDesc expr,
}
if (tabAlias != null && tabAlias.length() > 0
&& !tabAlias.startsWith("_") && !tabAlias.startsWith("$")) {
if (cond != null && !findSourceColumn(lctx, cond, tabAlias, alias) && dep != null) {
if (cond != null && !findSourceColumn(inpOp, lctx, cond, tabAlias, alias) && dep != null) {
cond.getBaseCols().addAll(dep.getBaseCols());
}
return tabAlias + "." + alias;
Expand Down
19 changes: 19 additions & 0 deletions ql/src/test/queries/clientpositive/lineage8.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
-- Register LineageLogger as a post hook so that lineage is logged for the statements below.
set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.LineageLogger;

-- Two source tables that share the same column names (id1, id2).
create table table_1 (id1 int, id2 int);
create table table_2 (id1 int, id2 int);

-- Case 1: both UNION ALL branches use the alias t1, but for different tables
-- (table_1 in the first branch, table_2 in the second).
create table table_3 as
select id1 from table_1 t1 where t1.id2 = 1
union all
select id1 from table_2 t1 where t1.id2 = 2;

-- Case 2: in the first branch t1 names both the inner scan of table_1 and the
-- enclosing subquery, while in the second branch t1 names table_2.
create table table_4 as
select id1 from (select id1,id2 from table_1 t1 where t1.id1 = 3 ) t1 where t1.id2 = 1
union all
select id1 from table_2 t1 where t1.id2 = 2;

-- Case 3: t1 names table_1 inside subquery t and is reused for table_2
-- on the other side of the join.
create table table_5 as
select t.id1 from
(select id1 from table_1 t1 where t1.id2 = 1) t
join table_2 t1 on t.id1 = t1.id2;
41 changes: 41 additions & 0 deletions ql/src/test/results/clientpositive/llap/lineage8.q.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
PREHOOK: query: create table table_1 (id1 int, id2 int)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@table_1
PREHOOK: query: create table table_2 (id1 int, id2 int)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@table_2
PREHOOK: query: create table table_3 as
select id1 from table_1 t1 where t1.id2 = 1
union all
select id1 from table_2 t1 where t1.id2 = 2
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@table_1
PREHOOK: Input: default@table_2
PREHOOK: Output: database:default
PREHOOK: Output: default@table_3
Result schema has 1 fields, but we don't get as many dependencies
{"version":"1.0","engine":"tez","database":"default","hash":"24a0f860f60a1b7d5f350fd8eb164a37","queryText":"create table table_3 as\nselect id1 from table_1 t1 where t1.id2 = 1\nunion all\nselect id1 from table_2 t1 where t1.id2 = 2","edges":[{"sources":[1,2],"targets":[0],"expression":"id1","edgeType":"PROJECTION"},{"sources":[3],"targets":[0],"expression":"(t1.id2 = 1)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"(t1.id2 = 2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.table_3.id1"},{"id":1,"vertexType":"COLUMN","vertexId":"default.table_1.id1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.table_2.id1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.table_1.id2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.table_2.id2"}]}
PREHOOK: query: create table table_4 as
select id1 from (select id1,id2 from table_1 t1 where t1.id1 = 3 ) t1 where t1.id2 = 1
union all
select id1 from table_2 t1 where t1.id2 = 2
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@table_1
PREHOOK: Input: default@table_2
PREHOOK: Output: database:default
PREHOOK: Output: default@table_4
Result schema has 1 fields, but we don't get as many dependencies
{"version":"1.0","engine":"tez","database":"default","hash":"761d0cf34076cec77766bf7af8f1cbe9","queryText":"create table table_4 as\nselect id1 from (select id1,id2 from table_1 t1 where t1.id1 = 3 ) t1 where t1.id2 = 1\nunion all\nselect id1 from table_2 t1 where t1.id2 = 2","edges":[{"sources":[1],"targets":[0],"expression":"id1","edgeType":"PROJECTION"},{"sources":[2,3],"targets":[0],"expression":"((t1.id1 = 3) and (t1.id2 = 1))","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"(t1.id2 = 2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.table_4.id1"},{"id":1,"vertexType":"COLUMN","vertexId":"default.table_2.id1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.table_1.id1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.table_1.id2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.table_2.id2"}]}
PREHOOK: query: create table table_5 as
select t.id1 from
(select id1 from table_1 t1 where t1.id2 = 1) t
join table_2 t1 on t.id1 = t1.id2
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@table_1
PREHOOK: Input: default@table_2
PREHOOK: Output: database:default
PREHOOK: Output: default@table_5
Result schema has 1 fields, but we don't get as many dependencies
{"version":"1.0","engine":"tez","database":"default","hash":"615bb67f6ff2dd50695bffd14c296677","queryText":"create table table_5 as\nselect t.id1 from\n(select id1 from table_1 t1 where t1.id2 = 1) t\njoin table_2 t1 on t.id1 = t1.id2","edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"},{"sources":[2,1],"targets":[0],"expression":"((t1.id2 = 1) and t1.id1 is not null)","edgeType":"PREDICATE"},{"sources":[1,3],"targets":[0],"expression":"(t1.id1 = t1.id2)","edgeType":"PREDICATE"},{"sources":[3],"targets":[0],"expression":"t1.id2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.table_5.id1"},{"id":1,"vertexType":"COLUMN","vertexId":"default.table_1.id1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.table_1.id2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.table_2.id2"}]}