Skip to content

Commit cccab85

Browse files
author
Richard Guo
committed
Fix right-anti-joins when the inner relation is proven unique
For an inner_unique join, we always assume that the executor will stop scanning for matches after the first match. Therefore, for a mergejoin that is inner_unique and whose mergeclauses are sufficient to identify a match, we set the skip_mark_restore flag to true, indicating that the executor need not do mark/restore calls. However, merge-right-anti-join did not get this memo and continues scanning the inner side for matches after the first match. If there are duplicates in the outer scan, we may incorrectly skip matching some inner tuples, which can lead to wrong results.

Here we fix this issue by ensuring that merge-right-anti-join also advances to the next outer tuple after the first match in inner_unique cases. This also saves cycles by avoiding unnecessary scanning of inner tuples after the first match.

Although hash-right-anti-join does not suffer from this wrong-results issue, we apply the same change to it as well, to help save cycles for the same reason.

Per bug #18522 from Antti Lampinen, and bug #18526 from Feliphe Pozzer.

Back-patch to v16 where right-anti-join was introduced.

Author: Richard Guo
Discussion: https://postgr.es/m/18522-c7a8956126afdfd0@postgresql.org
1 parent fa1a63d commit cccab85

File tree

4 files changed

+100
-20
lines changed

4 files changed

+100
-20
lines changed

src/backend/executor/nodeHashjoin.c

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -565,20 +565,21 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
565565
}
566566

567567
/*
568-
* In a right-antijoin, we never return a matched tuple.
569-
* And we need to stay on the current outer tuple to
570-
* continue scanning the inner side for matches.
568+
* If we only need to consider the first matching inner
569+
* tuple, then advance to next outer tuple after we've
570+
* processed this one.
571571
*/
572-
if (node->js.jointype == JOIN_RIGHT_ANTI)
573-
continue;
572+
if (node->js.single_match)
573+
node->hj_JoinState = HJ_NEED_NEW_OUTER;
574574

575575
/*
576-
* If we only need to join to the first matching inner
577-
* tuple, then consider returning this one, but after that
578-
* continue with next outer tuple.
576+
* In a right-antijoin, we never return a matched tuple.
577+
* If it's not an inner_unique join, we need to stay on
578+
* the current outer tuple to continue scanning the inner
579+
* side for matches.
579580
*/
580-
if (node->js.single_match)
581-
node->hj_JoinState = HJ_NEED_NEW_OUTER;
581+
if (node->js.jointype == JOIN_RIGHT_ANTI)
582+
continue;
582583

583584
if (otherqual == NULL || ExecQual(otherqual, econtext))
584585
return ExecProject(node->js.ps.ps_ProjInfo);

src/backend/executor/nodeMergejoin.c

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -805,20 +805,21 @@ ExecMergeJoin(PlanState *pstate)
805805
}
806806

807807
/*
808-
* In a right-antijoin, we never return a matched tuple.
809-
* And we need to stay on the current outer tuple to
810-
* continue scanning the inner side for matches.
808+
* If we only need to consider the first matching inner
809+
* tuple, then advance to next outer tuple after we've
810+
* processed this one.
811811
*/
812-
if (node->js.jointype == JOIN_RIGHT_ANTI)
813-
break;
812+
if (node->js.single_match)
813+
node->mj_JoinState = EXEC_MJ_NEXTOUTER;
814814

815815
/*
816-
* If we only need to join to the first matching inner
817-
* tuple, then consider returning this one, but after that
818-
* continue with next outer tuple.
816+
* In a right-antijoin, we never return a matched tuple.
817+
* If it's not an inner_unique join, we need to stay on
818+
* the current outer tuple to continue scanning the inner
819+
* side for matches.
819820
*/
820-
if (node->js.single_match)
821-
node->mj_JoinState = EXEC_MJ_NEXTOUTER;
821+
if (node->js.jointype == JOIN_RIGHT_ANTI)
822+
break;
822823

823824
qualResult = (otherqual == NULL ||
824825
ExecQual(otherqual, econtext));

src/test/regress/expected/join.out

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2923,6 +2923,60 @@ select tt1.*, tt2.* from tt2 right join tt1 on tt1.joincol = tt2.joincol;
29232923
2 | | |
29242924
(3 rows)
29252925

2926+
reset enable_hashjoin;
2927+
reset enable_nestloop;
2928+
--
2929+
-- regression test for bug #18522 (merge-right-anti-join in inner_unique cases)
2930+
--
2931+
create temp table tbl_ra(a int unique, b int);
2932+
insert into tbl_ra select i, i%100 from generate_series(1,1000)i;
2933+
create index on tbl_ra (b);
2934+
analyze tbl_ra;
2935+
set enable_hashjoin to off;
2936+
set enable_nestloop to off;
2937+
-- ensure we get a merge right anti join
2938+
explain (costs off)
2939+
select * from tbl_ra t1
2940+
where not exists (select 1 from tbl_ra t2 where t2.b = t1.a) and t1.b < 2;
2941+
QUERY PLAN
2942+
-------------------------------------------------------
2943+
Merge Right Anti Join
2944+
Merge Cond: (t2.b = t1.a)
2945+
-> Index Only Scan using tbl_ra_b_idx on tbl_ra t2
2946+
-> Sort
2947+
Sort Key: t1.a
2948+
-> Bitmap Heap Scan on tbl_ra t1
2949+
Recheck Cond: (b < 2)
2950+
-> Bitmap Index Scan on tbl_ra_b_idx
2951+
Index Cond: (b < 2)
2952+
(9 rows)
2953+
2954+
-- and check we get the expected results
2955+
select * from tbl_ra t1
2956+
where not exists (select 1 from tbl_ra t2 where t2.b = t1.a) and t1.b < 2;
2957+
a | b
2958+
------+---
2959+
100 | 0
2960+
101 | 1
2961+
200 | 0
2962+
201 | 1
2963+
300 | 0
2964+
301 | 1
2965+
400 | 0
2966+
401 | 1
2967+
500 | 0
2968+
501 | 1
2969+
600 | 0
2970+
601 | 1
2971+
700 | 0
2972+
701 | 1
2973+
800 | 0
2974+
801 | 1
2975+
900 | 0
2976+
901 | 1
2977+
1000 | 0
2978+
(19 rows)
2979+
29262980
reset enable_hashjoin;
29272981
reset enable_nestloop;
29282982
--

src/test/regress/sql/join.sql

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,30 @@ select tt1.*, tt2.* from tt2 right join tt1 on tt1.joincol = tt2.joincol;
693693
reset enable_hashjoin;
694694
reset enable_nestloop;
695695

696+
--
697+
-- regression test for bug #18522 (merge-right-anti-join in inner_unique cases)
698+
--
699+
700+
create temp table tbl_ra(a int unique, b int);
701+
insert into tbl_ra select i, i%100 from generate_series(1,1000)i;
702+
create index on tbl_ra (b);
703+
analyze tbl_ra;
704+
705+
set enable_hashjoin to off;
706+
set enable_nestloop to off;
707+
708+
-- ensure we get a merge right anti join
709+
explain (costs off)
710+
select * from tbl_ra t1
711+
where not exists (select 1 from tbl_ra t2 where t2.b = t1.a) and t1.b < 2;
712+
713+
-- and check we get the expected results
714+
select * from tbl_ra t1
715+
where not exists (select 1 from tbl_ra t2 where t2.b = t1.a) and t1.b < 2;
716+
717+
reset enable_hashjoin;
718+
reset enable_nestloop;
719+
696720
--
697721
-- regression test for bug #13908 (hash join with skew tuples & nbatch increase)
698722
--

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy