Skip to content
This repository was archived by the owner on Jan 28, 2021. It is now read-only.

Commit 7beeb8d

Browse files
authored
Merge pull request #572 from erizocosmico/feature/prune-columns
analyzer: add rule to prune unnecessary columns
2 parents b8d84bf + 14c6aeb commit 7beeb8d

File tree

5 files changed

+545
-2
lines changed

5 files changed

+545
-2
lines changed

engine_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -981,14 +981,14 @@ func TestDescribe(t *testing.T) {
981981

982982
query := `DESCRIBE FORMAT=TREE SELECT * FROM mytable`
983983
expectedSeq := []sql.Row{
984-
sql.NewRow("Table(mytable): Projected "),
984+
sql.NewRow("Table(mytable)"),
985985
sql.NewRow(" ├─ Column(i, INT64, nullable=false)"),
986986
sql.NewRow(" └─ Column(s, TEXT, nullable=false)"),
987987
}
988988

989989
expectedParallel := []sql.Row{
990990
{"Exchange(parallelism=2)"},
991-
{" └─ Table(mytable): Projected "},
991+
{" └─ Table(mytable)"},
992992
{" ├─ Column(i, INT64, nullable=false)"},
993993
{" └─ Column(s, TEXT, nullable=false)"},
994994
}

sql/analyzer/optimization_rules.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,10 @@ func reorderProjection(ctx *sql.Context, a *Analyzer, n sql.Node) (sql.Node, err
116116
return node, nil
117117
}
118118

119+
if len(requiredColumns) == 0 {
120+
return node, nil
121+
}
122+
119123
didNeedReorder = true
120124

121125
// Only add the required columns for that node in the projection.

sql/analyzer/prune_columns.go

Lines changed: 300 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,300 @@
1+
package analyzer
2+
3+
import (
4+
"fmt"
5+
6+
"gopkg.in/src-d/go-mysql-server.v0/sql"
7+
"gopkg.in/src-d/go-mysql-server.v0/sql/expression"
8+
"gopkg.in/src-d/go-mysql-server.v0/sql/plan"
9+
)
10+
11+
type usedColumns map[string]map[string]struct{}
12+
13+
func pruneColumns(ctx *sql.Context, a *Analyzer, n sql.Node) (sql.Node, error) {
14+
a.Log("pruning columns, node of type %T", n)
15+
if !n.Resolved() {
16+
return n, nil
17+
}
18+
19+
columns := make(usedColumns)
20+
21+
// All the columns required for the output of the query must be mark as
22+
// used, otherwise the schema would change.
23+
for _, col := range n.Schema() {
24+
if _, ok := columns[col.Source]; !ok {
25+
columns[col.Source] = make(map[string]struct{})
26+
}
27+
columns[col.Source][col.Name] = struct{}{}
28+
}
29+
30+
findUsedColumns(columns, n)
31+
32+
n, err := addSubqueryBarriers(n)
33+
if err != nil {
34+
return nil, err
35+
}
36+
37+
n, err = pruneUnusedColumns(n, columns)
38+
if err != nil {
39+
return nil, err
40+
}
41+
42+
n, err = pruneSubqueries(ctx, a, n, columns)
43+
if err != nil {
44+
return nil, err
45+
}
46+
47+
return fixRemainingFieldsIndexes(n)
48+
}
49+
50+
func pruneSubqueryColumns(
51+
ctx *sql.Context,
52+
a *Analyzer,
53+
n *plan.SubqueryAlias,
54+
parentColumns usedColumns,
55+
) (sql.Node, error) {
56+
a.Log("pruning columns of subquery with alias %q", n.Name())
57+
58+
columns := make(usedColumns)
59+
60+
// The columns coming from the parent have the subquery alias name as the
61+
// source. We need to find the real table in order to prune the subquery
62+
// correctly.
63+
tableByCol := make(map[string]string)
64+
for _, col := range n.Child.Schema() {
65+
tableByCol[col.Name] = col.Source
66+
}
67+
68+
for col := range parentColumns[n.Name()] {
69+
table, ok := tableByCol[col]
70+
if !ok {
71+
// This should never happen, but better be safe than sorry.
72+
return nil, fmt.Errorf("this is likely a bug: missing projected column %q on subquery %q", col, n.Name())
73+
}
74+
75+
if _, ok := columns[table]; !ok {
76+
columns[table] = make(map[string]struct{})
77+
}
78+
79+
columns[table][col] = struct{}{}
80+
}
81+
82+
findUsedColumns(columns, n)
83+
84+
node, err := addSubqueryBarriers(n.Child)
85+
if err != nil {
86+
return nil, err
87+
}
88+
89+
node, err = pruneUnusedColumns(node, columns)
90+
if err != nil {
91+
return nil, err
92+
}
93+
94+
node, err = pruneSubqueries(ctx, a, node, columns)
95+
if err != nil {
96+
return nil, err
97+
}
98+
99+
// There is no need to fix the field indexes after pruning here
100+
// because the main query will take care of fixing the indexes of all the
101+
// nodes in the tree.
102+
103+
return plan.NewSubqueryAlias(n.Name(), node), nil
104+
}
105+
106+
func findUsedColumns(columns usedColumns, n sql.Node) {
107+
plan.Inspect(n, func(n sql.Node) bool {
108+
switch n := n.(type) {
109+
case *plan.Project:
110+
addUsedProjectColumns(columns, n.Projections)
111+
return true
112+
case *plan.GroupBy:
113+
addUsedProjectColumns(columns, n.Aggregate)
114+
addUsedColumns(columns, n.Grouping)
115+
return true
116+
case *plan.SubqueryAlias:
117+
return false
118+
}
119+
120+
exp, ok := n.(sql.Expressioner)
121+
if ok {
122+
addUsedColumns(columns, exp.Expressions())
123+
}
124+
125+
return true
126+
})
127+
}
128+
129+
func addSubqueryBarriers(n sql.Node) (sql.Node, error) {
130+
return n.TransformUp(func(n sql.Node) (sql.Node, error) {
131+
sq, ok := n.(*plan.SubqueryAlias)
132+
if !ok {
133+
return n, nil
134+
}
135+
136+
return &subqueryBarrier{sq}, nil
137+
})
138+
}
139+
140+
func pruneSubqueries(
141+
ctx *sql.Context,
142+
a *Analyzer,
143+
n sql.Node,
144+
parentColumns usedColumns,
145+
) (sql.Node, error) {
146+
return n.TransformUp(func(n sql.Node) (sql.Node, error) {
147+
barrier, ok := n.(*subqueryBarrier)
148+
if !ok {
149+
return n, nil
150+
}
151+
152+
return pruneSubqueryColumns(ctx, a, barrier.SubqueryAlias, parentColumns)
153+
})
154+
}
155+
156+
func pruneUnusedColumns(n sql.Node, columns usedColumns) (sql.Node, error) {
157+
return n.TransformUp(func(n sql.Node) (sql.Node, error) {
158+
switch n := n.(type) {
159+
case *plan.Project:
160+
return pruneProject(n, columns), nil
161+
case *plan.GroupBy:
162+
return pruneGroupBy(n, columns), nil
163+
default:
164+
return n, nil
165+
}
166+
})
167+
}
168+
169+
type tableColumnPair struct {
170+
table string
171+
column string
172+
}
173+
174+
func fixRemainingFieldsIndexes(n sql.Node) (sql.Node, error) {
175+
return n.TransformUp(func(n sql.Node) (sql.Node, error) {
176+
exp, ok := n.(sql.Expressioner)
177+
if !ok {
178+
return n, nil
179+
}
180+
181+
var schema sql.Schema
182+
for _, c := range n.Children() {
183+
schema = append(schema, c.Schema()...)
184+
}
185+
186+
if len(schema) == 0 {
187+
return n, nil
188+
}
189+
190+
indexes := make(map[tableColumnPair]int)
191+
for i, col := range schema {
192+
indexes[tableColumnPair{col.Source, col.Name}] = i
193+
}
194+
195+
return exp.TransformExpressions(func(e sql.Expression) (sql.Expression, error) {
196+
gf, ok := e.(*expression.GetField)
197+
if !ok {
198+
return e, nil
199+
}
200+
201+
idx, ok := indexes[tableColumnPair{gf.Table(), gf.Name()}]
202+
if !ok {
203+
return nil, fmt.Errorf("unable to find column %q of table %q", gf.Name(), gf.Table())
204+
}
205+
206+
ngf := *gf
207+
return ngf.WithIndex(idx), nil
208+
})
209+
})
210+
}
211+
212+
func addUsedProjectColumns(
213+
columns usedColumns,
214+
projection []sql.Expression,
215+
) {
216+
var candidates []sql.Expression
217+
for _, e := range projection {
218+
// Only check for expressions that are not directly a GetField. This
219+
// is because in a projection we only care about those that were used
220+
// to compute new columns, such as aliases and so on. The fields that
221+
// are just passed up in the tree will already be in some other part
222+
// if they are really used.
223+
if _, ok := e.(*expression.GetField); !ok {
224+
candidates = append(candidates, e)
225+
}
226+
}
227+
228+
addUsedColumns(columns, candidates)
229+
}
230+
231+
func addUsedColumns(columns usedColumns, exprs []sql.Expression) {
232+
for _, e := range exprs {
233+
expression.Inspect(e, func(e sql.Expression) bool {
234+
if gf, ok := e.(*expression.GetField); ok {
235+
if _, ok := columns[gf.Table()]; !ok {
236+
columns[gf.Table()] = make(map[string]struct{})
237+
}
238+
columns[gf.Table()][gf.Name()] = struct{}{}
239+
}
240+
return true
241+
})
242+
}
243+
}
244+
245+
func pruneProject(n *plan.Project, columns usedColumns) sql.Node {
246+
var remaining []sql.Expression
247+
for _, e := range n.Projections {
248+
if !shouldPruneExpr(e, columns) {
249+
remaining = append(remaining, e)
250+
}
251+
}
252+
253+
if len(remaining) == 0 {
254+
return n.Child
255+
}
256+
257+
return plan.NewProject(remaining, n.Child)
258+
}
259+
260+
func pruneGroupBy(n *plan.GroupBy, columns usedColumns) sql.Node {
261+
var remaining []sql.Expression
262+
for _, e := range n.Aggregate {
263+
if !shouldPruneExpr(e, columns) {
264+
remaining = append(remaining, e)
265+
}
266+
}
267+
268+
if len(remaining) == 0 {
269+
return n.Child
270+
}
271+
272+
return plan.NewGroupBy(remaining, n.Grouping, n.Child)
273+
}
274+
275+
func shouldPruneExpr(e sql.Expression, cols usedColumns) bool {
276+
gf, ok := e.(*expression.GetField)
277+
if !ok {
278+
return false
279+
}
280+
281+
if gf.Table() == "" {
282+
return false
283+
}
284+
285+
if c, ok := cols[gf.Table()]; ok {
286+
if _, ok := c[gf.Name()]; ok {
287+
return false
288+
}
289+
}
290+
291+
return true
292+
}
293+
294+
type subqueryBarrier struct {
295+
*plan.SubqueryAlias
296+
}
297+
298+
func (b *subqueryBarrier) TransformUp(f sql.TransformNodeFunc) (sql.Node, error) {
299+
return f(b)
300+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy