Skip to content

Improve Status performance with many ignored files #1379

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: v6-exp
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
git: skip ignored files while walking worktree
Skip ignored files when walking through the worktree.

This significantly improves the performance of `Status()`:
In a repository with 3M ignored files `Status` now takes 5 s instead of 160 s.
  • Loading branch information
silkeh committed Jan 13, 2025
commit 25c3930c981662c2e5a5d7302bf6e2e840d5caca
137 changes: 137 additions & 0 deletions plumbing/format/gitignore/noder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package gitignore

import (
"slices"

"github.com/go-git/go-git/v5/utils/merkletrie/noder"
)

// Compile-time check that *MatchNoder satisfies noder.Noder.
var _ noder.Noder = (*MatchNoder)(nil)

// MatchNoder wraps a [noder.Noder] and filters the children it exposes
// according to a [Matcher].
type MatchNoder struct {
	// Noder is the wrapped node; methods not overridden by MatchNoder are
	// promoted from it.
	noder.Noder

	// matcher decides whether a path matches; invert flips which outcome
	// causes a node to be filtered out (see match).
	matcher Matcher
	invert  bool

	// path holds the name segments leading to this node. It is left nil for
	// a root node named ".".
	path []string

	// children caches the filtered children computed by Children.
	children []noder.Noder
}

// IgnoreNoder wraps n in a [MatchNoder] that leaves out every node matched
// by m.
//
// If n is named "." the wrapper starts with an empty path; otherwise the
// name of n becomes the first path segment used for matching.
func IgnoreNoder(m Matcher, n noder.Noder) *MatchNoder {
	root := &MatchNoder{Noder: n, matcher: m, invert: true}

	if name := n.Name(); name != "." {
		root.path = []string{name}
	}

	return root
}

// Children returns the children of the wrapped node that pass the filter.
// It implements [noder.Noder].
//
// The filtered list is computed on the first successful call and cached for
// later calls. An error from the wrapped node is returned as-is and is not
// cached, so a later call tries again.
func (n *MatchNoder) Children() ([]noder.Noder, error) {
	// A nil slice means "not computed yet". An empty but non-nil slice is a
	// valid cached result (for example when every child was filtered out),
	// so the check must not be based on the length.
	if n.children != nil {
		return n.children, nil
	}

	children, err := n.Noder.Children()
	if err != nil {
		return nil, err
	}

	filtered := n.ignoreChildren(children)
	if filtered == nil {
		// Keep the cache sentinel valid even if the filter yields nil.
		filtered = []noder.Noder{}
	}

	n.children = filtered

	return n.children, nil
}

// ignoreChildren returns the subset of children for which match reports
// false, preserving their order. Directories that are kept are wrapped via
// newChild so that filtering continues below them.
// The result is always a non-nil slice.
func (n *MatchNoder) ignoreChildren(children []noder.Noder) []noder.Noder {
	kept := make([]noder.Noder, 0, len(children))

	for i := range children {
		candidate := children[i]
		fullPath := append(n.path, candidate.Name())

		if !n.match(fullPath, candidate.IsDir()) {
			kept = append(kept, n.newChild(candidate, fullPath))
		}
	}

	return kept
}

// match reports whether the node at path should be filtered out.
//
// When invert is set, that is the case exactly when the matcher matches the
// path. When invert is not set, it is the case when the matcher does not
// match. A nil matcher never matches.
func (n *MatchNoder) match(path []string, isDir bool) bool {
	matched := n.matcher != nil && n.matcher.Match(path, isDir)

	return matched == n.invert
}

// newChild prepares a kept child for inclusion in the filtered tree.
// Files are returned unchanged. Directories are wrapped in a new MatchNoder
// that shares this node's matcher and invert setting and owns a copy of
// path.
func (n *MatchNoder) newChild(child noder.Noder, path []string) noder.Noder {
	if child.IsDir() {
		wrapped := new(MatchNoder)
		wrapped.Noder = child
		wrapped.matcher = n.matcher
		wrapped.invert = n.invert
		// Copy the path: the caller's slice may share its backing array with
		// the paths built for sibling nodes.
		wrapped.path = slices.Clone(path)

		return wrapped
	}

	return child
}

// NumChildren returns how many children remain after filtering, as reported
// by Children. It implements [noder.Noder].
// An error from Children is returned together with a count of zero.
func (n *MatchNoder) NumChildren() (int, error) {
	filtered, err := n.Children()
	if err != nil {
		return 0, err
	}

	count := len(filtered)

	return count, nil
}

// PathIgnored reports whether the given [noder.Path] is filtered out by this
// noder's matcher. The names of the path elements are used as the path
// segments for matching.
func (n *MatchNoder) PathIgnored(path noder.Path) bool {
	segments := n.noderPaths(path)
	isDir := path.IsDir()

	return n.match(segments, isDir)
}

// FindPath looks up p, element by element and by name, starting at the
// wrapped (unfiltered) node, and returns the nodes found along the way.
// Patterns are not applied, so ignored nodes can be retrieved as well.
//
// It returns (nil, false) when any element cannot be found and also when p
// is empty.
func (n *MatchNoder) FindPath(p noder.Path) (path noder.Path, found bool) {
	current := n.Noder

	for _, segment := range p {
		next, ok := n.findChild(current, segment.Name())
		if !ok {
			return nil, false
		}

		path = append(path, next)
		current = next
	}

	// path is non-empty exactly when p had elements and all were found.
	return path, len(path) > 0
}

// findChild returns the direct child of node whose name equals name.
// The second result is false when there is no such child.
//
// The (noder, bool) result cannot carry an error, so a node whose children
// cannot be listed is reported as having no matching child rather than
// searching whatever slice accompanied the error.
func (n *MatchNoder) findChild(node noder.Noder, name string) (noder.Noder, bool) {
	children, err := node.Children()
	if err != nil {
		return nil, false
	}

	for _, child := range children {
		if child.Name() == name {
			return child, true
		}
	}

	return nil, false
}

// noderPaths returns the names of the elements of path, in order.
// The result is always a non-nil slice with one entry per element.
func (n *MatchNoder) noderPaths(path noder.Path) []string {
	names := make([]string, 0, len(path))

	for _, element := range path {
		names = append(names, element.Name())
	}

	return names
}
144 changes: 144 additions & 0 deletions plumbing/format/gitignore/noder_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package gitignore_test

import (
"io/fs"
"testing"

"github.com/go-git/go-git/v5/plumbing/format/gitignore"
"github.com/go-git/go-git/v5/utils/merkletrie/noder"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// mockNoder is a test double whose methods return the corresponding field
// values unchanged.
type mockNoder struct {
	// Values handed back by Hash, String, Name, IsDir and Skip.
	hash   []byte
	string string
	name   string
	isDir  bool
	skip   bool

	// children and childrenErr are returned together by Children;
	// NumChildren returns len(children) with the same error.
	children    []noder.Noder
	childrenErr error
}

// Hash returns the configured hash.
func (m mockNoder) Hash() []byte {
	return m.hash
}

// String returns the configured string representation.
func (m mockNoder) String() string {
	return m.string
}

// Name returns the configured name.
func (m mockNoder) Name() string {
	return m.name
}

// IsDir returns the configured directory flag.
func (m mockNoder) IsDir() bool {
	return m.isDir
}

// Children returns the configured children together with the configured error.
func (m mockNoder) Children() ([]noder.Noder, error) {
	return m.children, m.childrenErr
}

// NumChildren returns the number of configured children together with the
// configured error.
func (m mockNoder) NumChildren() (int, error) {
	return len(m.children), m.childrenErr
}

// Skip returns the configured skip flag.
func (m mockNoder) Skip() bool {
	return m.skip
}

// TestMatchNoder_Children checks that Children filters out matched nodes,
// wraps kept directories, propagates errors from the wrapped node, and
// returns the same result on repeated calls and through NumChildren.
func TestMatchNoder_Children(t *testing.T) {
	mock := mockNoder{
		name: ".",
		children: []noder.Noder{
			mockNoder{name: "volcano"},
			mockNoder{name: "caldera"},
			mockNoder{name: "super", isDir: true, children: []noder.Noder{
				mockNoder{name: "caldera", children: []noder.Noder{}},
			}},
		},
	}

	// One matcher shared by all cases and by the expected wrapped child, so
	// the pattern list is spelled out only once.
	matcher := gitignore.NewMatcher([]gitignore.Pattern{
		gitignore.ParsePattern("**/middle/v[uo]l?ano", nil),
		gitignore.ParsePattern("volcano", nil),
	})

	tests := map[string]struct {
		Matcher     gitignore.Matcher
		Noder       mockNoder
		ExpErr      error
		ExpChildren []noder.Noder
	}{
		"children": {
			Matcher: matcher,
			Noder:   mock,
			ExpChildren: []noder.Noder{
				// "volcano" is filtered out; the file is kept as-is and the
				// directory is kept wrapped in a MatchNoder.
				mock.children[1],
				gitignore.IgnoreNoder(matcher, mock.children[2]),
			},
		},
		"error": {
			Matcher: matcher,
			Noder:   mockNoder{name: ".", childrenErr: fs.ErrNotExist},
			ExpErr:  fs.ErrNotExist,
		},
	}

	for name, tc := range tests {
		t.Run(name, func(t *testing.T) {
			ignoreNoder := gitignore.IgnoreNoder(tc.Matcher, tc.Noder)

			children, err := ignoreNoder.Children()
			require.ErrorIs(t, err, tc.ExpErr)
			assert.Equal(t, tc.ExpChildren, children)

			// Do it twice for the cached children
			children, err = ignoreNoder.Children()
			require.ErrorIs(t, err, tc.ExpErr)
			assert.Equal(t, tc.ExpChildren, children)

			num, err := ignoreNoder.NumChildren()
			require.ErrorIs(t, err, tc.ExpErr)
			assert.Equal(t, len(tc.ExpChildren), num)
		})
	}
}

// TestMatchNoder_PathIgnored checks PathIgnored against one path that the
// patterns match and one that they do not.
func TestMatchNoder_PathIgnored(t *testing.T) {
	matcher := gitignore.NewMatcher([]gitignore.Pattern{
		gitignore.ParsePattern("**/middle/v[uo]l?ano", nil),
		gitignore.ParsePattern("volcano", nil),
	})

	// pathTo builds head/middle/<leaf>.
	pathTo := func(leaf string) noder.Path {
		return noder.Path{
			mockNoder{name: "head"},
			mockNoder{name: "middle"},
			mockNoder{name: leaf},
		}
	}

	ignored := gitignore.IgnoreNoder(matcher, mockNoder{name: "."}).PathIgnored(pathTo("volcano"))
	assert.True(t, ignored)

	ignored = gitignore.IgnoreNoder(matcher, mockNoder{name: "."}).PathIgnored(pathTo("potato"))
	assert.False(t, ignored)
}

// TestMatchNoder_FindPath checks that FindPath returns nodes from the
// wrapped tree even when the patterns match them, and that it reports a
// missing path as (nil, false).
func TestMatchNoder_FindPath(t *testing.T) {
	nested := mockNoder{name: "volcano", children: []noder.Noder{}}
	super := mockNoder{name: "super", isDir: true, children: []noder.Noder{nested}}
	root := mockNoder{
		name:     ".",
		children: []noder.Noder{mockNoder{name: "volcano"}, super},
	}

	matcher := gitignore.NewMatcher([]gitignore.Pattern{
		gitignore.ParsePattern("**/middle/v[uo]l?ano", nil),
		gitignore.ParsePattern("volcano", nil),
	})

	// super/volcano exists in the wrapped tree.
	got, ok := gitignore.IgnoreNoder(matcher, root).FindPath(noder.Path{
		mockNoder{name: "super"},
		mockNoder{name: "volcano"},
	})
	assert.True(t, ok)
	assert.Equal(t, noder.Path{super, nested}, got)

	// super/caldera does not exist.
	got, ok = gitignore.IgnoreNoder(matcher, root).FindPath(noder.Path{
		mockNoder{name: "super"},
		mockNoder{name: "caldera"},
	})
	assert.False(t, ok)
	assert.Nil(t, got)
}
41 changes: 31 additions & 10 deletions utils/merkletrie/difftree.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ import (
"errors"
"fmt"

"github.com/go-git/go-git/v5/plumbing/format/gitignore"
"github.com/go-git/go-git/v5/utils/merkletrie/noder"
)

Expand Down Expand Up @@ -297,8 +298,14 @@ func DiffTreeContext(ctx context.Context, fromTree, toTree noder.Noder,
case noMoreNoders:
return ret, nil
case onlyFromRemains:
if err = ret.AddRecursiveDelete(from); err != nil {
return nil, err
if node, ok := ignoredNode(toTree, from); ok {
if err = diffNodesSameName(&ret, ii, ii.from.current, node); err != nil {
return nil, err
}
} else {
if err = ret.AddRecursiveDelete(from); err != nil {
return nil, err
}
}
if err = ii.nextFrom(); err != nil {
return nil, err
Expand Down Expand Up @@ -353,8 +360,10 @@ func diffNodes(changes *Changes, ii *doubleIter) error {
// compare their full paths as strings
switch from.Compare(to) {
case -1:
if err = changes.AddRecursiveDelete(from); err != nil {
return err
if ok := isIgnoredNode(to[0], from); !ok {
if err = changes.AddRecursiveDelete(from); err != nil {
return err
}
}
if err = ii.nextFrom(); err != nil {
return err
Expand All @@ -367,19 +376,16 @@ func diffNodes(changes *Changes, ii *doubleIter) error {
return err
}
default:
if err := diffNodesSameName(changes, ii); err != nil {
if err := diffNodesSameName(changes, ii, ii.from.current, ii.to.current); err != nil {
return err
}
}

return nil
}

func diffNodesSameName(changes *Changes, ii *doubleIter) error {
from := ii.from.current
to := ii.to.current

status, err := ii.compare()
func diffNodesSameName(changes *Changes, ii *doubleIter, from, to noder.Path) error {
status, err := ii.compareNoders(from, to)
if err != nil {
return err
}
Expand Down Expand Up @@ -451,3 +457,18 @@ func diffDirs(changes *Changes, ii *doubleIter) error {

return nil
}

// isIgnoredNode reports whether tree is a *gitignore.MatchNoder that
// considers path ignored. Any other kind of tree yields false.
func isIgnoredNode(tree noder.Noder, path noder.Path) bool {
	if matchTree, isMatchNoder := tree.(*gitignore.MatchNoder); isMatchNoder {
		return matchTree.PathIgnored(path)
	}

	return false
}

// ignoredNode looks up path in tree when tree is a *gitignore.MatchNoder
// that considers path ignored, returning the result of FindPath.
// In every other case it returns (nil, false).
func ignoredNode(tree noder.Noder, path noder.Path) (noder.Path, bool) {
	matchTree, isMatchNoder := tree.(*gitignore.MatchNoder)

	switch {
	case !isMatchNoder:
		return nil, false
	case !matchTree.PathIgnored(path):
		return nil, false
	}

	return matchTree.FindPath(path)
}
14 changes: 9 additions & 5 deletions utils/merkletrie/doubleiter.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,21 +137,25 @@ const (
// Compare returns the comparison between the current elements in the
// merkletries.
func (d *doubleIter) compare() (s comparison, err error) {
s.sameHash = d.hashEqual(d.from.current, d.to.current)
return d.compareNoders(d.from.current, d.to.current)
}

func (d *doubleIter) compareNoders(from, to noder.Noder) (s comparison, err error) {
s.sameHash = d.hashEqual(from, to)

fromIsDir := d.from.current.IsDir()
toIsDir := d.to.current.IsDir()
fromIsDir := from.IsDir()
toIsDir := to.IsDir()

s.bothAreDirs = fromIsDir && toIsDir
s.bothAreFiles = !fromIsDir && !toIsDir
s.fileAndDir = !s.bothAreDirs && !s.bothAreFiles

fromNumChildren, err := d.from.current.NumChildren()
fromNumChildren, err := from.NumChildren()
if err != nil {
return comparison{}, fmt.Errorf("from: %s", err)
}

toNumChildren, err := d.to.current.NumChildren()
toNumChildren, err := to.NumChildren()
if err != nil {
return comparison{}, fmt.Errorf("to: %s", err)
}
Expand Down
Loading
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy