Skip to content

Commit 4ed4069

Browse files
committed
feat: implement agent process management
- An opt-in feature has been added to the agent that deprioritizes non-Coder processes for both CPU and memory. Non-Coder processes have their niceness set to 10 and their oom_score_adj set to 100.
1 parent f703a5b commit 4ed4069

File tree

4 files changed

+199
-0
lines changed

4 files changed

+199
-0
lines changed

agent/agent.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"os/exec"
1616
"os/user"
1717
"path/filepath"
18+
"runtime"
1819
"sort"
1920
"strconv"
2021
"strings"
@@ -34,6 +35,7 @@ import (
3435
"tailscale.com/types/netlogtype"
3536

3637
"cdr.dev/slog"
38+
"github.com/coder/coder/v2/agent/agentproc"
3739
"github.com/coder/coder/v2/agent/agentssh"
3840
"github.com/coder/coder/v2/agent/reconnectingpty"
3941
"github.com/coder/coder/v2/buildinfo"
@@ -51,6 +53,8 @@ const (
5153
ProtocolDial = "dial"
5254
)
5355

56+
// EnvProcMemNice is the name of the environment variable that opts the agent
// into managing process niceness and oom_score_adj. The feature is treated as
// enabled when the variable is set to any non-empty value.
const EnvProcMemNice = "CODER_PROC_MEMNICE_ENABLE"
57+
5458
type Options struct {
5559
Filesystem afero.Fs
5660
LogDir string
@@ -68,6 +72,7 @@ type Options struct {
6872
PrometheusRegistry *prometheus.Registry
6973
ReportMetadataInterval time.Duration
7074
ServiceBannerRefreshInterval time.Duration
75+
Syscaller agentproc.Syscaller
7176
}
7277

7378
type Client interface {
@@ -197,6 +202,7 @@ type agent struct {
197202

198203
prometheusRegistry *prometheus.Registry
199204
metrics *agentMetrics
205+
syscaller agentproc.Syscaller
200206
}
201207

202208
func (a *agent) TailnetConn() *tailnet.Conn {
@@ -225,6 +231,7 @@ func (a *agent) runLoop(ctx context.Context) {
225231
go a.reportLifecycleLoop(ctx)
226232
go a.reportMetadataLoop(ctx)
227233
go a.fetchServiceBannerLoop(ctx)
234+
go a.manageProcessPriorityLoop(ctx)
228235

229236
for retrier := retry.New(100*time.Millisecond, 10*time.Second); retrier.Wait(ctx); {
230237
a.logger.Info(ctx, "connecting to coderd")
@@ -1253,6 +1260,84 @@ func (a *agent) startReportingConnectionStats(ctx context.Context) {
12531260
}
12541261
}
12551262

1263+
// exemptProcesses lists the process names (base name of the first cmdline
// argument) that manageProcessPriorityLoop never deprioritizes.
var exemptProcesses = []string{"coder"}
1264+
1265+
func (a *agent) manageProcessPriorityLoop(ctx context.Context) {
1266+
ticker := time.NewTicker(time.Minute)
1267+
defer ticker.Stop()
1268+
1269+
const (
1270+
procDir = agentproc.DefaultProcDir
1271+
niceness = 10
1272+
oomScoreAdj = -1000
1273+
)
1274+
1275+
if val := a.envVars[EnvProcMemNice]; val == "" || runtime.GOOS != "linux" {
1276+
a.logger.Info(ctx, "process priority not enabled, agent will not manage process niceness/oom_score_adj ",
1277+
slog.F("env_var", EnvProcMemNice),
1278+
slog.F("value", val),
1279+
slog.F("goos", runtime.GOOS),
1280+
)
1281+
return
1282+
}
1283+
1284+
for {
1285+
select {
1286+
case <-ticker.C:
1287+
procs, err := agentproc.List(a.filesystem, agentproc.DefaultProcDir)
1288+
if err != nil {
1289+
a.logger.Error(ctx, "failed to list procs",
1290+
slog.F("dir", agentproc.DefaultProcDir),
1291+
slog.Error(err),
1292+
)
1293+
continue
1294+
}
1295+
for _, proc := range procs {
1296+
// Trim off the path e.g. "./coder" -> "coder"
1297+
name := filepath.Base(proc.Name())
1298+
if slices.Contains(exemptProcesses, name) {
1299+
a.logger.Debug(ctx, "skipping exempt process",
1300+
slog.F("name", proc.Name()),
1301+
slog.F("pid", proc.PID),
1302+
)
1303+
continue
1304+
}
1305+
1306+
err := proc.SetNiceness(a.syscaller, niceness)
1307+
if err != nil {
1308+
a.logger.Error(ctx, "unable to set proc niceness",
1309+
slog.F("name", proc.Name()),
1310+
slog.F("pid", proc.PID),
1311+
slog.F("niceness", niceness),
1312+
slog.Error(err),
1313+
)
1314+
continue
1315+
}
1316+
1317+
err = proc.SetOOMAdj(oomScoreAdj)
1318+
if err != nil {
1319+
a.logger.Error(ctx, "unable to set proc oom_score_adj",
1320+
slog.F("name", proc.Name()),
1321+
slog.F("pid", proc.PID),
1322+
slog.F("oom_score_adj", oomScoreAdj),
1323+
slog.Error(err),
1324+
)
1325+
continue
1326+
}
1327+
1328+
a.logger.Debug(ctx, "deprioritized process",
1329+
slog.F("name", proc.Name()),
1330+
slog.F("pid", proc.PID),
1331+
slog.F("niceness", niceness),
1332+
slog.F("oom_score_adj", oomScoreAdj),
1333+
)
1334+
}
1335+
case <-ctx.Done():
1336+
return
1337+
}
1338+
}
1339+
}
1340+
12561341
// isClosed returns whether the API is closed or not.
12571342
func (a *agent) isClosed() bool {
12581343
select {

agent/agentproc/doc.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Package agentproc contains logic for interfacing with local
2+
// processes running in the same context as the agent.
3+
package agentproc

agent/agentproc/proc.go

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
package agentproc
2+
3+
import (
4+
"path/filepath"
5+
"strconv"
6+
"strings"
7+
"syscall"
8+
9+
"github.com/spf13/afero"
10+
"golang.org/x/sys/unix"
11+
"golang.org/x/xerrors"
12+
)
13+
14+
// DefaultProcDir is the directory the agent passes to List to enumerate
// running processes.
const DefaultProcDir = "/proc"
15+
16+
// Syscaller abstracts the system calls needed to adjust process priority so
// that an alternative implementation (such as a test double) can be supplied.
type Syscaller interface {
17+
// SetPriority sets the scheduling priority (niceness) of the process
// identified by pid.
SetPriority(pid int32, priority int) error
18+
}
19+
20+
type UnixSyscaller struct{}
21+
22+
func (UnixSyscaller) SetPriority(pid int32, nice int) error {
23+
err := unix.Setpriority(unix.PRIO_PROCESS, int(pid), nice)
24+
if err != nil {
25+
return xerrors.Errorf("set priority: %w", err)
26+
}
27+
return nil
28+
}
29+
30+
// Process describes a single process discovered by List.
type Process struct {
31+
// Dir is the process's own directory inside the listed proc directory
// (the listed directory joined with the PID entry name).
Dir string
32+
// CmdLine is the raw content of the process's cmdline file; Name treats
// it as NUL-separated arguments.
CmdLine string
33+
// PID is the process ID, parsed from the directory entry name.
PID int32
34+
// fs is the filesystem used for writes to files under Dir.
fs afero.Fs
35+
}
36+
37+
func (p *Process) SetOOMAdj(score int) error {
38+
path := filepath.Join(p.Dir, "oom_score_adj")
39+
err := afero.WriteFile(p.fs,
40+
path,
41+
[]byte(strconv.Itoa(score)),
42+
0644,
43+
)
44+
if err != nil {
45+
return xerrors.Errorf("write %q: %w", path, err)
46+
}
47+
48+
return nil
49+
}
50+
51+
func (p *Process) SetNiceness(sc Syscaller, score int) error {
52+
err := sc.SetPriority(p.PID, score)
53+
if err != nil {
54+
return xerrors.Errorf("set priority for %q: %w", p.CmdLine, err)
55+
}
56+
return nil
57+
}
58+
59+
func (p *Process) Name() string {
60+
args := strings.Split(p.CmdLine, "\x00")
61+
// Split will always return at least one element.
62+
return args[0]
63+
}
64+
65+
func List(fs afero.Fs, dir string) ([]*Process, error) {
66+
d, err := fs.Open(dir)
67+
if err != nil {
68+
return nil, xerrors.Errorf("open dir %q: %w", dir, err)
69+
}
70+
71+
entries, err := d.Readdirnames(0)
72+
if err != nil {
73+
return nil, xerrors.Errorf("readdirnames: %w", err)
74+
}
75+
76+
processes := make([]*Process, 0, len(entries))
77+
for _, entry := range entries {
78+
pid, err := strconv.ParseInt(entry, 10, 32)
79+
if err != nil {
80+
continue
81+
}
82+
cmdline, err := afero.ReadFile(fs, filepath.Join(dir, entry, "cmdline"))
83+
if err != nil {
84+
var errNo syscall.Errno
85+
if xerrors.As(err, &errNo) && errNo == syscall.EPERM {
86+
continue
87+
}
88+
return nil, xerrors.Errorf("read cmdline: %w", err)
89+
}
90+
processes = append(processes, &Process{
91+
PID: int32(pid),
92+
CmdLine: string(cmdline),
93+
Dir: filepath.Join(dir, entry),
94+
fs: fs,
95+
})
96+
}
97+
98+
return processes, nil
99+
}

agent/agentproc/proc_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package agentproc_test
2+
3+
// mockSyscaller is a test double whose SetPriority behavior is supplied by
// the optional SetPriorityFn hook.
type mockSyscaller struct {
	SetPriorityFn func(int32, int) error
}

// SetPriority forwards to SetPriorityFn when one is configured and otherwise
// reports success without doing anything.
func (m mockSyscaller) SetPriority(pid int32, nice int) error {
	if hook := m.SetPriorityFn; hook != nil {
		return hook(pid, nice)
	}
	return nil
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy