Skip to content

Commit 2ba4a62

Browse files
kylecarbscoadler
andauthored
feat: Add high availability for multiple replicas (#4555)
* feat: HA tailnet coordinator * fixup! feat: HA tailnet coordinator * fixup! feat: HA tailnet coordinator * remove printlns * close all connections on coordinator * impelement high availability feature * fixup! impelement high availability feature * fixup! impelement high availability feature * fixup! impelement high availability feature * fixup! impelement high availability feature * Add replicas * Add DERP meshing to arbitrary addresses * Move packages to highavailability folder * Move coordinator to high availability package * Add flags for HA * Rename to replicasync * Denest packages for replicas * Add test for multiple replicas * Fix coordination test * Add HA to the helm chart * Rename function pointer * Add warnings for HA * Add the ability to block endpoints * Add flag to disable P2P connections * Wow, I made the tests pass * Add replicas endpoint * Ensure close kills replica * Update sql * Add database latency to high availability * Pipe TLS to DERP mesh * Fix DERP mesh with TLS * Add tests for TLS * Fix replica sync TLS * Fix RootCA for replica meshing * Remove ID from replicasync * Fix getting certificates for meshing * Remove excessive locking * Fix linting * Store mesh key in the database * Fix replica key for tests * Fix types gen * Fix unlocking unlocked * Fix race in tests * Update enterprise/derpmesh/derpmesh.go Co-authored-by: Colin Adler <colin1adler@gmail.com> * Rename to syncReplicas * Reuse http client * Delete old replicas on a CRON * Fix race condition in connection tests * Fix linting * Fix nil type * Move pubsub to in-memory for twenty test * Add comment for configuration tweaking * Fix leak with transport * Fix close leak in derpmesh * Fix race when creating server * Remove handler update * Skip test on Windows * Fix DERP mesh test * Wrap HTTP handler replacement in mutex * Fix error message for relay * Fix API handler for normal tests * Fix speedtest * Fix replica resend * Fix derpmesh send * Ping async * Increase wait time of template version jobd * Fix race when closing replica sync * Add name to client * Log the derpmap being used * Don't connect if DERP is empty * Improve agent coordinator logging * Fix lock in coordinator * Fix relay addr * Fix race when updating durations * Fix client publish race * Run pubsub loop in a queue * Store agent nodes in order * Fix coordinator locking * Check for closed pipe Co-authored-by: Colin Adler <colin1adler@gmail.com>
1 parent dc3519e commit 2ba4a62

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+3434
-401
lines changed

.vscode/settings.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
"derphttp",
2020
"derpmap",
2121
"devel",
22+
"dflags",
2223
"drpc",
2324
"drpcconn",
2425
"drpcmux",
@@ -86,8 +87,10 @@
8687
"ptytest",
8788
"quickstart",
8889
"reconfig",
90+
"replicasync",
8991
"retrier",
9092
"rpty",
93+
"SCIM",
9194
"sdkproto",
9295
"sdktrace",
9396
"Signup",

agent/agent.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ func (a *agent) runTailnet(ctx context.Context, derpMap *tailcfg.DERPMap) {
170170
if a.isClosed() {
171171
return
172172
}
173+
a.logger.Debug(ctx, "running tailnet with derpmap", slog.F("derpmap", derpMap))
173174
if a.network != nil {
174175
a.network.SetDERPMap(derpMap)
175176
return

agent/agent_test.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,7 @@ func TestAgent(t *testing.T) {
465465

466466
conn, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0)
467467
require.Eventually(t, func() bool {
468-
_, err := conn.Ping()
468+
_, err := conn.Ping(context.Background())
469469
return err == nil
470470
}, testutil.WaitMedium, testutil.IntervalFast)
471471
conn1, err := conn.DialContext(context.Background(), l.Addr().Network(), l.Addr().String())
@@ -483,9 +483,7 @@ func TestAgent(t *testing.T) {
483483

484484
t.Run("Speedtest", func(t *testing.T) {
485485
t.Parallel()
486-
if testing.Short() {
487-
t.Skip("The minimum duration for a speedtest is hardcoded in Tailscale to 5s!")
488-
}
486+
t.Skip("This test is relatively flakey because of Tailscale's speedtest code...")
489487
derpMap := tailnettest.RunDERPAndSTUN(t)
490488
conn, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{
491489
DERPMap: derpMap,

cli/agent_test.go

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@ import (
77
"github.com/stretchr/testify/assert"
88
"github.com/stretchr/testify/require"
99

10-
"cdr.dev/slog"
11-
1210
"github.com/coder/coder/cli/clitest"
1311
"github.com/coder/coder/coderd/coderdtest"
1412
"github.com/coder/coder/provisioner/echo"
@@ -67,11 +65,11 @@ func TestWorkspaceAgent(t *testing.T) {
6765
if assert.NotEmpty(t, workspace.LatestBuild.Resources) && assert.NotEmpty(t, resources[0].Agents) {
6866
assert.NotEmpty(t, resources[0].Agents[0].Version)
6967
}
70-
dialer, err := client.DialWorkspaceAgentTailnet(ctx, slog.Logger{}, resources[0].Agents[0].ID)
68+
dialer, err := client.DialWorkspaceAgent(ctx, resources[0].Agents[0].ID, nil)
7169
require.NoError(t, err)
7270
defer dialer.Close()
7371
require.Eventually(t, func() bool {
74-
_, err := dialer.Ping()
72+
_, err := dialer.Ping(ctx)
7573
return err == nil
7674
}, testutil.WaitMedium, testutil.IntervalFast)
7775
cancelFunc()
@@ -128,11 +126,11 @@ func TestWorkspaceAgent(t *testing.T) {
128126
if assert.NotEmpty(t, resources) && assert.NotEmpty(t, resources[0].Agents) {
129127
assert.NotEmpty(t, resources[0].Agents[0].Version)
130128
}
131-
dialer, err := client.DialWorkspaceAgentTailnet(ctx, slog.Logger{}, resources[0].Agents[0].ID)
129+
dialer, err := client.DialWorkspaceAgent(ctx, resources[0].Agents[0].ID, nil)
132130
require.NoError(t, err)
133131
defer dialer.Close()
134132
require.Eventually(t, func() bool {
135-
_, err := dialer.Ping()
133+
_, err := dialer.Ping(ctx)
136134
return err == nil
137135
}, testutil.WaitMedium, testutil.IntervalFast)
138136
cancelFunc()
@@ -189,11 +187,11 @@ func TestWorkspaceAgent(t *testing.T) {
189187
if assert.NotEmpty(t, resources) && assert.NotEmpty(t, resources[0].Agents) {
190188
assert.NotEmpty(t, resources[0].Agents[0].Version)
191189
}
192-
dialer, err := client.DialWorkspaceAgentTailnet(ctx, slog.Logger{}, resources[0].Agents[0].ID)
190+
dialer, err := client.DialWorkspaceAgent(ctx, resources[0].Agents[0].ID, nil)
193191
require.NoError(t, err)
194192
defer dialer.Close()
195193
require.Eventually(t, func() bool {
196-
_, err := dialer.Ping()
194+
_, err := dialer.Ping(ctx)
197195
return err == nil
198196
}, testutil.WaitMedium, testutil.IntervalFast)
199197
cancelFunc()

cli/config/file.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@ func (r Root) Session() File {
1313
return File(filepath.Join(string(r), "session"))
1414
}
1515

16+
// ReplicaID is a unique identifier for the Coder server.
17+
func (r Root) ReplicaID() File {
18+
return File(filepath.Join(string(r), "replica_id"))
19+
}
20+
1621
func (r Root) URL() File {
1722
return File(filepath.Join(string(r), "url"))
1823
}

cli/configssh_test.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ import (
1919
"github.com/stretchr/testify/assert"
2020
"github.com/stretchr/testify/require"
2121

22-
"cdr.dev/slog"
2322
"cdr.dev/slog/sloggers/slogtest"
2423

2524
"github.com/coder/coder/agent"
@@ -115,7 +114,7 @@ func TestConfigSSH(t *testing.T) {
115114
_ = agentCloser.Close()
116115
}()
117116
resources := coderdtest.AwaitWorkspaceAgents(t, client, workspace.ID)
118-
agentConn, err := client.DialWorkspaceAgentTailnet(context.Background(), slog.Logger{}, resources[0].Agents[0].ID)
117+
agentConn, err := client.DialWorkspaceAgent(context.Background(), resources[0].Agents[0].ID, nil)
119118
require.NoError(t, err)
120119
defer agentConn.Close()
121120

cli/deployment/flags.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,13 @@ func Flags() *codersdk.DeploymentFlags {
8585
Description: "Addresses for STUN servers to establish P2P connections. Set empty to disable P2P connections.",
8686
Default: []string{"stun.l.google.com:19302"},
8787
},
88+
DerpServerRelayAddress: &codersdk.StringFlag{
89+
Name: "DERP Server Relay Address",
90+
Flag: "derp-server-relay-address",
91+
EnvVar: "CODER_DERP_SERVER_RELAY_ADDRESS",
92+
Description: "An HTTP address that is accessible by other replicas to relay DERP traffic. Required for high availability.",
93+
Enterprise: true,
94+
},
8895
DerpConfigURL: &codersdk.StringFlag{
8996
Name: "DERP Config URL",
9097
Flag: "derp-config-url",

cli/portforward.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ import (
1616
"github.com/spf13/cobra"
1717
"golang.org/x/xerrors"
1818

19-
"cdr.dev/slog"
2019
"github.com/coder/coder/agent"
2120
"github.com/coder/coder/cli/cliflag"
2221
"github.com/coder/coder/cli/cliui"
@@ -96,7 +95,7 @@ func portForward() *cobra.Command {
9695
return xerrors.Errorf("await agent: %w", err)
9796
}
9897

99-
conn, err := client.DialWorkspaceAgentTailnet(ctx, slog.Logger{}, workspaceAgent.ID)
98+
conn, err := client.DialWorkspaceAgent(ctx, workspaceAgent.ID, nil)
10099
if err != nil {
101100
return err
102101
}
@@ -156,7 +155,7 @@ func portForward() *cobra.Command {
156155
case <-ticker.C:
157156
}
158157

159-
_, err = conn.Ping()
158+
_, err = conn.Ping(ctx)
160159
if err != nil {
161160
continue
162161
}

cli/root.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"flag"
66
"fmt"
7+
"io"
78
"net/http"
89
"net/url"
910
"os"
@@ -100,8 +101,9 @@ func Core() []*cobra.Command {
100101
}
101102

102103
func AGPL() []*cobra.Command {
103-
all := append(Core(), Server(deployment.Flags(), func(_ context.Context, o *coderd.Options) (*coderd.API, error) {
104-
return coderd.New(o), nil
104+
all := append(Core(), Server(deployment.Flags(), func(_ context.Context, o *coderd.Options) (*coderd.API, io.Closer, error) {
105+
api := coderd.New(o)
106+
return api, api, nil
105107
}))
106108
return all
107109
}

cli/server.go

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ import (
6969
)
7070

7171
// nolint:gocyclo
72-
func Server(dflags *codersdk.DeploymentFlags, newAPI func(context.Context, *coderd.Options) (*coderd.API, error)) *cobra.Command {
72+
func Server(dflags *codersdk.DeploymentFlags, newAPI func(context.Context, *coderd.Options) (*coderd.API, io.Closer, error)) *cobra.Command {
7373
root := &cobra.Command{
7474
Use: "server",
7575
Short: "Start a Coder server",
@@ -167,9 +167,10 @@ func Server(dflags *codersdk.DeploymentFlags, newAPI func(context.Context, *code
167167
}
168168
defer listener.Close()
169169

170+
var tlsConfig *tls.Config
170171
if dflags.TLSEnable.Value {
171-
listener, err = configureServerTLS(
172-
listener, dflags.TLSMinVersion.Value,
172+
tlsConfig, err = configureTLS(
173+
dflags.TLSMinVersion.Value,
173174
dflags.TLSClientAuth.Value,
174175
dflags.TLSCertFiles.Value,
175176
dflags.TLSKeyFiles.Value,
@@ -178,6 +179,7 @@ func Server(dflags *codersdk.DeploymentFlags, newAPI func(context.Context, *code
178179
if err != nil {
179180
return xerrors.Errorf("configure tls: %w", err)
180181
}
182+
listener = tls.NewListener(listener, tlsConfig)
181183
}
182184

183185
tcpAddr, valid := listener.Addr().(*net.TCPAddr)
@@ -328,6 +330,9 @@ func Server(dflags *codersdk.DeploymentFlags, newAPI func(context.Context, *code
328330
Experimental: ExperimentalEnabled(cmd),
329331
DeploymentFlags: dflags,
330332
}
333+
if tlsConfig != nil {
334+
options.TLSCertificates = tlsConfig.Certificates
335+
}
331336

332337
if dflags.OAuth2GithubClientSecret.Value != "" {
333338
options.GithubOAuth2Config, err = configureGithubOAuth2(accessURLParsed,
@@ -471,11 +476,14 @@ func Server(dflags *codersdk.DeploymentFlags, newAPI func(context.Context, *code
471476
), dflags.PromAddress.Value, "prometheus")()
472477
}
473478

474-
coderAPI, err := newAPI(ctx, options)
479+
// We use a separate closer so the Enterprise API
480+
// can have it's own close functions. This is cleaner
481+
// than abstracting the Coder API itself.
482+
coderAPI, closer, err := newAPI(ctx, options)
475483
if err != nil {
476484
return err
477485
}
478-
defer coderAPI.Close()
486+
defer closer.Close()
479487

480488
client := codersdk.New(localURL)
481489
if dflags.TLSEnable.Value {
@@ -893,7 +901,7 @@ func loadCertificates(tlsCertFiles, tlsKeyFiles []string) ([]tls.Certificate, er
893901
return certs, nil
894902
}
895903

896-
func configureServerTLS(listener net.Listener, tlsMinVersion, tlsClientAuth string, tlsCertFiles, tlsKeyFiles []string, tlsClientCAFile string) (net.Listener, error) {
904+
func configureTLS(tlsMinVersion, tlsClientAuth string, tlsCertFiles, tlsKeyFiles []string, tlsClientCAFile string) (*tls.Config, error) {
897905
tlsConfig := &tls.Config{
898906
MinVersion: tls.VersionTLS12,
899907
}
@@ -929,6 +937,7 @@ func configureServerTLS(listener net.Listener, tlsMinVersion, tlsClientAuth stri
929937
if err != nil {
930938
return nil, xerrors.Errorf("load certificates: %w", err)
931939
}
940+
tlsConfig.Certificates = certs
932941
tlsConfig.GetCertificate = func(hi *tls.ClientHelloInfo) (*tls.Certificate, error) {
933942
// If there's only one certificate, return it.
934943
if len(certs) == 1 {
@@ -963,7 +972,7 @@ func configureServerTLS(listener net.Listener, tlsMinVersion, tlsClientAuth stri
963972
tlsConfig.ClientCAs = caPool
964973
}
965974

966-
return tls.NewListener(listener, tlsConfig), nil
975+
return tlsConfig, nil
967976
}
968977

969978
func configureGithubOAuth2(accessURL *url.URL, clientID, clientSecret string, allowSignups bool, allowOrgs []string, rawTeams []string, enterpriseBaseURL string) (*coderd.GithubOAuth2Config, error) {

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy