diff --git a/coderd/agentapi/api.go b/coderd/agentapi/api.go index 7f9fda63cb98c..3922dfc4bcad0 100644 --- a/coderd/agentapi/api.go +++ b/coderd/agentapi/api.go @@ -17,10 +17,12 @@ import ( "cdr.dev/slog" agentproto "github.com/coder/coder/v2/agent/proto" + "github.com/coder/coder/v2/coderd/agentapi/resourcesmonitor" "github.com/coder/coder/v2/coderd/appearance" "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/pubsub" "github.com/coder/coder/v2/coderd/externalauth" + "github.com/coder/coder/v2/coderd/notifications" "github.com/coder/coder/v2/coderd/prometheusmetrics" "github.com/coder/coder/v2/coderd/tracing" "github.com/coder/coder/v2/coderd/workspacestats" @@ -29,6 +31,7 @@ import ( "github.com/coder/coder/v2/codersdk/agentsdk" "github.com/coder/coder/v2/tailnet" tailnetproto "github.com/coder/coder/v2/tailnet/proto" + "github.com/coder/quartz" ) // API implements the DRPC agent API interface from agent/proto. This struct is @@ -59,7 +62,9 @@ type Options struct { Ctx context.Context Log slog.Logger + Clock quartz.Clock Database database.Store + NotificationsEnqueuer notifications.Enqueuer Pubsub pubsub.Pubsub DerpMapFn func() *tailcfg.DERPMap TailnetCoordinator *atomic.Pointer[tailnet.Coordinator] @@ -82,6 +87,10 @@ type Options struct { } func New(opts Options) *API { + if opts.Clock == nil { + opts.Clock = quartz.NewReal() + } + api := &API{ opts: opts, mu: sync.Mutex{}, @@ -104,9 +113,22 @@ func New(opts Options) *API { } api.ResourcesMonitoringAPI = &ResourcesMonitoringAPI{ - Log: opts.Log, - AgentID: opts.AgentID, - Database: opts.Database, + AgentID: opts.AgentID, + WorkspaceID: opts.WorkspaceID, + Clock: opts.Clock, + Database: opts.Database, + NotificationsEnqueuer: opts.NotificationsEnqueuer, + Debounce: 5 * time.Minute, + + Config: resourcesmonitor.Config{ + NumDatapoints: 20, + CollectionInterval: 10 * time.Second, + + Alert: resourcesmonitor.AlertConfig{ + MinimumNOKsPercent: 20, + ConsecutiveNOKsPercent: 50, + 
}, + }, } api.StatsAPI = &StatsAPI{ diff --git a/coderd/agentapi/resources_monitoring.go b/coderd/agentapi/resources_monitoring.go index 0bce9b5104be6..e21c9bc7581d8 100644 --- a/coderd/agentapi/resources_monitoring.go +++ b/coderd/agentapi/resources_monitoring.go @@ -4,20 +4,35 @@ import ( "context" "database/sql" "errors" + "fmt" + "time" "golang.org/x/xerrors" + "cdr.dev/slog" + "github.com/google/uuid" - "cdr.dev/slog" "github.com/coder/coder/v2/agent/proto" + "github.com/coder/coder/v2/coderd/agentapi/resourcesmonitor" "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbauthz" + "github.com/coder/coder/v2/coderd/database/dbtime" + "github.com/coder/coder/v2/coderd/notifications" + "github.com/coder/quartz" ) type ResourcesMonitoringAPI struct { - AgentID uuid.UUID - Database database.Store - Log slog.Logger + AgentID uuid.UUID + WorkspaceID uuid.UUID + + Log slog.Logger + Clock quartz.Clock + Database database.Store + NotificationsEnqueuer notifications.Enqueuer + + Debounce time.Duration + Config resourcesmonitor.Config } func (a *ResourcesMonitoringAPI) GetResourcesMonitoringConfiguration(ctx context.Context, _ *proto.GetResourcesMonitoringConfigurationRequest) (*proto.GetResourcesMonitoringConfigurationResponse, error) { @@ -33,8 +48,8 @@ func (a *ResourcesMonitoringAPI) GetResourcesMonitoringConfiguration(ctx context return &proto.GetResourcesMonitoringConfigurationResponse{ Config: &proto.GetResourcesMonitoringConfigurationResponse_Config{ - CollectionIntervalSeconds: 10, - NumDatapoints: 20, + CollectionIntervalSeconds: int32(a.Config.CollectionInterval.Seconds()), + NumDatapoints: a.Config.NumDatapoints, }, Memory: func() *proto.GetResourcesMonitoringConfigurationResponse_Memory { if memoryErr != nil { @@ -60,8 +75,182 @@ func (a *ResourcesMonitoringAPI) GetResourcesMonitoringConfiguration(ctx context } func (a *ResourcesMonitoringAPI) PushResourcesMonitoringUsage(ctx context.Context, req 
*proto.PushResourcesMonitoringUsageRequest) (*proto.PushResourcesMonitoringUsageResponse, error) { - a.Log.Info(ctx, "resources monitoring usage received", - slog.F("request", req)) + var err error + + if memoryErr := a.monitorMemory(ctx, req.Datapoints); memoryErr != nil { + err = errors.Join(err, xerrors.Errorf("monitor memory: %w", memoryErr)) + } + + if volumeErr := a.monitorVolumes(ctx, req.Datapoints); volumeErr != nil { + err = errors.Join(err, xerrors.Errorf("monitor volume: %w", volumeErr)) + } + + return &proto.PushResourcesMonitoringUsageResponse{}, err +} + +func (a *ResourcesMonitoringAPI) monitorMemory(ctx context.Context, datapoints []*proto.PushResourcesMonitoringUsageRequest_Datapoint) error { + monitor, err := a.Database.FetchMemoryResourceMonitorsByAgentID(ctx, a.AgentID) + if err != nil { + // It is valid for an agent to not have a memory monitor, so we + // do not want to treat it as an error. + if errors.Is(err, sql.ErrNoRows) { + return nil + } + + return xerrors.Errorf("fetch memory resource monitor: %w", err) + } + + if !monitor.Enabled { + return nil + } + + usageDatapoints := make([]*proto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage, 0, len(datapoints)) + for _, datapoint := range datapoints { + usageDatapoints = append(usageDatapoints, datapoint.Memory) + } + + usageStates := resourcesmonitor.CalculateMemoryUsageStates(monitor, usageDatapoints) + + oldState := monitor.State + newState := resourcesmonitor.NextState(a.Config, oldState, usageStates) + + debouncedUntil, shouldNotify := monitor.Debounce(a.Debounce, a.Clock.Now(), oldState, newState) + + //nolint:gocritic // We need to be able to update the resource monitor here. 
+ err = a.Database.UpdateMemoryResourceMonitor(dbauthz.AsResourceMonitor(ctx), database.UpdateMemoryResourceMonitorParams{ + AgentID: a.AgentID, + State: newState, + UpdatedAt: dbtime.Time(a.Clock.Now()), + DebouncedUntil: dbtime.Time(debouncedUntil), + }) + if err != nil { + return xerrors.Errorf("update workspace monitor: %w", err) + } + + if !shouldNotify { + return nil + } + + workspace, err := a.Database.GetWorkspaceByID(ctx, a.WorkspaceID) + if err != nil { + return xerrors.Errorf("get workspace by id: %w", err) + } + + _, err = a.NotificationsEnqueuer.EnqueueWithData( + // nolint:gocritic // We need to be able to send the notification. + dbauthz.AsNotifier(ctx), + workspace.OwnerID, + notifications.TemplateWorkspaceOutOfMemory, + map[string]string{ + "workspace": workspace.Name, + "threshold": fmt.Sprintf("%d%%", monitor.Threshold), + }, + map[string]any{ + // NOTE(DanielleMaywood): + // When notifications are enqueued, they are checked to be + // unique within a single day. This means that if we attempt + // to send two OOM notifications for the same workspace on + // the same day, the enqueuer will prevent us from sending + // a second one. We inject a timestamp to make the + // notifications appear different enough to circumvent this + // deduplication logic.
+ "timestamp": a.Clock.Now(), + }, + "workspace-monitor-memory", + ) + if err != nil { + return xerrors.Errorf("notify workspace OOM: %w", err) + } + + return nil +} + +func (a *ResourcesMonitoringAPI) monitorVolumes(ctx context.Context, datapoints []*proto.PushResourcesMonitoringUsageRequest_Datapoint) error { + volumeMonitors, err := a.Database.FetchVolumesResourceMonitorsByAgentID(ctx, a.AgentID) + if err != nil { + return xerrors.Errorf("get or insert volume monitor: %w", err) + } + + outOfDiskVolumes := make([]map[string]any, 0) + + for _, monitor := range volumeMonitors { + if !monitor.Enabled { + continue + } + + usageDatapoints := make([]*proto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage, 0, len(datapoints)) + for _, datapoint := range datapoints { + var usage *proto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage + + for _, volume := range datapoint.Volumes { + if volume.Volume == monitor.Path { + usage = volume + break + } + } + + usageDatapoints = append(usageDatapoints, usage) + } + + usageStates := resourcesmonitor.CalculateVolumeUsageStates(monitor, usageDatapoints) + + oldState := monitor.State + newState := resourcesmonitor.NextState(a.Config, oldState, usageStates) + + debouncedUntil, shouldNotify := monitor.Debounce(a.Debounce, a.Clock.Now(), oldState, newState) + + if shouldNotify { + outOfDiskVolumes = append(outOfDiskVolumes, map[string]any{ + "path": monitor.Path, + "threshold": fmt.Sprintf("%d%%", monitor.Threshold), + }) + } + + //nolint:gocritic // We need to be able to update the resource monitor here. 
+ if err := a.Database.UpdateVolumeResourceMonitor(dbauthz.AsResourceMonitor(ctx), database.UpdateVolumeResourceMonitorParams{ + AgentID: a.AgentID, + Path: monitor.Path, + State: newState, + UpdatedAt: dbtime.Time(a.Clock.Now()), + DebouncedUntil: dbtime.Time(debouncedUntil), + }); err != nil { + return xerrors.Errorf("update workspace monitor: %w", err) + } + } + + if len(outOfDiskVolumes) == 0 { + return nil + } + + workspace, err := a.Database.GetWorkspaceByID(ctx, a.WorkspaceID) + if err != nil { + return xerrors.Errorf("get workspace by id: %w", err) + } + + if _, err := a.NotificationsEnqueuer.EnqueueWithData( + // nolint:gocritic // We need to be able to send the notification. + dbauthz.AsNotifier(ctx), + workspace.OwnerID, + notifications.TemplateWorkspaceOutOfDisk, + map[string]string{ + "workspace": workspace.Name, + }, + map[string]any{ + "volumes": outOfDiskVolumes, + // NOTE(DanielleMaywood): + // When notifications are enqueued, they are checked to be + // unique within a single day. This means that if we attempt + // to send two OOD notifications for the same workspace on + // the same day, the enqueuer will prevent us from sending + // a second one. We inject a timestamp to make the + // notifications appear different enough to circumvent this + // deduplication logic.
+ "timestamp": a.Clock.Now(), + }, + "workspace-monitor-volumes", + ); err != nil { + return xerrors.Errorf("notify workspace OOD: %w", err) + } - return &proto.PushResourcesMonitoringUsageResponse{}, nil + return nil } diff --git a/coderd/agentapi/resources_monitoring_test.go b/coderd/agentapi/resources_monitoring_test.go new file mode 100644 index 0000000000000..087ccfd24e459 --- /dev/null +++ b/coderd/agentapi/resources_monitoring_test.go @@ -0,0 +1,944 @@ +package agentapi_test + +import ( + "context" + "testing" + "time" + + "github.com/google/uuid" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/types/known/timestamppb" + + agentproto "github.com/coder/coder/v2/agent/proto" + "github.com/coder/coder/v2/coderd/agentapi" + "github.com/coder/coder/v2/coderd/agentapi/resourcesmonitor" + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbgen" + "github.com/coder/coder/v2/coderd/database/dbtestutil" + "github.com/coder/coder/v2/coderd/notifications" + "github.com/coder/coder/v2/coderd/notifications/notificationstest" + "github.com/coder/quartz" +) + +func resourceMonitorAPI(t *testing.T) (*agentapi.ResourcesMonitoringAPI, database.User, *quartz.Mock, *notificationstest.FakeEnqueuer) { + t.Helper() + + db, _ := dbtestutil.NewDB(t) + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + template := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + }) + templateVersion := dbgen.TemplateVersion(t, db, database.TemplateVersion{ + TemplateID: uuid.NullUUID{Valid: true, UUID: template.ID}, + OrganizationID: org.ID, + CreatedBy: user.ID, + }) + workspace := dbgen.Workspace(t, db, database.WorkspaceTable{ + OrganizationID: org.ID, + TemplateID: template.ID, + OwnerID: user.ID, + }) + job := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{ + Type: database.ProvisionerJobTypeWorkspaceBuild, + }) + build := 
dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{ + JobID: job.ID, + WorkspaceID: workspace.ID, + TemplateVersionID: templateVersion.ID, + }) + resource := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{ + JobID: build.JobID, + }) + agent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{ + ResourceID: resource.ID, + }) + + notifyEnq := &notificationstest.FakeEnqueuer{} + clock := quartz.NewMock(t) + + return &agentapi.ResourcesMonitoringAPI{ + AgentID: agent.ID, + WorkspaceID: workspace.ID, + Clock: clock, + Database: db, + NotificationsEnqueuer: notifyEnq, + Config: resourcesmonitor.Config{ + NumDatapoints: 20, + CollectionInterval: 10 * time.Second, + + Alert: resourcesmonitor.AlertConfig{ + MinimumNOKsPercent: 20, + ConsecutiveNOKsPercent: 50, + }, + }, + Debounce: 1 * time.Minute, + }, user, clock, notifyEnq +} + +func TestMemoryResourceMonitorDebounce(t *testing.T) { + t.Parallel() + + // This test is a bit of a long one. We're testing that + // when a monitor goes into an alert state, it doesn't + // allow another notification to occur until after the + // debounce period. + // + // 1. OK -> NOK |> sends a notification + // 2. NOK -> OK |> does nothing + // 3. OK -> NOK |> does nothing due to debounce period + // 4. NOK -> OK |> does nothing + // 5.
OK -> NOK |> sends a notification as debounce period exceeded + + api, user, clock, notifyEnq := resourceMonitorAPI(t) + api.Config.Alert.ConsecutiveNOKsPercent = 100 + + // Given: A monitor in an OK state + dbgen.WorkspaceAgentMemoryResourceMonitor(t, api.Database, database.WorkspaceAgentMemoryResourceMonitor{ + AgentID: api.AgentID, + State: database.WorkspaceAgentMonitorStateOK, + Threshold: 80, + }) + + // When: The monitor is given a state that will trigger NOK + _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 10, + Total: 10, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect there to be a notification sent + sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory)) + require.Len(t, sent, 1) + require.Equal(t, user.ID, sent[0].UserID) + notifyEnq.Clear() + + // When: The monitor moves to an OK state from NOK + clock.Advance(api.Debounce / 4) + _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 1, + Total: 10, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect no new notifications + sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory)) + require.Len(t, sent, 0) + notifyEnq.Clear() + + // When: The monitor moves back to a NOK state before the debounced time. 
+ clock.Advance(api.Debounce / 4) + _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 10, + Total: 10, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect no new notifications (showing the debouncer working) + sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory)) + require.Len(t, sent, 0) + notifyEnq.Clear() + + // When: The monitor moves back to an OK state from NOK + clock.Advance(api.Debounce / 4) + _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 1, + Total: 10, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We still expect no new notifications + sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory)) + require.Len(t, sent, 0) + notifyEnq.Clear() + + // When: The monitor moves back to a NOK state after the debounce period. 
+ clock.Advance(api.Debounce/4 + 1*time.Second) + _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 10, + Total: 10, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect a notification + sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory)) + require.Len(t, sent, 1) + require.Equal(t, user.ID, sent[0].UserID) +} + +func TestMemoryResourceMonitor(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + memoryUsage []int64 + memoryTotal int64 + previousState database.WorkspaceAgentMonitorState + expectState database.WorkspaceAgentMonitorState + shouldNotify bool + }{ + { + name: "WhenOK/NeverExceedsThreshold", + memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2}, + memoryTotal: 10, + previousState: database.WorkspaceAgentMonitorStateOK, + expectState: database.WorkspaceAgentMonitorStateOK, + shouldNotify: false, + }, + { + name: "WhenOK/ShouldStayInOK", + memoryUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2}, + memoryTotal: 10, + previousState: database.WorkspaceAgentMonitorStateOK, + expectState: database.WorkspaceAgentMonitorStateOK, + shouldNotify: false, + }, + { + name: "WhenOK/ConsecutiveExceedsThreshold", + memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 9, 8, 9}, + memoryTotal: 10, + previousState: database.WorkspaceAgentMonitorStateOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: true, + }, + { + name: "WhenOK/MinimumExceedsThreshold", + memoryUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9}, + memoryTotal: 10, + previousState: database.WorkspaceAgentMonitorStateOK, + expectState: 
database.WorkspaceAgentMonitorStateNOK, + shouldNotify: true, + }, + { + name: "WhenNOK/NeverExceedsThreshold", + memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2}, + memoryTotal: 10, + previousState: database.WorkspaceAgentMonitorStateNOK, + expectState: database.WorkspaceAgentMonitorStateOK, + shouldNotify: false, + }, + { + name: "WhenNOK/ShouldStayInNOK", + memoryUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2}, + memoryTotal: 10, + previousState: database.WorkspaceAgentMonitorStateNOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: false, + }, + { + name: "WhenNOK/ConsecutiveExceedsThreshold", + memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 9, 8, 9}, + memoryTotal: 10, + previousState: database.WorkspaceAgentMonitorStateNOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: false, + }, + { + name: "WhenNOK/MinimumExceedsThreshold", + memoryUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9}, + memoryTotal: 10, + previousState: database.WorkspaceAgentMonitorStateNOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: false, + }, + } + + for _, tt := range tests { + tt := tt + + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + api, user, clock, notifyEnq := resourceMonitorAPI(t) + + datapoints := make([]*agentproto.PushResourcesMonitoringUsageRequest_Datapoint, 0, len(tt.memoryUsage)) + collectedAt := clock.Now() + for _, usage := range tt.memoryUsage { + collectedAt = collectedAt.Add(15 * time.Second) + datapoints = append(datapoints, &agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + CollectedAt: timestamppb.New(collectedAt), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: usage, + Total: tt.memoryTotal, + }, + }) + } + + dbgen.WorkspaceAgentMemoryResourceMonitor(t, api.Database, database.WorkspaceAgentMemoryResourceMonitor{ + AgentID: api.AgentID, + State: 
tt.previousState, + Threshold: 80, + }) + + clock.Set(collectedAt) + _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: datapoints, + }) + require.NoError(t, err) + + sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory)) + if tt.shouldNotify { + require.Len(t, sent, 1) + require.Equal(t, user.ID, sent[0].UserID) + } else { + require.Len(t, sent, 0) + } + }) + } +} + +func TestMemoryResourceMonitorMissingData(t *testing.T) { + t.Parallel() + + t.Run("UnknownPreventsMovingIntoAlertState", func(t *testing.T) { + t.Parallel() + + api, _, clock, notifyEnq := resourceMonitorAPI(t) + api.Config.Alert.ConsecutiveNOKsPercent = 50 + api.Config.Alert.MinimumNOKsPercent = 100 + + // Given: A monitor in an OK state. + dbgen.WorkspaceAgentMemoryResourceMonitor(t, api.Database, database.WorkspaceAgentMemoryResourceMonitor{ + AgentID: api.AgentID, + State: database.WorkspaceAgentMonitorStateOK, + Threshold: 80, + }) + + // When: A datapoint is missing, surrounded by two NOK datapoints. + _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 10, + Total: 10, + }, + }, + { + CollectedAt: timestamppb.New(clock.Now().Add(10 * time.Second)), + Memory: nil, + }, + { + CollectedAt: timestamppb.New(clock.Now().Add(20 * time.Second)), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 10, + Total: 10, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect no notifications, as this unknown prevents us knowing we should alert. 
+ sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory)) + require.Len(t, sent, 0) + + // Then: We expect the monitor to still be in an OK state. + monitor, err := api.Database.FetchMemoryResourceMonitorsByAgentID(context.Background(), api.AgentID) + require.NoError(t, err) + require.Equal(t, database.WorkspaceAgentMonitorStateOK, monitor.State) + }) + + t.Run("UnknownPreventsMovingOutOfAlertState", func(t *testing.T) { + t.Parallel() + + api, _, clock, _ := resourceMonitorAPI(t) + api.Config.Alert.ConsecutiveNOKsPercent = 50 + api.Config.Alert.MinimumNOKsPercent = 100 + + // Given: A monitor in a NOK state. + dbgen.WorkspaceAgentMemoryResourceMonitor(t, api.Database, database.WorkspaceAgentMemoryResourceMonitor{ + AgentID: api.AgentID, + State: database.WorkspaceAgentMonitorStateNOK, + Threshold: 80, + }) + + // When: A datapoint is missing, surrounded by two OK datapoints. + _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 1, + Total: 10, + }, + }, + { + CollectedAt: timestamppb.New(clock.Now().Add(10 * time.Second)), + Memory: nil, + }, + { + CollectedAt: timestamppb.New(clock.Now().Add(20 * time.Second)), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 1, + Total: 10, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect the monitor to still be in a NOK state. + monitor, err := api.Database.FetchMemoryResourceMonitorsByAgentID(context.Background(), api.AgentID) + require.NoError(t, err) + require.Equal(t, database.WorkspaceAgentMonitorStateNOK, monitor.State) + }) +} + +func TestVolumeResourceMonitorDebounce(t *testing.T) { + t.Parallel() + + // This test is an even longer one. 
We're testing + // that the debounce logic is independent per + // volume monitor. We interleave the triggering + // of each monitor to ensure the debounce logic + // is monitor independent. + // + // First Monitor: + // 1. OK -> NOK |> sends a notification + // 2. NOK -> OK |> does nothing + // 3. OK -> NOK |> does nothing due to debounce period + // 4. NOK -> OK |> does nothing + // 5. OK -> NOK |> sends a notification as debounce period exceeded + // 6. NOK -> OK |> does nothing + // + // Second Monitor: + // 1. NOK -> OK |> does nothing + // 2. OK -> NOK |> sends a notification + // 3. NOK -> OK |> does nothing + // 4. OK -> NOK |> does nothing due to debounce period + // 5. NOK -> OK |> does nothing + // 6. OK -> NOK |> sends a notification as debounce period exceeded + // + + firstVolumePath := "/home/coder" + secondVolumePath := "/dev/coder" + + api, _, clock, notifyEnq := resourceMonitorAPI(t) + + // Given: + // - First monitor in an OK state + // - Second monitor in a NOK state + dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{ + AgentID: api.AgentID, + Path: firstVolumePath, + State: database.WorkspaceAgentMonitorStateOK, + Threshold: 80, + }) + dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{ + AgentID: api.AgentID, + Path: secondVolumePath, + State: database.WorkspaceAgentMonitorStateNOK, + Threshold: 80, + }) + + // When: + // - First monitor is in a NOK state + // - Second monitor is in an OK state + _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + {Volume: firstVolumePath, Used: 10, Total: 10}, + {Volume: secondVolumePath, Used: 1, Total: 10}, + }, + }, + }, + }) +
require.NoError(t, err) + + // Then: + // - We expect a notification from only the first monitor + sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + require.Len(t, sent, 1) + volumes := requireVolumeData(t, sent[0]) + require.Len(t, volumes, 1) + require.Equal(t, firstVolumePath, volumes[0]["path"]) + notifyEnq.Clear() + + // When: + // - First monitor moves back to OK + // - Second monitor moves to NOK + clock.Advance(api.Debounce / 4) + _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + {Volume: firstVolumePath, Used: 1, Total: 10}, + {Volume: secondVolumePath, Used: 10, Total: 10}, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: + // - We expect a notification from only the second monitor + sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + require.Len(t, sent, 1) + volumes = requireVolumeData(t, sent[0]) + require.Len(t, volumes, 1) + require.Equal(t, secondVolumePath, volumes[0]["path"]) + notifyEnq.Clear() + + // When: + // - First monitor moves back to NOK before debounce period has ended + // - Second monitor moves back to OK + clock.Advance(api.Debounce / 4) + _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + {Volume: firstVolumePath, Used: 10, Total: 10}, + {Volume: secondVolumePath, Used: 1, Total: 10}, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: + // - We expect no new notifications + sent = 
notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + require.Len(t, sent, 0) + notifyEnq.Clear() + + // When: + // - First monitor moves back to OK + // - Second monitor moves back to NOK + clock.Advance(api.Debounce / 4) + _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + {Volume: firstVolumePath, Used: 1, Total: 10}, + {Volume: secondVolumePath, Used: 10, Total: 10}, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: + // - We expect no new notifications. + sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + require.Len(t, sent, 0) + notifyEnq.Clear() + + // When: + // - First monitor moves back to a NOK state after the debounce period + // - Second monitor moves back to OK + clock.Advance(api.Debounce/4 + 1*time.Second) + _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + {Volume: firstVolumePath, Used: 10, Total: 10}, + {Volume: secondVolumePath, Used: 1, Total: 10}, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: + // - We expect a notification from only the first monitor + sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + require.Len(t, sent, 1) + volumes = requireVolumeData(t, sent[0]) + require.Len(t, volumes, 1) + require.Equal(t, firstVolumePath, volumes[0]["path"]) + notifyEnq.Clear() + + // When: + // - First monitor moves back to OK + // - Second monitor moves
back to NOK after the debounce period + clock.Advance(api.Debounce/4 + 1*time.Second) + _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + {Volume: firstVolumePath, Used: 1, Total: 10}, + {Volume: secondVolumePath, Used: 10, Total: 10}, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: + // - We expect a notification from only the second monitor + sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + require.Len(t, sent, 1) + volumes = requireVolumeData(t, sent[0]) + require.Len(t, volumes, 1) + require.Equal(t, secondVolumePath, volumes[0]["path"]) +} + +func TestVolumeResourceMonitor(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + volumePath string + volumeUsage []int64 + volumeTotal int64 + thresholdPercent int32 + previousState database.WorkspaceAgentMonitorState + expectState database.WorkspaceAgentMonitorState + shouldNotify bool + }{ + { + name: "WhenOK/NeverExceedsThreshold", + volumePath: "/home/coder", + volumeUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2}, + volumeTotal: 10, + thresholdPercent: 80, + previousState: database.WorkspaceAgentMonitorStateOK, + expectState: database.WorkspaceAgentMonitorStateOK, + shouldNotify: false, + }, + { + name: "WhenOK/ShouldStayInOK", + volumePath: "/home/coder", + volumeUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2}, + volumeTotal: 10, + thresholdPercent: 80, + previousState: database.WorkspaceAgentMonitorStateOK, + expectState: database.WorkspaceAgentMonitorStateOK, + shouldNotify: false, + }, + { + name: "WhenOK/ConsecutiveExceedsThreshold", + volumePath: "/home/coder", + volumeUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 
9, 8, 9}, + volumeTotal: 10, + thresholdPercent: 80, + previousState: database.WorkspaceAgentMonitorStateOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: true, + }, + { + name: "WhenOK/MinimumExceedsThreshold", + volumePath: "/home/coder", + volumeUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9}, + volumeTotal: 10, + thresholdPercent: 80, + previousState: database.WorkspaceAgentMonitorStateOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: true, + }, + { + name: "WhenNOK/NeverExceedsThreshold", + volumePath: "/home/coder", + volumeUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2}, + volumeTotal: 10, + thresholdPercent: 80, + previousState: database.WorkspaceAgentMonitorStateNOK, + expectState: database.WorkspaceAgentMonitorStateOK, + shouldNotify: false, + }, + { + name: "WhenNOK/ShouldStayInNOK", + volumePath: "/home/coder", + volumeUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2}, + volumeTotal: 10, + thresholdPercent: 80, + previousState: database.WorkspaceAgentMonitorStateNOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: false, + }, + { + name: "WhenNOK/ConsecutiveExceedsThreshold", + volumePath: "/home/coder", + volumeUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 9, 8, 9}, + volumeTotal: 10, + thresholdPercent: 80, + previousState: database.WorkspaceAgentMonitorStateNOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: false, + }, + { + name: "WhenNOK/MinimumExceedsThreshold", + volumePath: "/home/coder", + volumeUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9}, + volumeTotal: 10, + thresholdPercent: 80, + previousState: database.WorkspaceAgentMonitorStateNOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: false, + }, + } + + for _, tt := range tests { + tt := tt + + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + api, user, clock, notifyEnq := 
resourceMonitorAPI(t) + + datapoints := make([]*agentproto.PushResourcesMonitoringUsageRequest_Datapoint, 0, len(tt.volumeUsage)) + collectedAt := clock.Now() + for _, volumeUsage := range tt.volumeUsage { + collectedAt = collectedAt.Add(15 * time.Second) + + volumeDatapoints := []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + { + Volume: tt.volumePath, + Used: volumeUsage, + Total: tt.volumeTotal, + }, + } + + datapoints = append(datapoints, &agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + CollectedAt: timestamppb.New(collectedAt), + Volumes: volumeDatapoints, + }) + } + + dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{ + AgentID: api.AgentID, + Path: tt.volumePath, + State: tt.previousState, + Threshold: tt.thresholdPercent, + }) + + clock.Set(collectedAt) + _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: datapoints, + }) + require.NoError(t, err) + + sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + if tt.shouldNotify { + require.Len(t, sent, 1) + require.Equal(t, user.ID, sent[0].UserID) + } else { + require.Len(t, sent, 0) + } + }) + } +} + +func TestVolumeResourceMonitorMultiple(t *testing.T) { + t.Parallel() + + api, _, clock, notifyEnq := resourceMonitorAPI(t) + api.Config.Alert.ConsecutiveNOKsPercent = 100 + + // Given: two different volume resource monitors + dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{ + AgentID: api.AgentID, + Path: "/home/coder", + State: database.WorkspaceAgentMonitorStateOK, + Threshold: 80, + }) + + dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{ + AgentID: api.AgentID, + Path: "/dev/coder", + State: database.WorkspaceAgentMonitorStateOK, + Threshold: 80, + }) + + // When: both of them move to a 
NOK state + _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + { + Volume: "/home/coder", + Used: 10, + Total: 10, + }, + { + Volume: "/dev/coder", + Used: 10, + Total: 10, + }, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect a notification to alert with information about both + sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + require.Len(t, sent, 1) + + volumes := requireVolumeData(t, sent[0]) + require.Len(t, volumes, 2) + require.Equal(t, "/home/coder", volumes[0]["path"]) + require.Equal(t, "/dev/coder", volumes[1]["path"]) +} + +func TestVolumeResourceMonitorMissingData(t *testing.T) { + t.Parallel() + + t.Run("UnknownPreventsMovingIntoAlertState", func(t *testing.T) { + t.Parallel() + + volumePath := "/home/coder" + + api, _, clock, notifyEnq := resourceMonitorAPI(t) + api.Config.Alert.ConsecutiveNOKsPercent = 50 + api.Config.Alert.MinimumNOKsPercent = 100 + + // Given: A monitor in an OK state. + dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{ + AgentID: api.AgentID, + Path: volumePath, + State: database.WorkspaceAgentMonitorStateOK, + Threshold: 80, + }) + + // When: A datapoint is missing, surrounded by two NOK datapoints. 
+ _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + { + Volume: volumePath, + Used: 10, + Total: 10, + }, + }, + }, + { + CollectedAt: timestamppb.New(clock.Now().Add(10 * time.Second)), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{}, + }, + { + CollectedAt: timestamppb.New(clock.Now().Add(20 * time.Second)), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + { + Volume: volumePath, + Used: 10, + Total: 10, + }, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect no notifications, as this unknown prevents us knowing we should alert. + sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + require.Len(t, sent, 0) + + // Then: We expect the monitor to still be in an OK state. + monitors, err := api.Database.FetchVolumesResourceMonitorsByAgentID(context.Background(), api.AgentID) + require.NoError(t, err) + require.Len(t, monitors, 1) + require.Equal(t, database.WorkspaceAgentMonitorStateOK, monitors[0].State) + }) + + t.Run("UnknownPreventsMovingOutOfAlertState", func(t *testing.T) { + t.Parallel() + + volumePath := "/home/coder" + + api, _, clock, _ := resourceMonitorAPI(t) + api.Config.Alert.ConsecutiveNOKsPercent = 50 + api.Config.Alert.MinimumNOKsPercent = 100 + + // Given: A monitor in a NOK state. + dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{ + AgentID: api.AgentID, + Path: volumePath, + State: database.WorkspaceAgentMonitorStateNOK, + Threshold: 80, + }) + + // When: A datapoint is missing, surrounded by two OK datapoints. 
+ _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + { + Volume: volumePath, + Used: 1, + Total: 10, + }, + }, + }, + { + CollectedAt: timestamppb.New(clock.Now().Add(10 * time.Second)), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{}, + }, + { + CollectedAt: timestamppb.New(clock.Now().Add(20 * time.Second)), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + { + Volume: volumePath, + Used: 1, + Total: 10, + }, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect the monitor to still be in a NOK state. + monitors, err := api.Database.FetchVolumesResourceMonitorsByAgentID(context.Background(), api.AgentID) + require.NoError(t, err) + require.Len(t, monitors, 1) + require.Equal(t, database.WorkspaceAgentMonitorStateNOK, monitors[0].State) + }) +} + +func requireVolumeData(t *testing.T, notif *notificationstest.FakeNotification) []map[string]any { + t.Helper() + + volumesData := notif.Data["volumes"] + require.IsType(t, []map[string]any{}, volumesData) + + return volumesData.([]map[string]any) +} diff --git a/coderd/agentapi/resourcesmonitor/resources_monitor.go b/coderd/agentapi/resourcesmonitor/resources_monitor.go new file mode 100644 index 0000000000000..9b1749cd0abd6 --- /dev/null +++ b/coderd/agentapi/resourcesmonitor/resources_monitor.go @@ -0,0 +1,129 @@ +package resourcesmonitor + +import ( + "math" + "time" + + "github.com/coder/coder/v2/agent/proto" + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/util/slice" +) + +type State int + +const ( + StateOK State = iota + StateNOK + StateUnknown +) + +type AlertConfig struct { + // What percentage of datapoints in a row are + 
// required to put the monitor in an alert state. + ConsecutiveNOKsPercent int + + // What percentage of datapoints in a window are + // required to put the monitor in an alert state. + MinimumNOKsPercent int +} + +type Config struct { + // How many datapoints should the agent send + NumDatapoints int32 + + // How long between each datapoint should + // collection occur. + CollectionInterval time.Duration + + Alert AlertConfig +} + +func CalculateMemoryUsageStates( + monitor database.WorkspaceAgentMemoryResourceMonitor, + datapoints []*proto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage, +) []State { + states := make([]State, 0, len(datapoints)) + + for _, datapoint := range datapoints { + state := StateUnknown + + if datapoint != nil { + percent := int32(float64(datapoint.Used) / float64(datapoint.Total) * 100) + + if percent < monitor.Threshold { + state = StateOK + } else { + state = StateNOK + } + } + + states = append(states, state) + } + + return states +} + +func CalculateVolumeUsageStates( + monitor database.WorkspaceAgentVolumeResourceMonitor, + datapoints []*proto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage, +) []State { + states := make([]State, 0, len(datapoints)) + + for _, datapoint := range datapoints { + state := StateUnknown + + if datapoint != nil { + percent := int32(float64(datapoint.Used) / float64(datapoint.Total) * 100) + + if percent < monitor.Threshold { + state = StateOK + } else { + state = StateNOK + } + } + + states = append(states, state) + } + + return states +} + +func NextState(c Config, oldState database.WorkspaceAgentMonitorState, states []State) database.WorkspaceAgentMonitorState { + // If there are enough consecutive NOK states, we should be in an + // alert state. + consecutiveNOKs := slice.CountConsecutive(StateNOK, states...) 
// percent returns numerator as a whole-number percentage of denominator,
// rounded to the nearest integer with halves rounded away from zero
// (math.Round).
//
// A zero denominator returns 0. Without that guard the division produces NaN
// or ±Inf, and converting those to int gives an implementation-dependent
// result, which would make threshold comparisons differ between platforms.
func percent[T int](numerator, denominator T) int {
	if denominator == 0 {
		return 0
	}

	ratio := float64(numerator*100) / float64(denominator)
	return int(math.Round(ratio))
}
context.Context) context.Context { return context.WithValue(ctx, authContextKey{}, subjectNotifier) } +// AsResourceMonitor returns a context with an actor that has permissions required for +// updating resource monitors. +func AsResourceMonitor(ctx context.Context) context.Context { + return context.WithValue(ctx, authContextKey{}, subjectResourceMonitor) +} + // AsSystemRestricted returns a context with an actor that has permissions // required for various system operations (login, logout, metrics cache). func AsSystemRestricted(ctx context.Context) context.Context { @@ -3677,6 +3701,14 @@ func (q *querier) UpdateMemberRoles(ctx context.Context, arg database.UpdateMemb return q.db.UpdateMemberRoles(ctx, arg) } +func (q *querier) UpdateMemoryResourceMonitor(ctx context.Context, arg database.UpdateMemoryResourceMonitorParams) error { + if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceWorkspaceAgentResourceMonitor); err != nil { + return err + } + + return q.db.UpdateMemoryResourceMonitor(ctx, arg) +} + func (q *querier) UpdateNotificationTemplateMethodByID(ctx context.Context, arg database.UpdateNotificationTemplateMethodByIDParams) (database.NotificationTemplate, error) { if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceNotificationTemplate); err != nil { return database.NotificationTemplate{}, err @@ -4073,6 +4105,14 @@ func (q *querier) UpdateUserStatus(ctx context.Context, arg database.UpdateUserS return updateWithReturn(q.log, q.auth, fetch, q.db.UpdateUserStatus)(ctx, arg) } +func (q *querier) UpdateVolumeResourceMonitor(ctx context.Context, arg database.UpdateVolumeResourceMonitorParams) error { + if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceWorkspaceAgentResourceMonitor); err != nil { + return err + } + + return q.db.UpdateVolumeResourceMonitor(ctx, arg) +} + func (q *querier) UpdateWorkspace(ctx context.Context, arg database.UpdateWorkspaceParams) (database.WorkspaceTable, error) { fetch := 
func(ctx context.Context, arg database.UpdateWorkspaceParams) (database.WorkspaceTable, error) { w, err := q.db.GetWorkspaceByID(ctx, arg.ID) diff --git a/coderd/database/dbauthz/dbauthz_test.go b/coderd/database/dbauthz/dbauthz_test.go index 24ecf0b8eca47..3bf63c3300f13 100644 --- a/coderd/database/dbauthz/dbauthz_test.go +++ b/coderd/database/dbauthz/dbauthz_test.go @@ -4725,43 +4725,78 @@ func (s *MethodTestSuite) TestOAuth2ProviderAppTokens() { } func (s *MethodTestSuite) TestResourcesMonitor() { - s.Run("InsertMemoryResourceMonitor", s.Subtest(func(db database.Store, check *expects) { - dbtestutil.DisableForeignKeysAndTriggers(s.T(), db) - check.Args(database.InsertMemoryResourceMonitorParams{}).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionCreate) - })) + createAgent := func(t *testing.T, db database.Store) (database.WorkspaceAgent, database.WorkspaceTable) { + t.Helper() - s.Run("InsertVolumeResourceMonitor", s.Subtest(func(db database.Store, check *expects) { - dbtestutil.DisableForeignKeysAndTriggers(s.T(), db) - check.Args(database.InsertVolumeResourceMonitorParams{}).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionCreate) - })) - - s.Run("FetchMemoryResourceMonitorsByAgentID", s.Subtest(func(db database.Store, check *expects) { - u := dbgen.User(s.T(), db, database.User{}) - o := dbgen.Organization(s.T(), db, database.Organization{}) - tpl := dbgen.Template(s.T(), db, database.Template{ + u := dbgen.User(t, db, database.User{}) + o := dbgen.Organization(t, db, database.Organization{}) + tpl := dbgen.Template(t, db, database.Template{ OrganizationID: o.ID, CreatedBy: u.ID, }) - tv := dbgen.TemplateVersion(s.T(), db, database.TemplateVersion{ + tv := dbgen.TemplateVersion(t, db, database.TemplateVersion{ TemplateID: uuid.NullUUID{UUID: tpl.ID, Valid: true}, OrganizationID: o.ID, CreatedBy: u.ID, }) - w := dbgen.Workspace(s.T(), db, database.WorkspaceTable{ + w := dbgen.Workspace(t, db, database.WorkspaceTable{ 
TemplateID: tpl.ID, OrganizationID: o.ID, OwnerID: u.ID, }) - j := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{ + j := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{ Type: database.ProvisionerJobTypeWorkspaceBuild, }) - b := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{ + b := dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{ JobID: j.ID, WorkspaceID: w.ID, TemplateVersionID: tv.ID, }) - res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: b.JobID}) - agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID}) + res := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{JobID: b.JobID}) + agt := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{ResourceID: res.ID}) + + return agt, w + } + + s.Run("InsertMemoryResourceMonitor", s.Subtest(func(db database.Store, check *expects) { + agt, _ := createAgent(s.T(), db) + + check.Args(database.InsertMemoryResourceMonitorParams{ + AgentID: agt.ID, + State: database.WorkspaceAgentMonitorStateOK, + }).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionCreate) + })) + + s.Run("InsertVolumeResourceMonitor", s.Subtest(func(db database.Store, check *expects) { + agt, _ := createAgent(s.T(), db) + + check.Args(database.InsertVolumeResourceMonitorParams{ + AgentID: agt.ID, + State: database.WorkspaceAgentMonitorStateOK, + }).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionCreate) + })) + + s.Run("UpdateMemoryResourceMonitor", s.Subtest(func(db database.Store, check *expects) { + agt, _ := createAgent(s.T(), db) + + check.Args(database.UpdateMemoryResourceMonitorParams{ + AgentID: agt.ID, + State: database.WorkspaceAgentMonitorStateOK, + }).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionUpdate) + })) + + s.Run("UpdateVolumeResourceMonitor", s.Subtest(func(db database.Store, check *expects) { + agt, _ := createAgent(s.T(), db) + + check.Args(database.UpdateVolumeResourceMonitorParams{ + 
AgentID: agt.ID, + State: database.WorkspaceAgentMonitorStateOK, + }).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionUpdate) + })) + + s.Run("FetchMemoryResourceMonitorsByAgentID", s.Subtest(func(db database.Store, check *expects) { + agt, w := createAgent(s.T(), db) + dbgen.WorkspaceAgentMemoryResourceMonitor(s.T(), db, database.WorkspaceAgentMemoryResourceMonitor{ AgentID: agt.ID, Enabled: true, @@ -4776,32 +4811,8 @@ func (s *MethodTestSuite) TestResourcesMonitor() { })) s.Run("FetchVolumesResourceMonitorsByAgentID", s.Subtest(func(db database.Store, check *expects) { - u := dbgen.User(s.T(), db, database.User{}) - o := dbgen.Organization(s.T(), db, database.Organization{}) - tpl := dbgen.Template(s.T(), db, database.Template{ - OrganizationID: o.ID, - CreatedBy: u.ID, - }) - tv := dbgen.TemplateVersion(s.T(), db, database.TemplateVersion{ - TemplateID: uuid.NullUUID{UUID: tpl.ID, Valid: true}, - OrganizationID: o.ID, - CreatedBy: u.ID, - }) - w := dbgen.Workspace(s.T(), db, database.WorkspaceTable{ - TemplateID: tpl.ID, - OrganizationID: o.ID, - OwnerID: u.ID, - }) - j := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{ - Type: database.ProvisionerJobTypeWorkspaceBuild, - }) - b := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{ - JobID: j.ID, - WorkspaceID: w.ID, - TemplateVersionID: tv.ID, - }) - res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: b.JobID}) - agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID}) + agt, w := createAgent(s.T(), db) + dbgen.WorkspaceAgentVolumeResourceMonitor(s.T(), db, database.WorkspaceAgentVolumeResourceMonitor{ AgentID: agt.ID, Path: "/var/lib", diff --git a/coderd/database/dbgen/dbgen.go b/coderd/database/dbgen/dbgen.go index cfd360f740183..9c4ebbe8bb8ca 100644 --- a/coderd/database/dbgen/dbgen.go +++ b/coderd/database/dbgen/dbgen.go @@ -1038,10 +1038,13 @@ func OAuth2ProviderAppToken(t testing.TB, db database.Store, seed 
database.OAuth func WorkspaceAgentMemoryResourceMonitor(t testing.TB, db database.Store, seed database.WorkspaceAgentMemoryResourceMonitor) database.WorkspaceAgentMemoryResourceMonitor { monitor, err := db.InsertMemoryResourceMonitor(genCtx, database.InsertMemoryResourceMonitorParams{ - AgentID: takeFirst(seed.AgentID, uuid.New()), - Enabled: takeFirst(seed.Enabled, true), - Threshold: takeFirst(seed.Threshold, 100), - CreatedAt: takeFirst(seed.CreatedAt, dbtime.Now()), + AgentID: takeFirst(seed.AgentID, uuid.New()), + Enabled: takeFirst(seed.Enabled, true), + State: takeFirst(seed.State, database.WorkspaceAgentMonitorStateOK), + Threshold: takeFirst(seed.Threshold, 100), + CreatedAt: takeFirst(seed.CreatedAt, dbtime.Now()), + UpdatedAt: takeFirst(seed.UpdatedAt, dbtime.Now()), + DebouncedUntil: takeFirst(seed.DebouncedUntil, time.Time{}), }) require.NoError(t, err, "insert workspace agent memory resource monitor") return monitor @@ -1049,11 +1052,14 @@ func WorkspaceAgentMemoryResourceMonitor(t testing.TB, db database.Store, seed d func WorkspaceAgentVolumeResourceMonitor(t testing.TB, db database.Store, seed database.WorkspaceAgentVolumeResourceMonitor) database.WorkspaceAgentVolumeResourceMonitor { monitor, err := db.InsertVolumeResourceMonitor(genCtx, database.InsertVolumeResourceMonitorParams{ - AgentID: takeFirst(seed.AgentID, uuid.New()), - Path: takeFirst(seed.Path, "/"), - Enabled: takeFirst(seed.Enabled, true), - Threshold: takeFirst(seed.Threshold, 100), - CreatedAt: takeFirst(seed.CreatedAt, dbtime.Now()), + AgentID: takeFirst(seed.AgentID, uuid.New()), + Path: takeFirst(seed.Path, "/"), + Enabled: takeFirst(seed.Enabled, true), + State: takeFirst(seed.State, database.WorkspaceAgentMonitorStateOK), + Threshold: takeFirst(seed.Threshold, 100), + CreatedAt: takeFirst(seed.CreatedAt, dbtime.Now()), + UpdatedAt: takeFirst(seed.UpdatedAt, dbtime.Now()), + DebouncedUntil: takeFirst(seed.DebouncedUntil, time.Time{}), }) require.NoError(t, err, "insert 
workspace agent volume resource monitor") return monitor diff --git a/coderd/database/dbmem/dbmem.go b/coderd/database/dbmem/dbmem.go index 808e7b1a8a16c..7f56ea5f463e5 100644 --- a/coderd/database/dbmem/dbmem.go +++ b/coderd/database/dbmem/dbmem.go @@ -7989,7 +7989,16 @@ func (q *FakeQuerier) InsertMemoryResourceMonitor(_ context.Context, arg databas q.mutex.Lock() defer q.mutex.Unlock() - monitor := database.WorkspaceAgentMemoryResourceMonitor(arg) + //nolint:unconvert // The structs field-order differs so this is needed. + monitor := database.WorkspaceAgentMemoryResourceMonitor(database.WorkspaceAgentMemoryResourceMonitor{ + AgentID: arg.AgentID, + Enabled: arg.Enabled, + State: arg.State, + Threshold: arg.Threshold, + CreatedAt: arg.CreatedAt, + UpdatedAt: arg.UpdatedAt, + DebouncedUntil: arg.DebouncedUntil, + }) q.workspaceAgentMemoryResourceMonitors = append(q.workspaceAgentMemoryResourceMonitors, monitor) return monitor, nil @@ -8676,11 +8685,14 @@ func (q *FakeQuerier) InsertVolumeResourceMonitor(_ context.Context, arg databas defer q.mutex.Unlock() monitor := database.WorkspaceAgentVolumeResourceMonitor{ - AgentID: arg.AgentID, - Path: arg.Path, - Enabled: arg.Enabled, - Threshold: arg.Threshold, - CreatedAt: arg.CreatedAt, + AgentID: arg.AgentID, + Path: arg.Path, + Enabled: arg.Enabled, + State: arg.State, + Threshold: arg.Threshold, + CreatedAt: arg.CreatedAt, + UpdatedAt: arg.UpdatedAt, + DebouncedUntil: arg.DebouncedUntil, } q.workspaceAgentVolumeResourceMonitors = append(q.workspaceAgentVolumeResourceMonitors, monitor) @@ -9691,6 +9703,30 @@ func (q *FakeQuerier) UpdateMemberRoles(_ context.Context, arg database.UpdateMe return database.OrganizationMember{}, sql.ErrNoRows } +func (q *FakeQuerier) UpdateMemoryResourceMonitor(_ context.Context, arg database.UpdateMemoryResourceMonitorParams) error { + err := validateDatabaseType(arg) + if err != nil { + return err + } + + q.mutex.Lock() + defer q.mutex.Unlock() + + for i, monitor := range 
q.workspaceAgentMemoryResourceMonitors { + if monitor.AgentID != arg.AgentID { + continue + } + + monitor.State = arg.State + monitor.UpdatedAt = arg.UpdatedAt + monitor.DebouncedUntil = arg.DebouncedUntil + q.workspaceAgentMemoryResourceMonitors[i] = monitor + return nil + } + + return nil +} + func (*FakeQuerier) UpdateNotificationTemplateMethodByID(_ context.Context, _ database.UpdateNotificationTemplateMethodByIDParams) (database.NotificationTemplate, error) { // Not implementing this function because it relies on state in the database which is created with migrations. // We could consider using code-generation to align the database state and dbmem, but it's not worth it right now. @@ -10469,6 +10505,30 @@ func (q *FakeQuerier) UpdateUserStatus(_ context.Context, arg database.UpdateUse return database.User{}, sql.ErrNoRows } +func (q *FakeQuerier) UpdateVolumeResourceMonitor(_ context.Context, arg database.UpdateVolumeResourceMonitorParams) error { + err := validateDatabaseType(arg) + if err != nil { + return err + } + + q.mutex.Lock() + defer q.mutex.Unlock() + + for i, monitor := range q.workspaceAgentVolumeResourceMonitors { + if monitor.AgentID != arg.AgentID || monitor.Path != arg.Path { + continue + } + + monitor.State = arg.State + monitor.UpdatedAt = arg.UpdatedAt + monitor.DebouncedUntil = arg.DebouncedUntil + q.workspaceAgentVolumeResourceMonitors[i] = monitor + return nil + } + + return nil +} + func (q *FakeQuerier) UpdateWorkspace(_ context.Context, arg database.UpdateWorkspaceParams) (database.WorkspaceTable, error) { if err := validateDatabaseType(arg); err != nil { return database.WorkspaceTable{}, err diff --git a/coderd/database/dbmetrics/querymetrics.go b/coderd/database/dbmetrics/querymetrics.go index fc84f556aabfb..665c10658a5bc 100644 --- a/coderd/database/dbmetrics/querymetrics.go +++ b/coderd/database/dbmetrics/querymetrics.go @@ -2331,6 +2331,13 @@ func (m queryMetricsStore) UpdateMemberRoles(ctx context.Context, arg database.U return 
member, err } +func (m queryMetricsStore) UpdateMemoryResourceMonitor(ctx context.Context, arg database.UpdateMemoryResourceMonitorParams) error { + start := time.Now() + r0 := m.s.UpdateMemoryResourceMonitor(ctx, arg) + m.queryLatencies.WithLabelValues("UpdateMemoryResourceMonitor").Observe(time.Since(start).Seconds()) + return r0 +} + func (m queryMetricsStore) UpdateNotificationTemplateMethodByID(ctx context.Context, arg database.UpdateNotificationTemplateMethodByIDParams) (database.NotificationTemplate, error) { start := time.Now() r0, r1 := m.s.UpdateNotificationTemplateMethodByID(ctx, arg) @@ -2569,6 +2576,13 @@ func (m queryMetricsStore) UpdateUserStatus(ctx context.Context, arg database.Up return user, err } +func (m queryMetricsStore) UpdateVolumeResourceMonitor(ctx context.Context, arg database.UpdateVolumeResourceMonitorParams) error { + start := time.Now() + r0 := m.s.UpdateVolumeResourceMonitor(ctx, arg) + m.queryLatencies.WithLabelValues("UpdateVolumeResourceMonitor").Observe(time.Since(start).Seconds()) + return r0 +} + func (m queryMetricsStore) UpdateWorkspace(ctx context.Context, arg database.UpdateWorkspaceParams) (database.WorkspaceTable, error) { start := time.Now() workspace, err := m.s.UpdateWorkspace(ctx, arg) diff --git a/coderd/database/dbmock/dbmock.go b/coderd/database/dbmock/dbmock.go index d51631316a3cd..c7711505d7d51 100644 --- a/coderd/database/dbmock/dbmock.go +++ b/coderd/database/dbmock/dbmock.go @@ -4965,6 +4965,20 @@ func (mr *MockStoreMockRecorder) UpdateMemberRoles(ctx, arg any) *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdateMemberRoles", reflect.TypeOf((*MockStore)(nil).UpdateMemberRoles), ctx, arg) } +// UpdateMemoryResourceMonitor mocks base method. 
// UpdateVolumeResourceMonitor mocks base method.
//
// The call is handed to the mock controller under the method name
// "UpdateVolumeResourceMonitor"; the first value the controller returns is
// reported back as the error.
func (m *MockStore) UpdateVolumeResourceMonitor(ctx context.Context, arg database.UpdateVolumeResourceMonitorParams) error {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "UpdateVolumeResourceMonitor", ctx, arg)
	// The assertion result is deliberately ignored: a value that is not an
	// error leaves ret0 as a nil error.
	ret0, _ := ret[0].(error)
	return ret0
}
func (m *MockStore) UpdateWorkspace(ctx context.Context, arg database.UpdateWorkspaceParams) (database.WorkspaceTable, error) { m.ctrl.T.Helper() diff --git a/coderd/database/dump.sql b/coderd/database/dump.sql index 44bf68a36eb40..e699b34bd5433 100644 --- a/coderd/database/dump.sql +++ b/coderd/database/dump.sql @@ -244,6 +244,11 @@ CREATE TYPE workspace_agent_lifecycle_state AS ENUM ( 'off' ); +CREATE TYPE workspace_agent_monitor_state AS ENUM ( + 'OK', + 'NOK' +); + CREATE TYPE workspace_agent_script_timing_stage AS ENUM ( 'start', 'stop', @@ -1510,7 +1515,10 @@ CREATE TABLE workspace_agent_memory_resource_monitors ( agent_id uuid NOT NULL, enabled boolean NOT NULL, threshold integer NOT NULL, - created_at timestamp with time zone NOT NULL + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + state workspace_agent_monitor_state DEFAULT 'OK'::workspace_agent_monitor_state NOT NULL, + debounced_until timestamp with time zone DEFAULT '0001-01-01 00:00:00+00'::timestamp with time zone NOT NULL ); CREATE UNLOGGED TABLE workspace_agent_metadata ( @@ -1595,7 +1603,10 @@ CREATE TABLE workspace_agent_volume_resource_monitors ( enabled boolean NOT NULL, threshold integer NOT NULL, path text NOT NULL, - created_at timestamp with time zone NOT NULL + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, + state workspace_agent_monitor_state DEFAULT 'OK'::workspace_agent_monitor_state NOT NULL, + debounced_until timestamp with time zone DEFAULT '0001-01-01 00:00:00+00'::timestamp with time zone NOT NULL ); CREATE TABLE workspace_agents ( diff --git a/coderd/database/migrations/000294_workspace_monitors_state.down.sql b/coderd/database/migrations/000294_workspace_monitors_state.down.sql new file mode 100644 index 0000000000000..c3c6ce7c614ac --- /dev/null +++ b/coderd/database/migrations/000294_workspace_monitors_state.down.sql @@ -0,0 +1,11 
@@ +ALTER TABLE workspace_agent_volume_resource_monitors + DROP COLUMN updated_at, + DROP COLUMN state, + DROP COLUMN debounced_until; + +ALTER TABLE workspace_agent_memory_resource_monitors + DROP COLUMN updated_at, + DROP COLUMN state, + DROP COLUMN debounced_until; + +DROP TYPE workspace_agent_monitor_state; diff --git a/coderd/database/migrations/000294_workspace_monitors_state.up.sql b/coderd/database/migrations/000294_workspace_monitors_state.up.sql new file mode 100644 index 0000000000000..a6b1f7609d7da --- /dev/null +++ b/coderd/database/migrations/000294_workspace_monitors_state.up.sql @@ -0,0 +1,14 @@ +CREATE TYPE workspace_agent_monitor_state AS ENUM ( + 'OK', + 'NOK' +); + +ALTER TABLE workspace_agent_memory_resource_monitors + ADD COLUMN updated_at timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP, + ADD COLUMN state workspace_agent_monitor_state NOT NULL DEFAULT 'OK', + ADD COLUMN debounced_until timestamp with time zone NOT NULL DEFAULT '0001-01-01 00:00:00'::timestamptz; + +ALTER TABLE workspace_agent_volume_resource_monitors + ADD COLUMN updated_at timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP, + ADD COLUMN state workspace_agent_monitor_state NOT NULL DEFAULT 'OK', + ADD COLUMN debounced_until timestamp with time zone NOT NULL DEFAULT '0001-01-01 00:00:00'::timestamptz; diff --git a/coderd/database/modelmethods.go b/coderd/database/modelmethods.go index 63e03ccb27f40..171c0454563de 100644 --- a/coderd/database/modelmethods.go +++ b/coderd/database/modelmethods.go @@ -527,3 +527,31 @@ func (k CryptoKey) CanVerify(now time.Time) bool { func (r GetProvisionerJobsByOrganizationAndStatusWithQueuePositionAndProvisionerRow) RBACObject() rbac.Object { return r.ProvisionerJob.RBACObject() } + +func (m WorkspaceAgentMemoryResourceMonitor) Debounce( + by time.Duration, + now time.Time, + oldState, newState WorkspaceAgentMonitorState, +) (time.Time, bool) { + if now.After(m.DebouncedUntil) && + oldState == WorkspaceAgentMonitorStateOK 
&& + newState == WorkspaceAgentMonitorStateNOK { + return now.Add(by), true + } + + return m.DebouncedUntil, false +} + +func (m WorkspaceAgentVolumeResourceMonitor) Debounce( + by time.Duration, + now time.Time, + oldState, newState WorkspaceAgentMonitorState, +) (debouncedUntil time.Time, shouldNotify bool) { + if now.After(m.DebouncedUntil) && + oldState == WorkspaceAgentMonitorStateOK && + newState == WorkspaceAgentMonitorStateNOK { + return now.Add(by), true + } + + return m.DebouncedUntil, false +} diff --git a/coderd/database/models.go b/coderd/database/models.go index 9ddcba7897699..5411591eed51c 100644 --- a/coderd/database/models.go +++ b/coderd/database/models.go @@ -1976,6 +1976,64 @@ func AllWorkspaceAgentLifecycleStateValues() []WorkspaceAgentLifecycleState { } } +type WorkspaceAgentMonitorState string + +const ( + WorkspaceAgentMonitorStateOK WorkspaceAgentMonitorState = "OK" + WorkspaceAgentMonitorStateNOK WorkspaceAgentMonitorState = "NOK" +) + +func (e *WorkspaceAgentMonitorState) Scan(src interface{}) error { + switch s := src.(type) { + case []byte: + *e = WorkspaceAgentMonitorState(s) + case string: + *e = WorkspaceAgentMonitorState(s) + default: + return fmt.Errorf("unsupported scan type for WorkspaceAgentMonitorState: %T", src) + } + return nil +} + +type NullWorkspaceAgentMonitorState struct { + WorkspaceAgentMonitorState WorkspaceAgentMonitorState `json:"workspace_agent_monitor_state"` + Valid bool `json:"valid"` // Valid is true if WorkspaceAgentMonitorState is not NULL +} + +// Scan implements the Scanner interface. +func (ns *NullWorkspaceAgentMonitorState) Scan(value interface{}) error { + if value == nil { + ns.WorkspaceAgentMonitorState, ns.Valid = "", false + return nil + } + ns.Valid = true + return ns.WorkspaceAgentMonitorState.Scan(value) +} + +// Value implements the driver Valuer interface. 
+func (ns NullWorkspaceAgentMonitorState) Value() (driver.Value, error) { + if !ns.Valid { + return nil, nil + } + return string(ns.WorkspaceAgentMonitorState), nil +} + +func (e WorkspaceAgentMonitorState) Valid() bool { + switch e { + case WorkspaceAgentMonitorStateOK, + WorkspaceAgentMonitorStateNOK: + return true + } + return false +} + +func AllWorkspaceAgentMonitorStateValues() []WorkspaceAgentMonitorState { + return []WorkspaceAgentMonitorState{ + WorkspaceAgentMonitorStateOK, + WorkspaceAgentMonitorStateNOK, + } +} + // What stage the script was ran in. type WorkspaceAgentScriptTimingStage string @@ -3185,10 +3243,13 @@ type WorkspaceAgentLogSource struct { } type WorkspaceAgentMemoryResourceMonitor struct { - AgentID uuid.UUID `db:"agent_id" json:"agent_id"` - Enabled bool `db:"enabled" json:"enabled"` - Threshold int32 `db:"threshold" json:"threshold"` - CreatedAt time.Time `db:"created_at" json:"created_at"` + AgentID uuid.UUID `db:"agent_id" json:"agent_id"` + Enabled bool `db:"enabled" json:"enabled"` + Threshold int32 `db:"threshold" json:"threshold"` + CreatedAt time.Time `db:"created_at" json:"created_at"` + UpdatedAt time.Time `db:"updated_at" json:"updated_at"` + State WorkspaceAgentMonitorState `db:"state" json:"state"` + DebouncedUntil time.Time `db:"debounced_until" json:"debounced_until"` } type WorkspaceAgentMetadatum struct { @@ -3259,11 +3320,14 @@ type WorkspaceAgentStat struct { } type WorkspaceAgentVolumeResourceMonitor struct { - AgentID uuid.UUID `db:"agent_id" json:"agent_id"` - Enabled bool `db:"enabled" json:"enabled"` - Threshold int32 `db:"threshold" json:"threshold"` - Path string `db:"path" json:"path"` - CreatedAt time.Time `db:"created_at" json:"created_at"` + AgentID uuid.UUID `db:"agent_id" json:"agent_id"` + Enabled bool `db:"enabled" json:"enabled"` + Threshold int32 `db:"threshold" json:"threshold"` + Path string `db:"path" json:"path"` + CreatedAt time.Time `db:"created_at" json:"created_at"` + UpdatedAt time.Time 
`db:"updated_at" json:"updated_at"` + State WorkspaceAgentMonitorState `db:"state" json:"state"` + DebouncedUntil time.Time `db:"debounced_until" json:"debounced_until"` } type WorkspaceApp struct { diff --git a/coderd/database/querier.go b/coderd/database/querier.go index 31c4a18a5808a..42b88d855e4c3 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -480,6 +480,7 @@ type sqlcQuerier interface { UpdateGroupByID(ctx context.Context, arg UpdateGroupByIDParams) (Group, error) UpdateInactiveUsersToDormant(ctx context.Context, arg UpdateInactiveUsersToDormantParams) ([]UpdateInactiveUsersToDormantRow, error) UpdateMemberRoles(ctx context.Context, arg UpdateMemberRolesParams) (OrganizationMember, error) + UpdateMemoryResourceMonitor(ctx context.Context, arg UpdateMemoryResourceMonitorParams) error UpdateNotificationTemplateMethodByID(ctx context.Context, arg UpdateNotificationTemplateMethodByIDParams) (NotificationTemplate, error) UpdateOAuth2ProviderAppByID(ctx context.Context, arg UpdateOAuth2ProviderAppByIDParams) (OAuth2ProviderApp, error) UpdateOAuth2ProviderAppSecretByID(ctx context.Context, arg UpdateOAuth2ProviderAppSecretByIDParams) (OAuth2ProviderAppSecret, error) @@ -514,6 +515,7 @@ type sqlcQuerier interface { UpdateUserQuietHoursSchedule(ctx context.Context, arg UpdateUserQuietHoursScheduleParams) (User, error) UpdateUserRoles(ctx context.Context, arg UpdateUserRolesParams) (User, error) UpdateUserStatus(ctx context.Context, arg UpdateUserStatusParams) (User, error) + UpdateVolumeResourceMonitor(ctx context.Context, arg UpdateVolumeResourceMonitorParams) error UpdateWorkspace(ctx context.Context, arg UpdateWorkspaceParams) (WorkspaceTable, error) UpdateWorkspaceAgentConnectionByID(ctx context.Context, arg UpdateWorkspaceAgentConnectionByIDParams) error UpdateWorkspaceAgentLifecycleStateByID(ctx context.Context, arg UpdateWorkspaceAgentLifecycleStateByIDParams) error diff --git a/coderd/database/queries.sql.go 
b/coderd/database/queries.sql.go index dc9b04c2244f0..58722dc152005 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -12044,7 +12044,7 @@ func (q *sqlQuerier) UpsertWorkspaceAgentPortShare(ctx context.Context, arg Upse const fetchMemoryResourceMonitorsByAgentID = `-- name: FetchMemoryResourceMonitorsByAgentID :one SELECT - agent_id, enabled, threshold, created_at + agent_id, enabled, threshold, created_at, updated_at, state, debounced_until FROM workspace_agent_memory_resource_monitors WHERE @@ -12059,13 +12059,16 @@ func (q *sqlQuerier) FetchMemoryResourceMonitorsByAgentID(ctx context.Context, a &i.Enabled, &i.Threshold, &i.CreatedAt, + &i.UpdatedAt, + &i.State, + &i.DebouncedUntil, ) return i, err } const fetchVolumesResourceMonitorsByAgentID = `-- name: FetchVolumesResourceMonitorsByAgentID :many SELECT - agent_id, enabled, threshold, path, created_at + agent_id, enabled, threshold, path, created_at, updated_at, state, debounced_until FROM workspace_agent_volume_resource_monitors WHERE @@ -12087,6 +12090,9 @@ func (q *sqlQuerier) FetchVolumesResourceMonitorsByAgentID(ctx context.Context, &i.Threshold, &i.Path, &i.CreatedAt, + &i.UpdatedAt, + &i.State, + &i.DebouncedUntil, ); err != nil { return nil, err } @@ -12106,26 +12112,35 @@ INSERT INTO workspace_agent_memory_resource_monitors ( agent_id, enabled, + state, threshold, - created_at + created_at, + updated_at, + debounced_until ) VALUES - ($1, $2, $3, $4) RETURNING agent_id, enabled, threshold, created_at + ($1, $2, $3, $4, $5, $6, $7) RETURNING agent_id, enabled, threshold, created_at, updated_at, state, debounced_until ` type InsertMemoryResourceMonitorParams struct { - AgentID uuid.UUID `db:"agent_id" json:"agent_id"` - Enabled bool `db:"enabled" json:"enabled"` - Threshold int32 `db:"threshold" json:"threshold"` - CreatedAt time.Time `db:"created_at" json:"created_at"` + AgentID uuid.UUID `db:"agent_id" json:"agent_id"` + Enabled bool `db:"enabled" json:"enabled"` + 
State WorkspaceAgentMonitorState `db:"state" json:"state"` + Threshold int32 `db:"threshold" json:"threshold"` + CreatedAt time.Time `db:"created_at" json:"created_at"` + UpdatedAt time.Time `db:"updated_at" json:"updated_at"` + DebouncedUntil time.Time `db:"debounced_until" json:"debounced_until"` } func (q *sqlQuerier) InsertMemoryResourceMonitor(ctx context.Context, arg InsertMemoryResourceMonitorParams) (WorkspaceAgentMemoryResourceMonitor, error) { row := q.db.QueryRowContext(ctx, insertMemoryResourceMonitor, arg.AgentID, arg.Enabled, + arg.State, arg.Threshold, arg.CreatedAt, + arg.UpdatedAt, + arg.DebouncedUntil, ) var i WorkspaceAgentMemoryResourceMonitor err := row.Scan( @@ -12133,6 +12148,9 @@ func (q *sqlQuerier) InsertMemoryResourceMonitor(ctx context.Context, arg Insert &i.Enabled, &i.Threshold, &i.CreatedAt, + &i.UpdatedAt, + &i.State, + &i.DebouncedUntil, ) return i, err } @@ -12143,19 +12161,25 @@ INSERT INTO agent_id, path, enabled, + state, threshold, - created_at + created_at, + updated_at, + debounced_until ) VALUES - ($1, $2, $3, $4, $5) RETURNING agent_id, enabled, threshold, path, created_at + ($1, $2, $3, $4, $5, $6, $7, $8) RETURNING agent_id, enabled, threshold, path, created_at, updated_at, state, debounced_until ` type InsertVolumeResourceMonitorParams struct { - AgentID uuid.UUID `db:"agent_id" json:"agent_id"` - Path string `db:"path" json:"path"` - Enabled bool `db:"enabled" json:"enabled"` - Threshold int32 `db:"threshold" json:"threshold"` - CreatedAt time.Time `db:"created_at" json:"created_at"` + AgentID uuid.UUID `db:"agent_id" json:"agent_id"` + Path string `db:"path" json:"path"` + Enabled bool `db:"enabled" json:"enabled"` + State WorkspaceAgentMonitorState `db:"state" json:"state"` + Threshold int32 `db:"threshold" json:"threshold"` + CreatedAt time.Time `db:"created_at" json:"created_at"` + UpdatedAt time.Time `db:"updated_at" json:"updated_at"` + DebouncedUntil time.Time `db:"debounced_until" json:"debounced_until"` } func 
(q *sqlQuerier) InsertVolumeResourceMonitor(ctx context.Context, arg InsertVolumeResourceMonitorParams) (WorkspaceAgentVolumeResourceMonitor, error) { @@ -12163,8 +12187,11 @@ func (q *sqlQuerier) InsertVolumeResourceMonitor(ctx context.Context, arg Insert arg.AgentID, arg.Path, arg.Enabled, + arg.State, arg.Threshold, arg.CreatedAt, + arg.UpdatedAt, + arg.DebouncedUntil, ) var i WorkspaceAgentVolumeResourceMonitor err := row.Scan( @@ -12173,10 +12200,69 @@ func (q *sqlQuerier) InsertVolumeResourceMonitor(ctx context.Context, arg Insert &i.Threshold, &i.Path, &i.CreatedAt, + &i.UpdatedAt, + &i.State, + &i.DebouncedUntil, ) return i, err } +const updateMemoryResourceMonitor = `-- name: UpdateMemoryResourceMonitor :exec +UPDATE workspace_agent_memory_resource_monitors +SET + updated_at = $2, + state = $3, + debounced_until = $4 +WHERE + agent_id = $1 +` + +type UpdateMemoryResourceMonitorParams struct { + AgentID uuid.UUID `db:"agent_id" json:"agent_id"` + UpdatedAt time.Time `db:"updated_at" json:"updated_at"` + State WorkspaceAgentMonitorState `db:"state" json:"state"` + DebouncedUntil time.Time `db:"debounced_until" json:"debounced_until"` +} + +func (q *sqlQuerier) UpdateMemoryResourceMonitor(ctx context.Context, arg UpdateMemoryResourceMonitorParams) error { + _, err := q.db.ExecContext(ctx, updateMemoryResourceMonitor, + arg.AgentID, + arg.UpdatedAt, + arg.State, + arg.DebouncedUntil, + ) + return err +} + +const updateVolumeResourceMonitor = `-- name: UpdateVolumeResourceMonitor :exec +UPDATE workspace_agent_volume_resource_monitors +SET + updated_at = $3, + state = $4, + debounced_until = $5 +WHERE + agent_id = $1 AND path = $2 +` + +type UpdateVolumeResourceMonitorParams struct { + AgentID uuid.UUID `db:"agent_id" json:"agent_id"` + Path string `db:"path" json:"path"` + UpdatedAt time.Time `db:"updated_at" json:"updated_at"` + State WorkspaceAgentMonitorState `db:"state" json:"state"` + DebouncedUntil time.Time `db:"debounced_until" json:"debounced_until"` 
+} + +func (q *sqlQuerier) UpdateVolumeResourceMonitor(ctx context.Context, arg UpdateVolumeResourceMonitorParams) error { + _, err := q.db.ExecContext(ctx, updateVolumeResourceMonitor, + arg.AgentID, + arg.Path, + arg.UpdatedAt, + arg.State, + arg.DebouncedUntil, + ) + return err +} + const deleteOldWorkspaceAgentLogs = `-- name: DeleteOldWorkspaceAgentLogs :exec WITH latest_builds AS ( diff --git a/coderd/database/queries/workspaceagentresourcemonitors.sql b/coderd/database/queries/workspaceagentresourcemonitors.sql index e70ef85f3cbd5..84ee5c67b37ef 100644 --- a/coderd/database/queries/workspaceagentresourcemonitors.sql +++ b/coderd/database/queries/workspaceagentresourcemonitors.sql @@ -19,11 +19,14 @@ INSERT INTO workspace_agent_memory_resource_monitors ( agent_id, enabled, + state, threshold, - created_at + created_at, + updated_at, + debounced_until ) VALUES - ($1, $2, $3, $4) RETURNING *; + ($1, $2, $3, $4, $5, $6, $7) RETURNING *; -- name: InsertVolumeResourceMonitor :one INSERT INTO @@ -31,8 +34,29 @@ INSERT INTO agent_id, path, enabled, + state, threshold, - created_at + created_at, + updated_at, + debounced_until ) VALUES - ($1, $2, $3, $4, $5) RETURNING *; + ($1, $2, $3, $4, $5, $6, $7, $8) RETURNING *; + +-- name: UpdateMemoryResourceMonitor :exec +UPDATE workspace_agent_memory_resource_monitors +SET + updated_at = $2, + state = $3, + debounced_until = $4 +WHERE + agent_id = $1; + +-- name: UpdateVolumeResourceMonitor :exec +UPDATE workspace_agent_volume_resource_monitors +SET + updated_at = $3, + state = $4, + debounced_until = $5 +WHERE + agent_id = $1 AND path = $2; diff --git a/coderd/provisionerdserver/provisionerdserver.go b/coderd/provisionerdserver/provisionerdserver.go index 2a58aa421f1c8..b928be1b52481 100644 --- a/coderd/provisionerdserver/provisionerdserver.go +++ b/coderd/provisionerdserver/provisionerdserver.go @@ -1981,10 +1981,13 @@ func InsertWorkspaceResource(ctx context.Context, db database.Store, jobID uuid. 
if prAgent.ResourcesMonitoring != nil { if prAgent.ResourcesMonitoring.Memory != nil { _, err = db.InsertMemoryResourceMonitor(ctx, database.InsertMemoryResourceMonitorParams{ - AgentID: agentID, - Enabled: prAgent.ResourcesMonitoring.Memory.Enabled, - Threshold: prAgent.ResourcesMonitoring.Memory.Threshold, - CreatedAt: dbtime.Now(), + AgentID: agentID, + Enabled: prAgent.ResourcesMonitoring.Memory.Enabled, + Threshold: prAgent.ResourcesMonitoring.Memory.Threshold, + State: database.WorkspaceAgentMonitorStateOK, + CreatedAt: dbtime.Now(), + UpdatedAt: dbtime.Now(), + DebouncedUntil: time.Time{}, }) if err != nil { return xerrors.Errorf("failed to insert agent memory resource monitor into db: %w", err) @@ -1992,11 +1995,14 @@ func InsertWorkspaceResource(ctx context.Context, db database.Store, jobID uuid. } for _, volume := range prAgent.ResourcesMonitoring.Volumes { _, err = db.InsertVolumeResourceMonitor(ctx, database.InsertVolumeResourceMonitorParams{ - AgentID: agentID, - Path: volume.Path, - Enabled: volume.Enabled, - Threshold: volume.Threshold, - CreatedAt: dbtime.Now(), + AgentID: agentID, + Path: volume.Path, + Enabled: volume.Enabled, + Threshold: volume.Threshold, + State: database.WorkspaceAgentMonitorStateOK, + CreatedAt: dbtime.Now(), + UpdatedAt: dbtime.Now(), + DebouncedUntil: time.Time{}, }) if err != nil { return xerrors.Errorf("failed to insert agent volume resource monitor into db: %w", err) diff --git a/coderd/rbac/object_gen.go b/coderd/rbac/object_gen.go index 547e10859b5b7..e5323225120b5 100644 --- a/coderd/rbac/object_gen.go +++ b/coderd/rbac/object_gen.go @@ -299,6 +299,7 @@ var ( // Valid Actions // - "ActionCreate" :: create workspace agent resource monitor // - "ActionRead" :: read workspace agent resource monitor + // - "ActionUpdate" :: update workspace agent resource monitor ResourceWorkspaceAgentResourceMonitor = Object{ Type: "workspace_agent_resource_monitor", } diff --git a/coderd/rbac/policy/policy.go 
b/coderd/rbac/policy/policy.go index 6dc64f6660248..c06a2117cb4e9 100644 --- a/coderd/rbac/policy/policy.go +++ b/coderd/rbac/policy/policy.go @@ -306,6 +306,7 @@ var RBACPermissions = map[string]PermissionDefinition{ Actions: map[Action]ActionDefinition{ ActionRead: actDef("read workspace agent resource monitor"), ActionCreate: actDef("create workspace agent resource monitor"), + ActionUpdate: actDef("update workspace agent resource monitor"), }, }, } diff --git a/coderd/rbac/roles_test.go b/coderd/rbac/roles_test.go index 6db591d028454..db0d9832579fc 100644 --- a/coderd/rbac/roles_test.go +++ b/coderd/rbac/roles_test.go @@ -779,7 +779,7 @@ func TestRolePermissions(t *testing.T) { }, { Name: "ResourceMonitor", - Actions: []policy.Action{policy.ActionRead, policy.ActionCreate}, + Actions: []policy.Action{policy.ActionRead, policy.ActionCreate, policy.ActionUpdate}, Resource: rbac.ResourceWorkspaceAgentResourceMonitor, AuthorizeMap: map[bool][]hasAuthSubjects{ true: {owner}, diff --git a/coderd/util/slice/slice.go b/coderd/util/slice/slice.go index 2a62e23592d84..508827dfaae81 100644 --- a/coderd/util/slice/slice.go +++ b/coderd/util/slice/slice.go @@ -177,3 +177,19 @@ func DifferenceFunc[T any](a []T, b []T, equal func(a, b T) bool) []T { } return tmp } + +func CountConsecutive[T comparable](needle T, haystack ...T) int { + maxLength := 0 + curLength := 0 + + for _, v := range haystack { + if v == needle { + curLength++ + } else { + maxLength = max(maxLength, curLength) + curLength = 0 + } + } + + return max(maxLength, curLength) +} diff --git a/coderd/workspaceagentsrpc.go b/coderd/workspaceagentsrpc.go index cbb3a1bc44b8a..c794c9c14349b 100644 --- a/coderd/workspaceagentsrpc.go +++ b/coderd/workspaceagentsrpc.go @@ -143,7 +143,9 @@ func (api *API) workspaceAgentRPC(rw http.ResponseWriter, r *http.Request) { Ctx: api.ctx, Log: logger, + Clock: api.Clock, Database: api.Database, + NotificationsEnqueuer: api.NotificationsEnqueuer, Pubsub: api.Pubsub, DerpMapFn: 
api.DERPMap, TailnetCoordinator: &api.TailnetCoordinator, diff --git a/codersdk/rbacresources_gen.go b/codersdk/rbacresources_gen.go index 8afb1858ca15c..f4d7790d40b76 100644 --- a/codersdk/rbacresources_gen.go +++ b/codersdk/rbacresources_gen.go @@ -92,7 +92,7 @@ var RBACResourceActions = map[RBACResource][]RBACAction{ ResourceTemplate: {ActionCreate, ActionDelete, ActionRead, ActionUpdate, ActionUse, ActionViewInsights}, ResourceUser: {ActionCreate, ActionDelete, ActionRead, ActionReadPersonal, ActionUpdate, ActionUpdatePersonal}, ResourceWorkspace: {ActionApplicationConnect, ActionCreate, ActionDelete, ActionRead, ActionSSH, ActionWorkspaceStart, ActionWorkspaceStop, ActionUpdate}, - ResourceWorkspaceAgentResourceMonitor: {ActionCreate, ActionRead}, + ResourceWorkspaceAgentResourceMonitor: {ActionCreate, ActionRead, ActionUpdate}, ResourceWorkspaceDormant: {ActionApplicationConnect, ActionCreate, ActionDelete, ActionRead, ActionSSH, ActionWorkspaceStart, ActionWorkspaceStop, ActionUpdate}, ResourceWorkspaceProxy: {ActionCreate, ActionDelete, ActionRead, ActionUpdate}, } diff --git a/site/src/api/rbacresourcesGenerated.ts b/site/src/api/rbacresourcesGenerated.ts index e557ceddbdda6..437f89ec776a7 100644 --- a/site/src/api/rbacresourcesGenerated.ts +++ b/site/src/api/rbacresourcesGenerated.ts @@ -171,6 +171,7 @@ export const RBACResourceActions: Partial< workspace_agent_resource_monitor: { create: "create workspace agent resource monitor", read: "read workspace agent resource monitor", + update: "update workspace agent resource monitor", }, workspace_dormant: { application_connect: "connect to workspace apps via browser",
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: