diff --git a/.github/actions/test-cache/download/action.yml b/.github/actions/test-cache/download/action.yml new file mode 100644 index 0000000000000..06a87fee06d4b --- /dev/null +++ b/.github/actions/test-cache/download/action.yml @@ -0,0 +1,50 @@ +name: "Download Test Cache" +description: | + Downloads the test cache and outputs today's cache key. + A PR job can use a cache if it was created by its base branch, its current + branch, or the default branch. + https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows#restrictions-for-accessing-a-cache +outputs: + cache-key: + description: "Today's cache key" + value: ${{ steps.vars.outputs.cache-key }} +inputs: + key-prefix: + description: "Prefix for the cache key" + required: true + cache-path: + description: "Path to the cache directory" + required: true + # This path is defined in testutil/cache.go + default: "~/.cache/coderv2-test" +runs: + using: "composite" + steps: + - name: Get date values and cache key + id: vars + shell: bash + run: | + export YEAR_MONTH=$(date +'%Y-%m') + export PREV_YEAR_MONTH=$(date -d 'last month' +'%Y-%m') + export DAY=$(date +'%d') + echo "year-month=$YEAR_MONTH" >> $GITHUB_OUTPUT + echo "prev-year-month=$PREV_YEAR_MONTH" >> $GITHUB_OUTPUT + echo "cache-key=${{ inputs.key-prefix }}-${YEAR_MONTH}-${DAY}" >> $GITHUB_OUTPUT + + # TODO: As a cost optimization, we could remove caches that are older than + # a day or two. By default, depot keeps caches for 14 days, which isn't + # necessary for the test cache. + # https://depot.dev/docs/github-actions/overview#cache-retention-policy + - name: Download test cache + uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: ${{ inputs.cache-path }} + key: ${{ steps.vars.outputs.cache-key }} + # > If there are multiple partial matches for a restore key, the action returns the most recently created cache. + # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows#matching-a-cache-key + # The second restore key allows non-main branches to use the cache from the previous month. + # This prevents PRs from rebuilding the cache on the first day of the month. + # It also makes sure that once a month, the cache is fully reset. + restore-keys: | + ${{ inputs.key-prefix }}-${{ steps.vars.outputs.year-month }}- + ${{ github.ref != 'refs/heads/main' && format('{0}-{1}-', inputs.key-prefix, steps.vars.outputs.prev-year-month) || '' }} diff --git a/.github/actions/test-cache/upload/action.yml b/.github/actions/test-cache/upload/action.yml new file mode 100644 index 0000000000000..a4d524164c74c --- /dev/null +++ b/.github/actions/test-cache/upload/action.yml @@ -0,0 +1,20 @@ +name: "Upload Test Cache" +description: Uploads the test cache. Only works on the main branch. +inputs: + cache-key: + description: "Cache key" + required: true + cache-path: + description: "Path to the cache directory" + required: true + # This path is defined in testutil/cache.go + default: "~/.cache/coderv2-test" +runs: + using: "composite" + steps: + - name: Upload test cache + if: ${{ github.ref == 'refs/heads/main' }} + uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: ${{ inputs.cache-path }} + key: ${{ inputs.cache-key }} diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 54239330f2a4f..3c5104fbefa5f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -346,6 +346,12 @@ jobs: - name: Setup Terraform uses: ./.github/actions/setup-tf + - name: Download Test Cache + id: download-cache + uses: ./.github/actions/test-cache/download + with: + key-prefix: test-go-${{ runner.os }}-${{ runner.arch }} + - name: Test with Mock Database id: test shell: bash @@ -370,6 +376,11 @@ jobs: gotestsum --junitfile="gotests.xml" --jsonfile="gotests.json" \ --packages="./..." -- $PARALLEL_FLAG -short -failfast + - name: Upload Test Cache + uses: ./.github/actions/test-cache/upload + with: + cache-key: ${{ steps.download-cache.outputs.cache-key }} + - name: Upload test stats to Datadog timeout-minutes: 1 continue-on-error: true @@ -467,6 +478,12 @@ jobs: if: runner.os == 'Windows' uses: ./.github/actions/setup-imdisk + - name: Download Test Cache + id: download-cache + uses: ./.github/actions/test-cache/download + with: + key-prefix: test-go-pg-${{ runner.os }}-${{ runner.arch }} + - name: Test with PostgreSQL Database env: POSTGRES_VERSION: "13" @@ -481,6 +498,11 @@ jobs: make test-postgres + - name: Upload Test Cache + uses: ./.github/actions/test-cache/upload + with: + cache-key: ${{ steps.download-cache.outputs.cache-key }} + - name: Upload test stats to Datadog timeout-minutes: 1 continue-on-error: true @@ -519,6 +541,12 @@ jobs: - name: Setup Terraform uses: ./.github/actions/setup-tf + - name: Download Test Cache + id: download-cache + uses: ./.github/actions/test-cache/download + with: + key-prefix: test-go-pg-16-${{ runner.os }}-${{ runner.arch }} + - name: Test with PostgreSQL Database env: POSTGRES_VERSION: "16" @@ -526,6 +554,11 @@ jobs: run: | make test-postgres + - name: Upload Test Cache + uses: ./.github/actions/test-cache/upload + with: + cache-key: ${{ steps.download-cache.outputs.cache-key }} + - name: Upload test stats to Datadog timeout-minutes: 1 continue-on-error: true @@ -556,6 +589,12 @@ jobs: - name: Setup Terraform uses: ./.github/actions/setup-tf + - name: Download Test Cache + id: download-cache + uses: ./.github/actions/test-cache/download + with: + key-prefix: test-go-race-${{ runner.os }}-${{ runner.arch }} + # We run race tests with reduced parallelism because they use more CPU and we were finding # instances where tests appear to hang for multiple seconds, resulting in flaky tests when # short timeouts are used. @@ -564,6 +603,11 @@ jobs: run: | gotestsum --junitfile="gotests.xml" -- -race -parallel 4 -p 4 ./... + - name: Upload Test Cache + uses: ./.github/actions/test-cache/upload + with: + cache-key: ${{ steps.download-cache.outputs.cache-key }} + - name: Upload test stats to Datadog timeout-minutes: 1 continue-on-error: true @@ -594,6 +638,12 @@ jobs: - name: Setup Terraform uses: ./.github/actions/setup-tf + - name: Download Test Cache + id: download-cache + uses: ./.github/actions/test-cache/download + with: + key-prefix: test-go-race-pg-${{ runner.os }}-${{ runner.arch }} + # We run race tests with reduced parallelism because they use more CPU and we were finding # instances where tests appear to hang for multiple seconds, resulting in flaky tests when # short timeouts are used. @@ -605,6 +655,11 @@ jobs: make test-postgres-docker DB=ci gotestsum --junitfile="gotests.xml" -- -race -parallel 4 -p 4 ./... + - name: Upload Test Cache + uses: ./.github/actions/test-cache/upload + with: + cache-key: ${{ steps.download-cache.outputs.cache-key }} + - name: Upload test stats to Datadog timeout-minutes: 1 continue-on-error: true diff --git a/provisioner/terraform/executor.go b/provisioner/terraform/executor.go index 150f51e6dd10d..442ed36074eb2 100644 --- a/provisioner/terraform/executor.go +++ b/provisioner/terraform/executor.go @@ -35,8 +35,9 @@ type executor struct { mut *sync.Mutex binaryPath string // cachePath and workdir must not be used by multiple processes at once. - cachePath string - workdir string + cachePath string + cliConfigPath string + workdir string // used to capture execution times at various stages timings *timingAggregator } @@ -50,6 +51,9 @@ func (e *executor) basicEnv() []string { if e.cachePath != "" && runtime.GOOS == "linux" { env = append(env, "TF_PLUGIN_CACHE_DIR="+e.cachePath) } + if e.cliConfigPath != "" { + env = append(env, "TF_CLI_CONFIG_FILE="+e.cliConfigPath) + } return env } diff --git a/provisioner/terraform/provision_test.go b/provisioner/terraform/provision_test.go index e7b64046f3ab3..96514cc4b59ad 100644 --- a/provisioner/terraform/provision_test.go +++ b/provisioner/terraform/provision_test.go @@ -3,13 +3,17 @@ package terraform_test import ( + "bytes" "context" + "crypto/sha256" + "encoding/hex" "encoding/json" "errors" "fmt" "net" "net/http" "os" + "os/exec" "path/filepath" "sort" "strings" @@ -29,10 +33,11 @@ import ( ) type provisionerServeOptions struct { - binaryPath string - exitTimeout time.Duration - workDir string - logger *slog.Logger + binaryPath string + cliConfigPath string + exitTimeout time.Duration + workDir string + logger *slog.Logger } func setupProvisioner(t *testing.T, opts *provisionerServeOptions) (context.Context, proto.DRPCProvisionerClient) { @@ -66,9 +71,10 @@ func setupProvisioner(t *testing.T, opts *provisionerServeOptions) (context.Cont Logger: *opts.logger, WorkDirectory: opts.workDir, }, - BinaryPath: opts.binaryPath, - CachePath: cachePath, - ExitTimeout: opts.exitTimeout, + BinaryPath: opts.binaryPath, + CachePath: cachePath, + ExitTimeout: opts.exitTimeout, + CliConfigPath: opts.cliConfigPath, }) }() api := proto.NewDRPCProvisionerClient(client) @@ -85,6 +91,168 @@ func configure(ctx context.Context, t *testing.T, client proto.DRPCProvisionerCl return sess } +func hashTemplateFilesAndTestName(t *testing.T, testName string, templateFiles map[string]string) string { + t.Helper() + + sortedFileNames := make([]string, 0, len(templateFiles)) + for fileName := range templateFiles { + sortedFileNames = append(sortedFileNames, fileName) + } + sort.Strings(sortedFileNames) + + // Inserting a delimiter between the file name and the file content + // ensures that a file named `ab` with content `cd` + // will not hash to the same value as a file named `abc` with content `d`. + // This can still happen if the file name or content include the delimiter, + // but hopefully they won't. + delimiter := []byte("🎉 🌱 🌷") + + hasher := sha256.New() + for _, fileName := range sortedFileNames { + file := templateFiles[fileName] + _, err := hasher.Write([]byte(fileName)) + require.NoError(t, err) + _, err = hasher.Write(delimiter) + require.NoError(t, err) + _, err = hasher.Write([]byte(file)) + require.NoError(t, err) + } + _, err := hasher.Write(delimiter) + require.NoError(t, err) + _, err = hasher.Write([]byte(testName)) + require.NoError(t, err) + + return hex.EncodeToString(hasher.Sum(nil)) +} + +const ( + terraformConfigFileName = "terraform.rc" + cacheProvidersDirName = "providers" + cacheTemplateFilesDirName = "files" +) + +// Writes a Terraform CLI config file (`terraform.rc`) in `dir` to enforce using the local provider mirror. +// This blocks network access for providers, forcing Terraform to use only what's cached in `dir`. +// Returns the path to the generated config file. +func writeCliConfig(t *testing.T, dir string) string { + t.Helper() + + cliConfigPath := filepath.Join(dir, terraformConfigFileName) + require.NoError(t, os.MkdirAll(filepath.Dir(cliConfigPath), 0o700)) + + content := fmt.Sprintf(` + provider_installation { + filesystem_mirror { + path = "%s" + include = ["*/*"] + } + direct { + exclude = ["*/*"] + } + } + `, filepath.Join(dir, cacheProvidersDirName)) + require.NoError(t, os.WriteFile(cliConfigPath, []byte(content), 0o600)) + return cliConfigPath +} + +func runCmd(t *testing.T, dir string, args ...string) { + t.Helper() + + stdout, stderr := bytes.NewBuffer(nil), bytes.NewBuffer(nil) + cmd := exec.Command(args[0], args[1:]...) //#nosec + cmd.Dir = dir + cmd.Stdout = stdout + cmd.Stderr = stderr + if err := cmd.Run(); err != nil { + t.Fatalf("failed to run %s: %s\nstdout: %s\nstderr: %s", strings.Join(args, " "), err, stdout.String(), stderr.String()) + } +} + +// Each test gets a unique cache dir based on its name and template files. +// This ensures that tests can download providers in parallel and that they +// will redownload providers if the template files change. +func getTestCacheDir(t *testing.T, rootDir string, testName string, templateFiles map[string]string) string { + t.Helper() + + hash := hashTemplateFilesAndTestName(t, testName, templateFiles) + dir := filepath.Join(rootDir, hash[:12]) + return dir +} + +// Ensures Terraform providers are downloaded and cached locally in a unique directory for the test. +// Uses `terraform init` then `mirror` to populate the cache if needed. +// Returns the cache directory path. +func downloadProviders(t *testing.T, rootDir string, testName string, templateFiles map[string]string) string { + t.Helper() + + dir := getTestCacheDir(t, rootDir, testName, templateFiles) + if _, err := os.Stat(dir); err == nil { + t.Logf("%s: using cached terraform providers", testName) + return dir + } + filesDir := filepath.Join(dir, cacheTemplateFilesDirName) + defer func() { + // The files dir will contain a copy of terraform providers generated + // by the terraform init command. We don't want to persist them since + // we already have a registry mirror in the providers dir. + if err := os.RemoveAll(filesDir); err != nil { + t.Logf("failed to remove files dir %s: %s", filesDir, err) + } + if !t.Failed() { + return + } + // If `downloadProviders` function failed, clean up the cache dir. + // We don't want to leave it around because it may be incomplete or corrupted. + if err := os.RemoveAll(dir); err != nil { + t.Logf("failed to remove dir %s: %s", dir, err) + } + }() + + require.NoError(t, os.MkdirAll(filesDir, 0o700)) + + for fileName, file := range templateFiles { + filePath := filepath.Join(filesDir, fileName) + require.NoError(t, os.MkdirAll(filepath.Dir(filePath), 0o700)) + require.NoError(t, os.WriteFile(filePath, []byte(file), 0o600)) + } + + providersDir := filepath.Join(dir, cacheProvidersDirName) + require.NoError(t, os.MkdirAll(providersDir, 0o700)) + + // We need to run init because if a test uses modules in its template, + // the mirror command will fail without it. + runCmd(t, filesDir, "terraform", "init") + // Now, mirror the providers into `providersDir`. We use this explicit mirror + // instead of relying only on the standard Terraform plugin cache. + // + // Why? Because this mirror, when used with the CLI config from `writeCliConfig`, + // prevents Terraform from hitting the network registry during `plan`. This cuts + // down on network calls, making CI tests less flaky. + // + // In contrast, the standard cache *still* contacts the registry for metadata + // during `init`, even if the plugins are already cached locally - see link below. + // + // Ref: https://developer.hashicorp.com/terraform/cli/config/config-file#provider-plugin-cache + // > When a plugin cache directory is enabled, the terraform init command will + // > still use the configured or implied installation methods to obtain metadata + // > about which plugins are available + runCmd(t, filesDir, "terraform", "providers", "mirror", providersDir) + + return dir +} + +// Caches providers locally and generates a Terraform CLI config to use *only* that cache. +// This setup prevents network access for providers during `terraform init`, improving reliability +// in subsequent test runs. +// Returns the path to the generated CLI config file. +func cacheProviders(t *testing.T, rootDir string, testName string, templateFiles map[string]string) string { + t.Helper() + + providersParentDir := downloadProviders(t, rootDir, testName, templateFiles) + cliConfigPath := writeCliConfig(t, providersParentDir) + return cliConfigPath +} + func readProvisionLog(t *testing.T, response proto.DRPCProvisioner_SessionClient) string { var logBuf strings.Builder for { @@ -352,6 +520,8 @@ func TestProvision(t *testing.T) { Apply bool // Some tests may need to be skipped until the relevant provider version is released. SkipReason string + // If SkipCacheProviders is true, then skip caching the terraform providers for this test. + SkipCacheProviders bool }{ { Name: "missing-variable", @@ -422,16 +592,18 @@ func TestProvision(t *testing.T) { Files: map[string]string{ "main.tf": `a`, }, - ErrorContains: "initialize terraform", - ExpectLogContains: "Argument or block definition required", + ErrorContains: "initialize terraform", + ExpectLogContains: "Argument or block definition required", + SkipCacheProviders: true, }, { Name: "bad-syntax-2", Files: map[string]string{ "main.tf": `;asdf;`, }, - ErrorContains: "initialize terraform", - ExpectLogContains: `The ";" character is not valid.`, + ErrorContains: "initialize terraform", + ExpectLogContains: `The ";" character is not valid.`, + SkipCacheProviders: true, }, { Name: "destroy-no-state", @@ -838,6 +1010,23 @@ func TestProvision(t *testing.T) { }, } + // Remove unused cache dirs before running tests. + // This cleans up any cache dirs that were created by tests that no longer exist. + cacheRootDir := filepath.Join(testutil.PersistentCacheDir(t), "terraform_provision_test") + expectedCacheDirs := make(map[string]bool) + for _, testCase := range testCases { + cacheDir := getTestCacheDir(t, cacheRootDir, testCase.Name, testCase.Files) + expectedCacheDirs[cacheDir] = true + } + currentCacheDirs, err := filepath.Glob(filepath.Join(cacheRootDir, "*")) + require.NoError(t, err) + for _, cacheDir := range currentCacheDirs { + if _, ok := expectedCacheDirs[cacheDir]; !ok { + t.Logf("removing unused cache dir: %s", cacheDir) + require.NoError(t, os.RemoveAll(cacheDir)) + } + } + for _, testCase := range testCases { testCase := testCase t.Run(testCase.Name, func(t *testing.T) { @@ -847,7 +1036,18 @@ func TestProvision(t *testing.T) { t.Skip(testCase.SkipReason) } - ctx, api := setupProvisioner(t, nil) + cliConfigPath := "" + if !testCase.SkipCacheProviders { + cliConfigPath = cacheProviders( + t, + cacheRootDir, + testCase.Name, + testCase.Files, + ) + } + ctx, api := setupProvisioner(t, &provisionerServeOptions{ + cliConfigPath: cliConfigPath, + }) sess := configure(ctx, t, api, &proto.Config{ TemplateSourceArchive: testutil.CreateTar(t, testCase.Files), }) diff --git a/provisioner/terraform/serve.go b/provisioner/terraform/serve.go index a84e8caf6b5ab..562946d8ef92e 100644 --- a/provisioner/terraform/serve.go +++ b/provisioner/terraform/serve.go @@ -28,7 +28,9 @@ type ServeOptions struct { BinaryPath string // CachePath must not be used by multiple processes at once. CachePath string - Tracer trace.Tracer + // CliConfigPath is the path to the Terraform CLI config file. + CliConfigPath string + Tracer trace.Tracer // ExitTimeout defines how long we will wait for a running Terraform // command to exit (cleanly) if the provision was stopped. This @@ -132,22 +134,24 @@ func Serve(ctx context.Context, options *ServeOptions) error { options.ExitTimeout = unhanger.HungJobExitTimeout } return provisionersdk.Serve(ctx, &server{ - execMut: &sync.Mutex{}, - binaryPath: options.BinaryPath, - cachePath: options.CachePath, - logger: options.Logger, - tracer: options.Tracer, - exitTimeout: options.ExitTimeout, + execMut: &sync.Mutex{}, + binaryPath: options.BinaryPath, + cachePath: options.CachePath, + cliConfigPath: options.CliConfigPath, + logger: options.Logger, + tracer: options.Tracer, + exitTimeout: options.ExitTimeout, }, options.ServeOptions) } type server struct { - execMut *sync.Mutex - binaryPath string - cachePath string - logger slog.Logger - tracer trace.Tracer - exitTimeout time.Duration + execMut *sync.Mutex + binaryPath string + cachePath string + cliConfigPath string + logger slog.Logger + tracer trace.Tracer + exitTimeout time.Duration } func (s *server) startTrace(ctx context.Context, name string, opts ...trace.SpanStartOption) (context.Context, trace.Span) { @@ -158,12 +162,13 @@ func (s *server) startTrace(ctx context.Context, name string, opts ...trace.Span func (s *server) executor(workdir string, stage database.ProvisionerJobTimingStage) *executor { return &executor{ - server: s, - mut: s.execMut, - binaryPath: s.binaryPath, - cachePath: s.cachePath, - workdir: workdir, - logger: s.logger.Named("executor"), - timings: newTimingAggregator(stage), + server: s, + mut: s.execMut, + binaryPath: s.binaryPath, + cachePath: s.cachePath, + cliConfigPath: s.cliConfigPath, + workdir: workdir, + logger: s.logger.Named("executor"), + timings: newTimingAggregator(stage), } } diff --git a/testutil/cache.go b/testutil/cache.go new file mode 100644 index 0000000000000..82d45da3b3322 --- /dev/null +++ b/testutil/cache.go @@ -0,0 +1,25 @@ +package testutil + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +// PersistentCacheDir returns a path to a directory +// that will be cached between test runs in Github Actions. +func PersistentCacheDir(t *testing.T) string { + t.Helper() + + // We don't use os.UserCacheDir() because the path it + // returns is different on different operating systems. + // This would make it harder to specify which cache dir to use + // in Github Actions. + home, err := os.UserHomeDir() + require.NoError(t, err) + dir := filepath.Join(home, ".cache", "coderv2-test") + + return dir +} pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy