From c6a1936c5153eeddcd9c8ebc6e4ab83c24c3386a Mon Sep 17 00:00:00 2001 From: naveensrinivasan <172697+naveensrinivasan@users.noreply.github.com> Date: Sun, 17 Jul 2022 18:57:41 -0500 Subject: [PATCH] :seedling: Export Scorecards results for API - Exporting the Scorecard results for the scorecard API. - The code exports as result.json without the commit SHA and also with the commit SHA. --- clients/githubrepo/client.go | 2 +- clients/localdir/client.go | 2 +- cron/internal/config/config.go | 41 +++++++++++++++++------------ cron/internal/config/config.yaml | 2 ++ cron/internal/config/config_test.go | 20 ++++++++++++-- cron/internal/controller/main.go | 2 +- cron/internal/worker/main.go | 20 ++++++++++++-- 7 files changed, 65 insertions(+), 24 deletions(-) diff --git a/clients/githubrepo/client.go b/clients/githubrepo/client.go index d4706ae492e..c7fef13b6de 100644 --- a/clients/githubrepo/client.go +++ b/clients/githubrepo/client.go @@ -180,7 +180,7 @@ func (client *Client) ListStatuses(ref string) ([]clients.Status, error) { return client.statuses.listStatuses(ref) } -//ListProgrammingLanguages implements RepoClient.ListProgrammingLanguages. +// ListProgrammingLanguages implements RepoClient.ListProgrammingLanguages. func (client *Client) ListProgrammingLanguages() ([]clients.Language, error) { return client.languages.listProgrammingLanguages() } diff --git a/clients/localdir/client.go b/clients/localdir/client.go index 7b58ea8d6fd..e8887bf5d2b 100644 --- a/clients/localdir/client.go +++ b/clients/localdir/client.go @@ -219,7 +219,7 @@ func (client *localDirClient) Close() error { } // ListProgrammingLanguages implements RepoClient.ListProgrammingLanguages. -// TODO: add ListProgrammingLanguages support for local directories +// TODO: add ListProgrammingLanguages support for local directories. 
func (client *localDirClient) ListProgrammingLanguages() ([]clients.Language, error) { return nil, fmt.Errorf("ListProgrammingLanguages: %w", clients.ErrUnsupportedFeature) } diff --git a/cron/internal/config/config.go b/cron/internal/config/config.go index 30b13ee6f68..9bd4558d051 100644 --- a/cron/internal/config/config.go +++ b/cron/internal/config/config.go @@ -34,19 +34,20 @@ const ( // ShardNumFilename is the name of the file that stores the number of shards. ShardNumFilename string = ".shard_num" // TransferStatusFilename file identifies if shard transfer to BigQuery is completed. - TransferStatusFilename string = ".transfer_complete" - projectID string = "SCORECARD_PROJECT_ID" - requestTopicURL string = "SCORECARD_REQUEST_TOPIC_URL" - requestSubscriptionURL string = "SCORECARD_REQUEST_SUBSCRIPTION_URL" - bigqueryDataset string = "SCORECARD_BIGQUERY_DATASET" - completionThreshold string = "SCORECARD_COMPLETION_THRESHOLD" - shardSize string = "SCORECARD_SHARD_SIZE" - webhookURL string = "SCORECARD_WEBHOOK_URL" - metricExporter string = "SCORECARD_METRIC_EXPORTER" - ciiDataBucketURL string = "SCORECARD_CII_DATA_BUCKET_URL" - blacklistedChecks string = "SCORECARD_BLACKLISTED_CHECKS" - bigqueryTable string = "SCORECARD_BIGQUERY_TABLE" - resultDataBucketURL string = "SCORECARD_DATA_BUCKET_URL" + TransferStatusFilename string = ".transfer_complete" + projectID string = "SCORECARD_PROJECT_ID" + requestTopicURL string = "SCORECARD_REQUEST_TOPIC_URL" + requestSubscriptionURL string = "SCORECARD_REQUEST_SUBSCRIPTION_URL" + bigqueryDataset string = "SCORECARD_BIGQUERY_DATASET" + completionThreshold string = "SCORECARD_COMPLETION_THRESHOLD" + shardSize string = "SCORECARD_SHARD_SIZE" + webhookURL string = "SCORECARD_WEBHOOK_URL" + metricExporter string = "SCORECARD_METRIC_EXPORTER" + ciiDataBucketURL string = "SCORECARD_CII_DATA_BUCKET_URL" + blacklistedChecks string = "SCORECARD_BLACKLISTED_CHECKS" + bigqueryTable string = "SCORECARD_BIGQUERY_TABLE" + 
resultDataBucketURL string = "SCORECARD_DATA_BUCKET_URL" + bqExportResultsBucketURL string = "SCORECARD_BQ_EXPORT_RESULTS_BUCKET_URL" // Raw results. rawBigqueryTable string = "RAW_SCORECARD_BIGQUERY_TABLE" rawResultDataBucketURL string = "RAW_SCORECARD_DATA_BUCKET_URL" @@ -76,8 +77,9 @@ type config struct { MetricExporter string `yaml:"metric-exporter"` ShardSize int `yaml:"shard-size"` // Raw results. - RawResultDataBucketURL string `yaml:"raw-result-data-bucket-url"` - RawBigQueryTable string `yaml:"raw-bigquery-table"` + RawResultDataBucketURL string `yaml:"raw-result-data-bucket-url"` + RawBigQueryTable string `yaml:"raw-bigquery-table"` + BigQueryExportResultsBucketURL string `yaml:"bigquery-export-results-bucket-url"` } func getParsedConfigFromFile(byteValue []byte) (config, error) { @@ -120,7 +122,6 @@ func getIntConfigValue(envVar string, byteValue []byte, fieldName, configName st return 0, fmt.Errorf("error getting config value %s: %w", configName, err) } - // nolint: exhaustive switch value.Kind() { case reflect.String: //nolint:wrapcheck @@ -137,7 +138,7 @@ func getFloat64ConfigValue(envVar string, byteValue []byte, fieldName, configNam if err != nil { return 0, fmt.Errorf("error getting config value %s: %w", configName, err) } - // nolint: exhaustive + switch value.Kind() { case reflect.String: //nolint: wrapcheck, gomnd @@ -232,3 +233,9 @@ func GetBlacklistedChecks() ([]string, error) { func GetMetricExporter() (string, error) { return getStringConfigValue(metricExporter, configYAML, "MetricExporter", "metric-exporter") } + +// GetBQExportResultsBucketURL returns the bucket URL for storing cron job results. 
+func GetBQExportResultsBucketURL() (string, error) { + return getStringConfigValue(bqExportResultsBucketURL, configYAML, + "BigQueryExportResultsBucketURL", "bigquery-export-results-bucket-url") +} diff --git a/cron/internal/config/config.yaml b/cron/internal/config/config.yaml index 02f8d399cdf..50469789798 100644 --- a/cron/internal/config/config.yaml +++ b/cron/internal/config/config.yaml @@ -29,3 +29,5 @@ result-data-bucket-url: gs://ossf-scorecard-data2 # Raw results. raw-result-data-bucket-url: gs://ossf-scorecard-rawdata raw-bigquery-table: scorecard-rawdata +# Bucket for results exported for the Scorecard API. +bigquery-export-results-bucket-url: gs://ossf-scorecard-bq-export-results diff --git a/cron/internal/config/config_test.go b/cron/internal/config/config_test.go index aa473f58bca..a6dd7e2cebe 100644 --- a/cron/internal/config/config_test.go +++ b/cron/internal/config/config_test.go @@ -37,8 +37,9 @@ const ( prodShardSize int = 10 prodMetricExporter string = "stackdriver" // Raw results. - prodRawBucket = "gs://ossf-scorecard-rawdata" - prodRawBigQueryTable = "scorecard-rawdata" + prodRawBucket = "gs://ossf-scorecard-rawdata" + prodRawBigQueryTable = "scorecard-rawdata" + prodBigQueryExportsBucketURL = "gs://ossf-scorecard-bq-export-results" ) func getByteValueFromFile(filename string) ([]byte, error) { @@ -344,3 +345,18 @@ func TestGetMetricExporter(t *testing.T) { } }) } + +//nolint:paralleltest // Since os.Setenv is used. 
+func TestGetBigQueryExportsBucketURL(t *testing.T) { + t.Run("GetBigQueryExportsBucketURL", func(t *testing.T) { + bigqueryExportsBucketURL := bqExportResultsBucketURL + os.Unsetenv(bigqueryExportsBucketURL) + bucket, err := GetBQExportResultsBucketURL() + if err != nil { + t.Errorf("failed to get production bucket URL from config: %v", err) + } + if bucket != prodBigQueryExportsBucketURL { + t.Errorf("test failed: expected - %s, got = %s", prodBigQueryExportsBucketURL, bucket) + } + }) +} diff --git a/cron/internal/controller/main.go b/cron/internal/controller/main.go index 7b5ff9d3d83..d80f2ef7425 100644 --- a/cron/internal/controller/main.go +++ b/cron/internal/controller/main.go @@ -133,7 +133,7 @@ func main() { ShardLoc: new(string), CommitSha: new(string), } - *metadata.NumShard = (shardNum + 1) + *metadata.NumShard = shardNum + 1 *metadata.ShardLoc = bucket + "/" + data.GetBlobFilename("", t) *metadata.CommitSha = version.GetVersionInfo().GitCommit metadataJSON, err := protojson.Marshal(&metadata) diff --git a/cron/internal/worker/main.go b/cron/internal/worker/main.go index e539a8c20a8..bcad520d739 100644 --- a/cron/internal/worker/main.go +++ b/cron/internal/worker/main.go @@ -48,7 +48,7 @@ var ignoreRuntimeErrors = flag.Bool("ignoreRuntimeErrors", false, "if set to tru // nolint: gocognit func processRequest(ctx context.Context, batchRequest *data.ScorecardBatchRequest, - blacklistedChecks []string, bucketURL, rawBucketURL string, + blacklistedChecks []string, bucketURL, rawBucketURL, exportBucketURL string, checkDocs docs.Doc, repoClient clients.RepoClient, ossFuzzRepoClient clients.RepoClient, ciiClient clients.CIIBestPracticesClient, @@ -101,6 +101,7 @@ func processRequest(ctx context.Context, for _, check := range blacklistedChecks { delete(checksToRun, check) } + result, err := pkg.RunScorecards(ctx, repo, commitSHA, checksToRun, repoClient, ossFuzzRepoClient, ciiClient, vulnsClient) if errors.Is(err, sce.ErrRepoUnreachable) { @@ -128,11 +129,21 @@ 
func processRequest(ctx context.Context, if err := format.AsJSON2(&result, true /*showDetails*/, log.InfoLevel, checkDocs, &buffer2); err != nil { return fmt.Errorf("error during result.AsJSON2: %w", err) } + exportPath := fmt.Sprintf("%s/result.json", repo.URI()) + exportCommitSHAPath := fmt.Sprintf("%s/%s/result.json", repo.URI(), result.Repo.CommitSHA) // Raw result. if err := format.AsRawJSON(&result, &rawBuffer); err != nil { return fmt.Errorf("error during result.AsRawJSON: %w", err) } + + if err := data.WriteToBlobStore(ctx, exportBucketURL, exportPath, buffer2.Bytes()); err != nil { + return fmt.Errorf("error during WriteToBlobStore2: %w", err) + } + // Export result based on commitSHA. + if err := data.WriteToBlobStore(ctx, exportBucketURL, exportCommitSHAPath, buffer2.Bytes()); err != nil { + return fmt.Errorf("error during WriteToBlobStore2: %w", err) + } } if err := data.WriteToBlobStore(ctx, bucketURL, filename, buffer2.Bytes()); err != nil { @@ -207,6 +218,11 @@ func main() { panic(err) } + exportBucketURL, err := config.GetBQExportResultsBucketURL() + if err != nil { + panic(err) + } + logger := log.NewLogger(log.InfoLevel) repoClient := githubrepo.CreateGithubRepoClient(ctx, logger) ciiClient := clients.BlobCIIBestPracticesClient(ciiDataBucketURL) @@ -242,7 +258,7 @@ func main() { break } if err := processRequest(ctx, req, blacklistedChecks, - bucketURL, rawBucketURL, checkDocs, + bucketURL, rawBucketURL, exportBucketURL, checkDocs, repoClient, ossFuzzRepoClient, ciiClient, vulnsClient, logger); err != nil { // TODO(log): Previously Warn. Consider logging an error here. logger.Info(fmt.Sprintf("error processing request: %v", err))