Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove expired data from the database, ISSUE-952 #2406

Merged
merged 10 commits into from Jun 20, 2022
22 changes: 22 additions & 0 deletions cmd/cleanup/root.go
@@ -0,0 +1,22 @@
package cleanup

import (
"github.com/spf13/cobra"

"github.com/ory/x/configx"
)

func NewCleanupCmd() *cobra.Command {
c := &cobra.Command{
Use: "cleanup",
Short: "Various cleanup helpers",
}
configx.RegisterFlags(c.PersistentFlags())
return c
}

func RegisterCommandRecursive(parent *cobra.Command) {
c := NewCleanupCmd()
parent.AddCommand(c)
c.AddCommand(NewCleanupSQLCmd())
}
55 changes: 55 additions & 0 deletions cmd/cleanup/sql.go
@@ -0,0 +1,55 @@
/*
Copyright © 2019 NAME HERE <EMAIL ADDRESS>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cleanup

import (
"fmt"
"time"

"github.com/spf13/cobra"

"github.com/ory/kratos/driver/config"

"github.com/ory/kratos/cmd/cliclient"
"github.com/ory/x/configx"
)

// cleanupSqlCmd represents the sql command
func NewCleanupSQLCmd() *cobra.Command {
c := &cobra.Command{
Use: "sql <database-url>",
Short: "Cleanup sql database from expired flows and sessions",
Long: `Run this command as frequently as you need.
It is recommended to run this command close to the SQL instance (e.g. same subnet) instead of over the public internet.
This decreases risk of failure and decreases time required.
You can read in the database URL using the -e flag, for example:
export DSN=...
kratos cleanup sql -e
### WARNING ###
Before running this command on an existing database, create a back up!
`,
Run: func(cmd *cobra.Command, args []string) {
err := cliclient.NewCleanupHandler().CleanupSQL(cmd, args)
if err != nil {
fmt.Fprintln(cmd.OutOrStdout(), err)
}
},
abador marked this conversation as resolved.
Show resolved Hide resolved
}

configx.RegisterFlags(c.PersistentFlags())
c.Flags().BoolP("read-from-env", "e", true, "If set, reads the database connection string from the environment variable DSN or config file key dsn.")
c.Flags().Duration(config.ViperKeyDatabaseCleanupSleepTables, time.Minute, "How long to wait between each table cleanup")
abador marked this conversation as resolved.
Show resolved Hide resolved
c.Flags().IntP(config.ViperKeyDatabaseCleanupBatchSize, "b", 100, "Set the number of records to be cleaned per run")
c.Flags().Duration("keep-last", 0, "Don't remove records younger than")
return c
}
69 changes: 69 additions & 0 deletions cmd/cliclient/cleanup.go
@@ -0,0 +1,69 @@
package cliclient

import (
"fmt"

"github.com/pkg/errors"

"github.com/ory/x/configx"

"github.com/spf13/cobra"

"github.com/ory/kratos/driver"
"github.com/ory/kratos/driver/config"
"github.com/ory/x/cmdx"
"github.com/ory/x/flagx"
)

type CleanupHandler struct{}

func NewCleanupHandler() *CleanupHandler {
return &CleanupHandler{}
}

func (h *CleanupHandler) CleanupSQL(cmd *cobra.Command, args []string) error {
var d driver.Registry

if flagx.MustGetBool(cmd, "read-from-env") {
d = driver.NewWithoutInit(
cmd.Context(),
cmd.ErrOrStderr(),
configx.WithFlags(cmd.Flags()),
configx.SkipValidation())
if len(d.Config(cmd.Context()).DSN()) == 0 {
fmt.Fprintln(cmd.OutOrStdout(), cmd.UsageString())
fmt.Fprintln(cmd.OutOrStdout(), "")
fmt.Fprintln(cmd.OutOrStdout(), "When using flag -e, environment variable DSN must be set")
return cmdx.FailSilently(cmd)
}
} else {
if len(args) != 1 {
fmt.Println(cmd.UsageString())
return cmdx.FailSilently(cmd)
}
d = driver.NewWithoutInit(
cmd.Context(),
cmd.ErrOrStderr(),
configx.WithFlags(cmd.Flags()),
configx.SkipValidation(),
configx.WithValue(config.ViperKeyDSN, args[0]))
}
abador marked this conversation as resolved.
Show resolved Hide resolved

err := d.Init(cmd.Context(), driver.SkipNetworkInit)
if err != nil {
return errors.Wrap(err, "An error occurred initializing cleanup")
}

keepLast := flagx.MustGetDuration(cmd, "keep-last")

err = d.Persister().CleanupDatabase(
cmd.Context(),
d.Config(cmd.Context()).DatabaseCleanupSleepTables(),
keepLast,
d.Config(cmd.Context()).DatabaseCleanupBatchSize())
if err != nil {
return errors.Wrap(err, "An error occurred while cleaning up expired data")
}

return nil
}
3 changes: 3 additions & 0 deletions cmd/root.go
Expand Up @@ -5,6 +5,8 @@ import (
"fmt"
"os"

"github.com/ory/kratos/cmd/cleanup"

"github.com/ory/kratos/driver/config"

"github.com/ory/kratos/cmd/courier"
Expand Down Expand Up @@ -36,6 +38,7 @@ func NewRootCmd() (cmd *cobra.Command) {
cmd.AddCommand(identities.NewListCmd(cmd))
migrate.RegisterCommandRecursive(cmd)
serve.RegisterCommandRecursive(cmd)
cleanup.RegisterCommandRecursive(cmd)
remote.RegisterCommandRecursive(cmd)
cmd.AddCommand(identities.NewValidateCmd())
cmd.AddCommand(cmdx.Version(&config.Version, &config.Commit, &config.Date))
Expand Down
2 changes: 2 additions & 0 deletions continuity/persistence.go
Expand Up @@ -2,6 +2,7 @@ package continuity

import (
"context"
"time"

"github.com/gofrs/uuid"
)
Expand All @@ -14,4 +15,5 @@ type Persister interface {
SaveContinuitySession(ctx context.Context, c *Container) error
GetContinuitySession(ctx context.Context, id uuid.UUID) (*Container, error)
DeleteContinuitySession(ctx context.Context, id uuid.UUID) error
DeleteExpiredContinuitySessions(context.Context, time.Time, int) error
abador marked this conversation as resolved.
Show resolved Hide resolved
}
23 changes: 23 additions & 0 deletions continuity/test/persistence.go
Expand Up @@ -94,5 +94,28 @@ func TestPersister(ctx context.Context, p interface {
require.ErrorIs(t, err, sqlcon.ErrNoRows)
})
})

t.Run("case=cleanup", func(t *testing.T) {
id := x.NewUUID()
now := time.Now().Add(-24 * time.Hour).UTC().Truncate(time.Second)
abador marked this conversation as resolved.
Show resolved Hide resolved
m := sqlxx.NullJSONRawMessage(`{"foo": "bar"}`)
expected := continuity.Container{Name: "foo", IdentityID: x.PointToUUID(createIdentity(t).ID),
ExpiresAt: now,
Payload: m,
}
expected.ID = id

t.Run("can cleanup", func(t *testing.T) {
require.NoError(t, p.SaveContinuitySession(ctx, &expected))

assert.EqualValues(t, id, expected.ID)
assert.EqualValues(t, nid, expected.NID)

require.NoError(t, p.DeleteExpiredContinuitySessions(ctx, time.Now(), 5))

_, err := p.GetContinuitySession(ctx, id)
require.Error(t, err)
})
})
}
}
10 changes: 10 additions & 0 deletions driver/config/config.go
Expand Up @@ -153,6 +153,8 @@ const (
ViperKeyHasherArgon2ConfigDedicatedMemory = "hashers.argon2.dedicated_memory"
ViperKeyHasherBcryptCost = "hashers.bcrypt.cost"
ViperKeyCipherAlgorithm = "ciphers.algorithm"
ViperKeyDatabaseCleanupSleepTables = "database.cleanup.sleep.tables"
ViperKeyDatabaseCleanupBatchSize = "database.cleanup.batch_size"
ViperKeyLinkLifespan = "selfservice.methods.link.config.lifespan"
ViperKeyLinkBaseURL = "selfservice.methods.link.config.base_url"
ViperKeyPasswordHaveIBeenPwnedHost = "selfservice.methods.password.config.haveibeenpwned_host"
Expand Down Expand Up @@ -1074,6 +1076,14 @@ func (p *Config) SelfServiceLinkMethodBaseURL() *url.URL {
return p.p.RequestURIF(ViperKeyLinkBaseURL, p.SelfPublicURL())
}

func (p *Config) DatabaseCleanupSleepTables() time.Duration {
return p.p.DurationF(ViperKeyDatabaseCleanupSleepTables, 5*time.Second)
abador marked this conversation as resolved.
Show resolved Hide resolved
}

func (p *Config) DatabaseCleanupBatchSize() int {
return p.p.IntF(ViperKeyDatabaseCleanupBatchSize, 100)
abador marked this conversation as resolved.
Show resolved Hide resolved
}

func (p *Config) SelfServiceFlowRecoveryAfterHooks(strategy string) []SelfServiceHook {
return p.selfServiceHooks(HookStrategyKey(ViperKeySelfServiceRecoveryAfter, strategy))
}
Expand Down
16 changes: 15 additions & 1 deletion driver/config/config_test.go
Expand Up @@ -48,7 +48,7 @@ func TestViperProvider(t *testing.T) {
p := config.MustNew(t, logrusx.New("", ""), os.Stderr,
configx.WithConfigFiles("stub/.kratos.yaml"))

t.Run("gourp=client config", func(t *testing.T) {
t.Run("group=client config", func(t *testing.T) {
assert.False(t, p.ClientHTTPNoPrivateIPRanges(), "Should not have private IP ranges disabled per default")
p.MustSet(config.ViperKeyClientHTTPNoPrivateIPRanges, true)
assert.True(t, p.ClientHTTPNoPrivateIPRanges(), "Should disallow private IP ranges if set")
Expand Down Expand Up @@ -1152,3 +1152,17 @@ func TestCourierTemplatesConfig(t *testing.T) {
assert.Equal(t, courierTemplateConfig, c.CourierTemplatesHelper(config.ViperKeyCourierTemplatesRecoveryValidEmail))
})
}

func TestCleanup(t *testing.T) {
p := config.MustNew(t, logrusx.New("", ""), os.Stderr,
configx.WithConfigFiles("stub/.kratos.yaml"))

t.Run("group=cleanup config", func(t *testing.T) {
assert.Equal(t, p.DatabaseCleanupSleepTables(), 1*time.Minute)
p.MustSet(config.ViperKeyDatabaseCleanupSleepTables, time.Second)
assert.Equal(t, p.DatabaseCleanupSleepTables(), time.Second)
assert.Equal(t, p.DatabaseCleanupBatchSize(), 100)
p.MustSet(config.ViperKeyDatabaseCleanupBatchSize, 1)
assert.Equal(t, p.DatabaseCleanupSleepTables(), 1)
})
}
43 changes: 43 additions & 0 deletions embedx/config.schema.json
Expand Up @@ -1434,6 +1434,49 @@
}
}
},
"database": {
"type": "object",
"title": "Database related configuration",
"description": "Miscellaneous settings used in database related tasks (cleanup, etc.)",
"properties": {
"cleanup": {
"type": "object",
"title": "Database cleanup settings",
"description": "Settings that controls how the database cleanup process is configured (delays, batch size, etc.)",
"properties": {
"batch_size" : {
"type": "integer",
"title": "Number of records to clean in one iteration",
"description": "Controls how many records should be purged from one table during database cleanup task",
"minimum": 1,
"default": 100
},
"sleep": {
"type": "object",
"title": "Delays between various database cleanup phases",
"description": "Configures delays between each step of the cleanup process. It is useful to tune the process so it will be efficient and performant.",
"properties": {
"tables": {
"type": "string",
"title": "Delay between each table cleanups",
"description": "Controls the delay time between cleaning each table in one cleanup iteration",
"pattern": "^[0-9]+(ns|us|ms|s|m|h)$",
"default": "1m"
}
}
},
"older_than": {
"type": "string",
"title": "Remove records older than",
"description": "Controls how old records do we want to leave",
"pattern": "^[0-9]+(ns|us|ms|s|m|h)$",
"default": "0s"
}
}
}
},
"additionalProperties": false
},
"dsn": {
"type": "string",
"title": "Data Source Name",
Expand Down
3 changes: 3 additions & 0 deletions internal/driver.go
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"os"
"testing"
"time"

"github.com/ory/kratos/corp"

Expand Down Expand Up @@ -45,6 +46,8 @@ func NewConfigurationWithDefaults(t *testing.T) *config.Config {
config.ViperKeyCourierSMTPURL: "smtp://foo:bar@baz.com/",
config.ViperKeySelfServiceBrowserDefaultReturnTo: "https://www.ory.sh/redirect-not-set",
config.ViperKeySecretsCipher: []string{"secret-thirty-two-character-long"},
config.ViperKeyDatabaseCleanupSleepTables: 1 * time.Minute,
config.ViperKeyDatabaseCleanupBatchSize: 100,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those defaults will be loaded from the config schema, so there should be no need to add them here.

}),
configx.SkipValidation(),
)
Expand Down
2 changes: 2 additions & 0 deletions persistence/reference.go
Expand Up @@ -2,6 +2,7 @@ package persistence

import (
"context"
"time"

"github.com/ory/x/networkx"

Expand Down Expand Up @@ -43,6 +44,7 @@ type Persister interface {
link.RecoveryTokenPersister
link.VerificationTokenPersister

CleanupDatabase(context.Context, time.Duration, time.Duration, int) error
Close(context.Context) error
Ping() error
MigrationStatus(c context.Context) (popx.MigrationStatuses, error)
Expand Down