Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support --wal-dir in keeper #865

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions cmd/keeper/cmd/keeper.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ type config struct {

uid string
dataDir string
walDir string
debug bool
pgListenAddress string
pgAdvertiseAddress string
Expand Down Expand Up @@ -126,6 +127,7 @@ func init() {
CmdKeeper.PersistentFlags().StringVar(&cfg.uid, "id", "", "keeper uid (must be unique in the cluster and can contain only lower-case letters, numbers and the underscore character). If not provided a random uid will be generated.")
CmdKeeper.PersistentFlags().StringVar(&cfg.uid, "uid", "", "keeper uid (must be unique in the cluster and can contain only lower-case letters, numbers and the underscore character). If not provided a random uid will be generated.")
CmdKeeper.PersistentFlags().StringVar(&cfg.dataDir, "data-dir", "", "data directory")
CmdKeeper.PersistentFlags().StringVar(&cfg.walDir, "wal-dir", "", "wal directory")
CmdKeeper.PersistentFlags().StringVar(&cfg.pgListenAddress, "pg-listen-address", "", "postgresql instance listening address, local address used for the postgres instance. For all network interface, you can set the value to '*'.")
CmdKeeper.PersistentFlags().StringVar(&cfg.pgAdvertiseAddress, "pg-advertise-address", "", "postgresql instance address from outside. Use it to expose ip different than local ip with a NAT networking config")
CmdKeeper.PersistentFlags().StringVar(&cfg.pgPort, "pg-port", "5432", "postgresql instance listening port")
Expand Down Expand Up @@ -471,6 +473,7 @@ type PostgresKeeper struct {
bootUUID string

dataDir string
walDir string
pgListenAddress string
pgAdvertiseAddress string
pgPort string
Expand Down Expand Up @@ -522,6 +525,7 @@ func NewPostgresKeeper(cfg *config, end chan error) (*PostgresKeeper, error) {
bootUUID: common.UUID(),

dataDir: dataDir,
walDir: cfg.walDir,

pgListenAddress: cfg.pgListenAddress,
pgAdvertiseAddress: cfg.pgAdvertiseAddress,
Expand Down Expand Up @@ -823,7 +827,7 @@ func (p *PostgresKeeper) Start(ctx context.Context) {

// TODO(sgotti) reconfigure the various configurations options
// (RequestTimeout) after a changed cluster config
pgm := pg.NewManager(p.pgBinPath, p.dataDir, p.getLocalConnParams(), p.getLocalReplConnParams(), p.pgSUAuthMethod, p.pgSUUsername, p.pgSUPassword, p.pgReplAuthMethod, p.pgReplUsername, p.pgReplPassword, p.requestTimeout)
pgm := pg.NewManager(p.pgBinPath, p.dataDir, p.walDir, p.getLocalConnParams(), p.getLocalReplConnParams(), p.pgSUAuthMethod, p.pgSUUsername, p.pgSUPassword, p.pgReplAuthMethod, p.pgReplUsername, p.pgReplPassword, p.requestTimeout)
p.pgm = pgm

_ = p.pgm.StopIfStarted(true)
Expand Down Expand Up @@ -916,7 +920,7 @@ func (p *PostgresKeeper) resync(db, masterDB, followedDB *cluster.DB, tryPgrewin
replSlot = common.StolonName(db.UID)
}

if err := pgm.RemoveAll(); err != nil {
if err := pgm.RemoveAllIfInitialized(); err != nil {
return fmt.Errorf("failed to remove the postgres data dir: %v", err)
}
if slog.IsDebug() {
Expand Down Expand Up @@ -1115,7 +1119,7 @@ func (p *PostgresKeeper) postgresKeeperSM(pctx context.Context) {
}

// Clean up cluster db datadir
if err = pgm.RemoveAll(); err != nil {
if err = pgm.RemoveAllIfInitialized(); err != nil {
log.Errorw("failed to remove the postgres data dir", zap.Error(err))
return
}
Expand Down Expand Up @@ -1174,7 +1178,7 @@ func (p *PostgresKeeper) postgresKeeperSM(pctx context.Context) {
log.Errorw("failed to stop pg instance", zap.Error(err))
return
}
if err = pgm.RemoveAll(); err != nil {
if err = pgm.RemoveAllIfInitialized(); err != nil {
log.Errorw("failed to remove the postgres data dir", zap.Error(err))
return
}
Expand Down Expand Up @@ -1236,7 +1240,7 @@ func (p *PostgresKeeper) postgresKeeperSM(pctx context.Context) {
log.Errorw("failed to stop pg instance", zap.Error(err))
return
}
if err = pgm.RemoveAll(); err != nil {
if err = pgm.RemoveAllIfInitialized(); err != nil {
log.Errorw("failed to remove the postgres data dir", zap.Error(err))
return
}
Expand Down
1 change: 1 addition & 0 deletions doc/commands/stolon-keeper.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ stolon-keeper [flags]
--store-skip-tls-verify skip store certificate verification (insecure!!!)
--store-timeout duration store request timeout (default 5s)
--uid string keeper uid (must be unique in the cluster and can contain only lower-case letters, numbers and the underscore character). If not provided a random uid will be generated.
--wal-dir string wal directory
```

###### Auto generated by spf13/cobra on 24-Feb-2021
2 changes: 1 addition & 1 deletion doc/pitr.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ Note: the `\"` is needed by json to put double quotes inside strings. We aren't
When initializing a cluster in pitr init mode, a random registered keeper will be chosen and it'll start restoring the database with these steps:

* Remove the current data directory
* Call the `dataRestoreCommand` expanding every %d to the data directory full path. If it exits with a non zero exit code then stop here since something went wrong.
* Call the `dataRestoreCommand` expanding every %d to the data directory full path and every %w to the wal directory full path (if wal directory is provided to the keeper). If it exits with a non zero exit code then stop here since something went wrong.
* Create a `recovery.conf` with the right parameters and with `restore_command` set to `restoreCommand`.
* Start the postgres instance and wait for the archive recovery.

Expand Down
125 changes: 120 additions & 5 deletions internal/postgresql/postgresql.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ type PGManager interface {
type Manager struct {
pgBinPath string
dataDir string
walDir string
parameters common.Parameters
recoveryOptions *RecoveryOptions
hba []string
Expand Down Expand Up @@ -133,10 +134,11 @@ func SetLogger(l *zap.SugaredLogger) {
log = l
}

func NewManager(pgBinPath string, dataDir string, localConnParams, replConnParams ConnParams, suAuthMethod, suUsername, suPassword, replAuthMethod, replUsername, replPassword string, requestTimeout time.Duration) *Manager {
func NewManager(pgBinPath string, dataDir, walDir string, localConnParams, replConnParams ConnParams, suAuthMethod, suUsername, suPassword, replAuthMethod, replUsername, replPassword string, requestTimeout time.Duration) *Manager {
return &Manager{
pgBinPath: pgBinPath,
dataDir: filepath.Join(dataDir, "postgres"),
walDir: walDir,
parameters: make(common.Parameters),
recoveryOptions: NewRecoveryOptions(),
curParameters: make(common.Parameters),
Expand Down Expand Up @@ -222,6 +224,13 @@ func (p *Manager) Init(initConfig *InitConfig) error {
}
log.Debugw("execing cmd", "cmd", cmd)

// initdb supports configuring a separate wal directory via symlinks. Normally this
// parameter might be part of the initConfig, but it will also be required whenever we
// fall-back to a pg_basebackup during a re-sync, which is why it's a Manager field.
if p.walDir != "" {
cmd.Args = append(cmd.Args, "--waldir", p.walDir)
}

if initConfig.Locale != "" {
cmd.Args = append(cmd.Args, "--locale", initConfig.Locale)
}
Expand All @@ -240,7 +249,9 @@ func (p *Manager) Init(initConfig *InitConfig) error {
}
// remove the dataDir, so we don't end with an half initialized database
if err != nil {
os.RemoveAll(p.dataDir)
if cleanupErr := p.RemoveAll(); cleanupErr != nil {
log.Errorf("failed to cleanup database: %v", cleanupErr)
}
return err
}
return nil
Expand All @@ -250,7 +261,7 @@ func (p *Manager) Restore(command string) error {
var err error
var cmd *exec.Cmd

command = expand(command, p.dataDir)
command = expandRecoveryCommand(command, p.dataDir, p.walDir)

if err = os.MkdirAll(p.dataDir, 0700); err != nil {
err = fmt.Errorf("cannot create data dir: %v", err)
Expand All @@ -269,7 +280,9 @@ func (p *Manager) Restore(command string) error {
// On every error remove the dataDir, so we don't end with an half initialized database
out:
if err != nil {
os.RemoveAll(p.dataDir)
if cleanupErr := p.RemoveAll(); cleanupErr != nil {
log.Errorf("failed to cleanup database: %v", cleanupErr)
}
return err
}
return nil
Expand All @@ -286,10 +299,98 @@ func (p *Manager) StartTmpMerged() error {
return p.start("-c", fmt.Sprintf("config_file=%s", tmpPostgresConfPath))
}

func (p *Manager) moveWal() (err error) {
var curPath string
var desiredPath string
var tmpPath string
symlinkPath := filepath.Join(p.dataDir, "pg_wal")
if curPath, err = filepath.EvalSymlinks(symlinkPath); err != nil {
log.Errorf("could not evaluate symlink %s: %e", symlinkPath, err)
return err
}
if p.walDir == "" {
desiredPath = symlinkPath
tmpPath = filepath.Join(p.dataDir, "pg_wal_new")
} else {
desiredPath = p.walDir
tmpPath = p.walDir
}
if curPath == desiredPath {
return nil
}
if p.walDir == "" {
log.Infof("moving WAL from %s to %s first and then to %s", curPath, tmpPath, desiredPath)
} else {
log.Infof("moving WAL from %s to new location %s", curPath, desiredPath)
}
// We use tmpPath here first and (if needed) mv tmpPath to desiredPath when all is copied.
// This allows stolon-keeper to re-read symlink dest and continue should stolon-keeper be restarted while copying.
log.Debugf("creating %s", tmpPath)
if err = os.MkdirAll(tmpPath, 0700); err != nil && !os.IsExist(err) {
log.Errorf("could not create new dest folder %s: %e", tmpPath, err)
return err
}
log.Debugf("moving WAL files from %s to %s", curPath, tmpPath)
if entries, err := ioutil.ReadDir(curPath); err != nil {
log.Errorf("could not read contents of folder %s: %e", curPath, err)
return err
} else {
for _, entry := range entries {
srcEntry := filepath.Join(curPath, entry.Name())
dstEntry := filepath.Join(tmpPath, entry.Name())
log.Debugf("moving %s to %s", srcEntry, dstEntry)
if err = os.Rename(srcEntry, dstEntry); err != nil {
sebasmannem marked this conversation as resolved.
Show resolved Hide resolved
log.Errorf("could not move %s to %s: %e", srcEntry, dstEntry, err)
return err
}
}
}

if symlinkStat, err := os.Lstat(symlinkPath); err != nil {
log.Errorf("could not get info on current pg_wal folder/symlink %s: %e", symlinkPath, err)
return err
} else if symlinkStat.Mode()&os.ModeSymlink != 0 {
if err = os.Remove(symlinkPath); err != nil {
log.Errorf("could not remove current pg_wal symlink %s: %e", symlinkPath, err)
return err
}
} else if symlinkStat.IsDir() {
if err := syscall.Rmdir(symlinkPath); err != nil {
log.Errorf("could not remove current folder %s: %e", symlinkPath, err)
return err
}
} else {
err := fmt.Errorf("location %s is no symlink and no dir, so please check and resolve by hand", symlinkPath)
log.Error(err)
return err
}
if p.walDir == "" {
// So we were moving WAL files back into PGDATA. Let's rename the tmpDir now holding all WAL files and use that
// as PGDATA/pg_wal
if err = os.Rename(tmpPath, desiredPath); err != nil {
sebasmannem marked this conversation as resolved.
Show resolved Hide resolved
log.Errorf("cannot move %s to %s: %e", tmpPath, desiredPath, err)
return err
}
} else {
log.Infof("symlinking %s to %s", symlinkPath, desiredPath)
if err = os.Symlink(desiredPath, symlinkPath); err != nil {
// We were copying WAL files from PGDATA (or another location) to a location outside of PGDATA and
// pointing the symlink in the right direction failed.
log.Errorf("could not create symlink %s to %s: %e", symlinkPath, desiredPath, err)
return err
}
}
log.Infof("moving pg_wal from %s to %s is succesful", curPath, desiredPath)
return nil
}

// Start runs the startup sequence: writeConfs(false), then moveWal, then
// start. The sequence stops at the first step that fails and that step's
// error is returned unchanged.
func (p *Manager) Start() error {
	err := p.writeConfs(false)
	if err == nil {
		// Only touch the WAL location once the configuration step succeeded.
		err = p.moveWal()
	}
	if err != nil {
		return err
	}
	return p.start()
}

Expand Down Expand Up @@ -967,6 +1068,9 @@ func (p *Manager) SyncFromFollowed(followedConnParams ConnParams, replSlot strin
if replSlot != "" {
args = append(args, "--slot", replSlot)
}
if p.walDir != "" {
args = append(args, "--waldir", p.walDir)
}
cmd := exec.Command(name, args...)

cmd.Env = append(os.Environ(), fmt.Sprintf("PGPASSFILE=%s", pgpass.Name()))
Expand Down Expand Up @@ -1000,7 +1104,7 @@ func (p *Manager) SyncFromFollowed(followedConnParams ConnParams, replSlot strin
return nil
}

func (p *Manager) RemoveAll() error {
func (p *Manager) RemoveAllIfInitialized() error {
initialized, err := p.IsInitialized()
if err != nil {
return fmt.Errorf("failed to retrieve instance state: %v", err)
Expand All @@ -1016,6 +1120,17 @@ func (p *Manager) RemoveAll() error {
if started {
return fmt.Errorf("cannot remove postregsql database. Instance is active")
}

return p.RemoveAll()
}

// RemoveAll entirely cleans up the data directory, including any wal directory if that
// exists outside of the data directory.
//
// Both removals are always attempted, so a failure on the wal directory does
// not leave the data directory behind. The returned error is nil only when
// everything was removed; a failure to remove the wal directory is reported
// instead of being silently dropped.
func (p *Manager) RemoveAll() error {
	var walErr error
	if p.walDir != "" {
		walErr = os.RemoveAll(p.walDir)
	}
	dataErr := os.RemoveAll(p.dataDir)

	switch {
	case walErr != nil && dataErr != nil:
		return fmt.Errorf("failed to remove wal dir: %v; failed to remove data dir: %v", walErr, dataErr)
	case walErr != nil:
		return fmt.Errorf("failed to remove wal dir: %v", walErr)
	}
	// Data dir errors are returned unchanged, as before.
	return dataErr
}

Expand Down
33 changes: 13 additions & 20 deletions internal/postgresql/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -362,27 +362,20 @@ func fileExists(path string) (bool, error) {
return true, nil
}

func expand(s, dataDir string) string {
buf := make([]byte, 0, 2*len(s))
// %d %% are all ASCII, so bytes are fine for this operation.
i := 0
for j := 0; j < len(s); j++ {
if s[j] == '%' && j+1 < len(s) {
switch s[j+1] {
case 'd':
buf = append(buf, s[i:j]...)
buf = append(buf, []byte(dataDir)...)
j += 1
i = j + 1
case '%':
j += 1
buf = append(buf, s[i:j]...)
i = j + 1
default:
}
// expandRecoveryCommand substitutes the data and wal directories into a point-in-time
// recovery command string. Any %d become the data directory, any %w become the wal
// directory and any literal % characters are escaped by themselves (%% -> %).
func expandRecoveryCommand(cmd, dataDir, walDir string) string {
return regexp.MustCompile(`%[dw%]`).ReplaceAllStringFunc(cmd, func(match string) string {
switch match[1] {
case 'd':
return dataDir
case 'w':
return walDir
}
}
return string(buf) + s[i:]

return "%"
})
}

func getConfigFilePGParameters(ctx context.Context, connParams ConnParams) (common.Parameters, error) {
Expand Down
8 changes: 6 additions & 2 deletions internal/postgresql/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ func TestValidReplSlotName(t *testing.T) {
}
}

func TestExpand(t *testing.T) {
func TestExpandRecoveryCommand(t *testing.T) {
tests := []struct {
in string
out string
Expand All @@ -106,6 +106,10 @@ func TestExpand(t *testing.T) {
in: "%d",
out: "/datadir",
},
{
in: "%w",
out: "/waldir",
},
{
in: "%%d",
out: "%d",
Expand All @@ -121,7 +125,7 @@ func TestExpand(t *testing.T) {
}

for i, tt := range tests {
out := expand(tt.in, "/datadir")
out := expandRecoveryCommand(tt.in, "/datadir", "/waldir")
if out != tt.out {
t.Errorf("#%d: wrong expanded string: got: %s, want: %s", i, out, tt.out)
}
Expand Down
1 change: 1 addition & 0 deletions internal/timer/timer_fallback.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !linux
// +build !linux

package timer
Expand Down
1 change: 1 addition & 0 deletions internal/timer/timer_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build linux
// +build linux

package timer
Expand Down
Loading