mirror of
https://github.com/k3s-io/k3s.git
synced 2024-06-07 19:41:36 +00:00
05f6255437
Ubuntu and Debian kernels support mounting real overlayfs inside userns, but the vanilla kernel still does not allow it. OTOH fuse-overlayfs can be mounted inside userns with the vanilla kernel (>= 4.18). Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
509 lines
13 KiB
Go
509 lines
13 KiB
Go
// +build linux
|
|
|
|
/*
|
|
Copyright The containerd Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package fuseoverlayfs
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"syscall"
|
|
|
|
"github.com/containerd/containerd/log"
|
|
"github.com/containerd/containerd/mount"
|
|
"github.com/containerd/containerd/snapshots"
|
|
"github.com/containerd/containerd/snapshots/storage"
|
|
"github.com/containerd/continuity/fs"
|
|
"github.com/pkg/errors"
|
|
)
|
|
|
|
// fuseoverlayfsBinary is the fuse-overlayfs name; the snapshotter appends it
// to "fuse3." to build the mount type of the overlay mounts it returns.
const fuseoverlayfsBinary = "fuse-overlayfs"
|
|
|
|
type (
	// SnapshotterConfig collects the settings that Opt functions apply
	// before the snapshotter instance is built.
	SnapshotterConfig struct {
		// asyncRemove is switched on by AsynchronousRemove.
		asyncRemove bool
	}

	// Opt is a functional option that adjusts a SnapshotterConfig. A non-nil
	// error makes NewSnapshotter fail.
	Opt func(config *SnapshotterConfig) error
)

// AsynchronousRemove is an Opt that postpones removal of filesystem content
// until the Cleanup method is called. A removal still makes the snapshot
// referred to by the key unavailable right away, and the key can be re-used
// immediately.
//
// AsynchronousRemove is untested for fuse-overlayfs.
func AsynchronousRemove(config *SnapshotterConfig) error {
	config.asyncRemove = true
	return nil
}
|
|
|
|
type snapshotter struct {
|
|
root string
|
|
ms *storage.MetaStore
|
|
asyncRemove bool
|
|
}
|
|
|
|
// NewSnapshotter returns a Snapshotter which uses overlayfs. The overlayfs
|
|
// diffs are stored under the provided root. A metadata file is stored under
|
|
// the root.
|
|
func NewSnapshotter(root string, opts ...Opt) (snapshots.Snapshotter, error) {
|
|
var config SnapshotterConfig
|
|
for _, opt := range opts {
|
|
if err := opt(&config); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
if err := os.MkdirAll(root, 0700); err != nil {
|
|
return nil, err
|
|
}
|
|
ms, err := storage.NewMetaStore(filepath.Join(root, "metadata.db"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := os.Mkdir(filepath.Join(root, "snapshots"), 0700); err != nil && !os.IsExist(err) {
|
|
return nil, err
|
|
}
|
|
|
|
return &snapshotter{
|
|
root: root,
|
|
ms: ms,
|
|
asyncRemove: config.asyncRemove,
|
|
}, nil
|
|
}
|
|
|
|
// Stat returns the info for an active or committed snapshot by name or
|
|
// key.
|
|
//
|
|
// Should be used for parent resolution, existence checks and to discern
|
|
// the kind of snapshot.
|
|
func (o *snapshotter) Stat(ctx context.Context, key string) (snapshots.Info, error) {
|
|
ctx, t, err := o.ms.TransactionContext(ctx, false)
|
|
if err != nil {
|
|
return snapshots.Info{}, err
|
|
}
|
|
defer t.Rollback()
|
|
_, info, _, err := storage.GetInfo(ctx, key)
|
|
if err != nil {
|
|
return snapshots.Info{}, err
|
|
}
|
|
|
|
return info, nil
|
|
}
|
|
|
|
func (o *snapshotter) Update(ctx context.Context, info snapshots.Info, fieldpaths ...string) (snapshots.Info, error) {
|
|
ctx, t, err := o.ms.TransactionContext(ctx, true)
|
|
if err != nil {
|
|
return snapshots.Info{}, err
|
|
}
|
|
|
|
info, err = storage.UpdateInfo(ctx, info, fieldpaths...)
|
|
if err != nil {
|
|
t.Rollback()
|
|
return snapshots.Info{}, err
|
|
}
|
|
|
|
if err := t.Commit(); err != nil {
|
|
return snapshots.Info{}, err
|
|
}
|
|
|
|
return info, nil
|
|
}
|
|
|
|
// Usage returns the resources taken by the snapshot identified by key.
|
|
//
|
|
// For active snapshots, this will scan the usage of the overlay "diff" (aka
|
|
// "upper") directory and may take some time.
|
|
//
|
|
// For committed snapshots, the value is returned from the metadata database.
|
|
func (o *snapshotter) Usage(ctx context.Context, key string) (snapshots.Usage, error) {
|
|
ctx, t, err := o.ms.TransactionContext(ctx, false)
|
|
if err != nil {
|
|
return snapshots.Usage{}, err
|
|
}
|
|
id, info, usage, err := storage.GetInfo(ctx, key)
|
|
t.Rollback() // transaction no longer needed at this point.
|
|
|
|
if err != nil {
|
|
return snapshots.Usage{}, err
|
|
}
|
|
|
|
upperPath := o.upperPath(id)
|
|
|
|
if info.Kind == snapshots.KindActive {
|
|
du, err := fs.DiskUsage(ctx, upperPath)
|
|
if err != nil {
|
|
// TODO(stevvooe): Consider not reporting an error in this case.
|
|
return snapshots.Usage{}, err
|
|
}
|
|
|
|
usage = snapshots.Usage(du)
|
|
}
|
|
|
|
return usage, nil
|
|
}
|
|
|
|
func (o *snapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
|
|
return o.createSnapshot(ctx, snapshots.KindActive, key, parent, opts)
|
|
}
|
|
|
|
func (o *snapshotter) View(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
|
|
return o.createSnapshot(ctx, snapshots.KindView, key, parent, opts)
|
|
}
|
|
|
|
// Mounts returns the mounts for the transaction identified by key. Can be
|
|
// called on an read-write or readonly transaction.
|
|
//
|
|
// This can be used to recover mounts after calling View or Prepare.
|
|
func (o *snapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, error) {
|
|
ctx, t, err := o.ms.TransactionContext(ctx, false)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
s, err := storage.GetSnapshot(ctx, key)
|
|
_, info, _, err := storage.GetInfo(ctx, key)
|
|
t.Rollback()
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to get active mount")
|
|
}
|
|
return o.mounts(s, info), nil
|
|
}
|
|
|
|
func (o *snapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) error {
|
|
ctx, t, err := o.ms.TransactionContext(ctx, true)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
defer func() {
|
|
if err != nil {
|
|
if rerr := t.Rollback(); rerr != nil {
|
|
log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")
|
|
}
|
|
}
|
|
}()
|
|
|
|
// grab the existing id
|
|
id, _, _, err := storage.GetInfo(ctx, key)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
usage, err := fs.DiskUsage(ctx, o.upperPath(id))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if _, err = storage.CommitActive(ctx, key, name, snapshots.Usage(usage), opts...); err != nil {
|
|
return errors.Wrap(err, "failed to commit snapshot")
|
|
}
|
|
return t.Commit()
|
|
}
|
|
|
|
// Remove abandons the snapshot identified by key. The snapshot will
|
|
// immediately become unavailable and unrecoverable. Disk space will
|
|
// be freed up on the next call to `Cleanup`.
|
|
func (o *snapshotter) Remove(ctx context.Context, key string) (err error) {
|
|
ctx, t, err := o.ms.TransactionContext(ctx, true)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer func() {
|
|
if err != nil {
|
|
if rerr := t.Rollback(); rerr != nil {
|
|
log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")
|
|
}
|
|
}
|
|
}()
|
|
|
|
_, _, err = storage.Remove(ctx, key)
|
|
if err != nil {
|
|
return errors.Wrap(err, "failed to remove")
|
|
}
|
|
|
|
if !o.asyncRemove {
|
|
var removals []string
|
|
removals, err = o.getCleanupDirectories(ctx, t)
|
|
if err != nil {
|
|
return errors.Wrap(err, "unable to get directories for removal")
|
|
}
|
|
|
|
// Remove directories after the transaction is closed, failures must not
|
|
// return error since the transaction is committed with the removal
|
|
// key no longer available.
|
|
defer func() {
|
|
if err == nil {
|
|
for _, dir := range removals {
|
|
if err := os.RemoveAll(dir); err != nil {
|
|
log.G(ctx).WithError(err).WithField("path", dir).Warn("failed to remove directory")
|
|
}
|
|
}
|
|
}
|
|
}()
|
|
|
|
}
|
|
|
|
return t.Commit()
|
|
}
|
|
|
|
// Walk the committed snapshots.
|
|
func (o *snapshotter) Walk(ctx context.Context, fn snapshots.WalkFunc, fs ...string) error {
|
|
ctx, t, err := o.ms.TransactionContext(ctx, false)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer t.Rollback()
|
|
return storage.WalkInfo(ctx, fn, fs...)
|
|
}
|
|
|
|
// Cleanup cleans up disk resources from removed or abandoned snapshots
|
|
func (o *snapshotter) Cleanup(ctx context.Context) error {
|
|
cleanup, err := o.cleanupDirectories(ctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, dir := range cleanup {
|
|
if err := os.RemoveAll(dir); err != nil {
|
|
log.G(ctx).WithError(err).WithField("path", dir).Warn("failed to remove directory")
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (o *snapshotter) cleanupDirectories(ctx context.Context) ([]string, error) {
|
|
// Get a write transaction to ensure no other write transaction can be entered
|
|
// while the cleanup is scanning.
|
|
ctx, t, err := o.ms.TransactionContext(ctx, true)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
defer t.Rollback()
|
|
return o.getCleanupDirectories(ctx, t)
|
|
}
|
|
|
|
func (o *snapshotter) getCleanupDirectories(ctx context.Context, t storage.Transactor) ([]string, error) {
|
|
ids, err := storage.IDMap(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
snapshotDir := filepath.Join(o.root, "snapshots")
|
|
fd, err := os.Open(snapshotDir)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer fd.Close()
|
|
|
|
dirs, err := fd.Readdirnames(0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
cleanup := []string{}
|
|
for _, d := range dirs {
|
|
if _, ok := ids[d]; ok {
|
|
continue
|
|
}
|
|
|
|
cleanup = append(cleanup, filepath.Join(snapshotDir, d))
|
|
}
|
|
|
|
return cleanup, nil
|
|
}
|
|
|
|
func (o *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, key, parent string, opts []snapshots.Opt) (_ []mount.Mount, err error) {
|
|
ctx, t, err := o.ms.TransactionContext(ctx, true)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var td, path string
|
|
defer func() {
|
|
if err != nil {
|
|
if td != "" {
|
|
if err1 := os.RemoveAll(td); err1 != nil {
|
|
log.G(ctx).WithError(err1).Warn("failed to cleanup temp snapshot directory")
|
|
}
|
|
}
|
|
if path != "" {
|
|
if err1 := os.RemoveAll(path); err1 != nil {
|
|
log.G(ctx).WithError(err1).WithField("path", path).Error("failed to reclaim snapshot directory, directory may need removal")
|
|
err = errors.Wrapf(err, "failed to remove path: %v", err1)
|
|
}
|
|
}
|
|
}
|
|
}()
|
|
|
|
snapshotDir := filepath.Join(o.root, "snapshots")
|
|
td, err = o.prepareDirectory(ctx, snapshotDir, kind)
|
|
if err != nil {
|
|
if rerr := t.Rollback(); rerr != nil {
|
|
log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")
|
|
}
|
|
return nil, errors.Wrap(err, "failed to create prepare snapshot dir")
|
|
}
|
|
rollback := true
|
|
defer func() {
|
|
if rollback {
|
|
if rerr := t.Rollback(); rerr != nil {
|
|
log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")
|
|
}
|
|
}
|
|
}()
|
|
|
|
s, err := storage.CreateSnapshot(ctx, kind, key, parent, opts...)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to create snapshot")
|
|
}
|
|
|
|
if len(s.ParentIDs) > 0 {
|
|
st, err := os.Stat(o.upperPath(s.ParentIDs[0]))
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to stat parent")
|
|
}
|
|
|
|
stat := st.Sys().(*syscall.Stat_t)
|
|
|
|
if err := os.Lchown(filepath.Join(td, "fs"), int(stat.Uid), int(stat.Gid)); err != nil {
|
|
if rerr := t.Rollback(); rerr != nil {
|
|
log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")
|
|
}
|
|
return nil, errors.Wrap(err, "failed to chown")
|
|
}
|
|
}
|
|
|
|
path = filepath.Join(snapshotDir, s.ID)
|
|
if err = os.Rename(td, path); err != nil {
|
|
return nil, errors.Wrap(err, "failed to rename")
|
|
}
|
|
td = ""
|
|
|
|
_, info, _, err := storage.GetInfo(ctx, key)
|
|
|
|
rollback = false
|
|
if err = t.Commit(); err != nil {
|
|
return nil, errors.Wrap(err, "commit failed")
|
|
}
|
|
|
|
return o.mounts(s, info), nil
|
|
}
|
|
|
|
func (o *snapshotter) prepareDirectory(ctx context.Context, snapshotDir string, kind snapshots.Kind) (string, error) {
|
|
td, err := ioutil.TempDir(snapshotDir, "new-")
|
|
if err != nil {
|
|
return "", errors.Wrap(err, "failed to create temp dir")
|
|
}
|
|
|
|
if err := os.Mkdir(filepath.Join(td, "fs"), 0755); err != nil {
|
|
return td, err
|
|
}
|
|
|
|
if kind == snapshots.KindActive {
|
|
if err := os.Mkdir(filepath.Join(td, "work"), 0711); err != nil {
|
|
return td, err
|
|
}
|
|
}
|
|
|
|
return td, nil
|
|
}
|
|
|
|
func (o *snapshotter) mounts(s storage.Snapshot, info snapshots.Info) []mount.Mount {
|
|
if len(s.ParentIDs) == 0 {
|
|
// if we only have one layer/no parents then just return a bind mount as overlay
|
|
// will not work
|
|
roFlag := "rw"
|
|
if s.Kind == snapshots.KindView {
|
|
roFlag = "ro"
|
|
}
|
|
|
|
return []mount.Mount{
|
|
{
|
|
Source: o.upperPath(s.ID),
|
|
Type: "bind",
|
|
Options: []string{
|
|
roFlag,
|
|
"rbind",
|
|
},
|
|
},
|
|
}
|
|
}
|
|
var options []string
|
|
|
|
if s.Kind == snapshots.KindActive {
|
|
options = append(options,
|
|
fmt.Sprintf("workdir=%s", o.workPath(s.ID)),
|
|
fmt.Sprintf("upperdir=%s", o.upperPath(s.ID)),
|
|
)
|
|
} else if len(s.ParentIDs) == 1 {
|
|
return []mount.Mount{
|
|
{
|
|
Source: o.upperPath(s.ParentIDs[0]),
|
|
Type: "bind",
|
|
Options: []string{
|
|
"ro",
|
|
"rbind",
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
parentPaths := make([]string, len(s.ParentIDs))
|
|
for i := range s.ParentIDs {
|
|
parentPaths[i] = o.upperPath(s.ParentIDs[i])
|
|
}
|
|
|
|
options = append(options, fmt.Sprintf("lowerdir=%s", strings.Join(parentPaths, ":")))
|
|
if mapping, ok := info.Labels["containerd.io/snapshot/uidmapping"]; ok {
|
|
options = append(options, fmt.Sprintf("uidmapping=%s", mapping))
|
|
}
|
|
if mapping, ok := info.Labels["containerd.io/snapshot/gidmapping"]; ok {
|
|
options = append(options, fmt.Sprintf("gidmapping=%s", mapping))
|
|
}
|
|
return []mount.Mount{
|
|
{
|
|
Type: "fuse3." + fuseoverlayfsBinary,
|
|
Source: "overlay",
|
|
Options: options,
|
|
},
|
|
}
|
|
|
|
}
|
|
|
|
func (o *snapshotter) upperPath(id string) string {
|
|
return filepath.Join(o.root, "snapshots", id, "fs")
|
|
}
|
|
|
|
func (o *snapshotter) workPath(id string) string {
|
|
return filepath.Join(o.root, "snapshots", id, "work")
|
|
}
|
|
|
|
// Close closes the snapshotter
|
|
func (o *snapshotter) Close() error {
|
|
return o.ms.Close()
|
|
}
|