Commit cdb4e53a by Christian Brauner

Cleanup bdev.c after splitting into modules

The function bdev_get() becomes static. It is not called from anywhere else so far and never appeared in any header. Minor changes: - Avoid comparisons between int and size_t types. Use size_t where possible, otherwise cast to size_t when it makes sense. - Insert missing spaces between operators. - Put declarations for all static functions at the top. Signed-off-by: Christian Brauner <christian.brauner@mailbox.org>
parent bf76c012
...@@ -189,12 +189,30 @@ static const struct bdev_ops zfs_ops = { ...@@ -189,12 +189,30 @@ static const struct bdev_ops zfs_ops = {
.can_backup = true, .can_backup = true,
}; };
struct bdev_type {
const char *name;
const struct bdev_ops *ops;
};
static const struct bdev_type bdevs[] = {
{.name = "zfs", .ops = &zfs_ops,},
{.name = "lvm", .ops = &lvm_ops,},
{.name = "rbd", .ops = &rbd_ops,},
{.name = "btrfs", .ops = &btrfs_ops,},
{.name = "dir", .ops = &dir_ops,},
{.name = "aufs", .ops = &aufs_ops,},
{.name = "overlayfs", .ops = &ovl_ops,},
{.name = "loop", .ops = &loop_ops,},
{.name = "nbd", .ops = &nbd_ops,},
};
static const size_t numbdevs = sizeof(bdevs) / sizeof(struct bdev_type);
/* helpers */ /* helpers */
/* static const struct bdev_type *bdev_query(const char *src);
* These are copied from conf.c. However as conf.c will be moved to using static struct bdev *bdev_get(const char *type);
* the callback system, they can be pulled from there eventually, so we static struct bdev *do_bdev_create(const char *dest, const char *type,
* don't need to pollute utils.c with these low level functions const char *cname, struct bdev_specs *specs);
*/
static int find_fstype_cb(char *buffer, void *data); static int find_fstype_cb(char *buffer, void *data);
static char *linkderef(char *path, char *dest); static char *linkderef(char *path, char *dest);
static bool unpriv_snap_allowed(struct bdev *b, const char *t, bool snap, static bool unpriv_snap_allowed(struct bdev *b, const char *t, bool snap,
...@@ -233,8 +251,8 @@ char *dir_new_path(char *src, const char *oldname, const char *name, ...@@ -233,8 +251,8 @@ char *dir_new_path(char *src, const char *oldname, const char *name,
} }
while ((p2 = strstr(src, oldname)) != NULL) { while ((p2 = strstr(src, oldname)) != NULL) {
strncpy(p, src, p2-src); // copy text up to oldname strncpy(p, src, p2 - src); // copy text up to oldname
p += p2-src; // move target pointer (p) p += p2 - src; // move target pointer (p)
p += sprintf(p, "%s", name); // print new name in place of oldname p += sprintf(p, "%s", name); // print new name in place of oldname
src = p2 + l2; // move src to end of oldname src = p2 + l2; // move src to end of oldname
} }
...@@ -243,156 +261,361 @@ char *dir_new_path(char *src, const char *oldname, const char *name, ...@@ -243,156 +261,361 @@ char *dir_new_path(char *src, const char *oldname, const char *name,
} }
/* /*
* return block size of dev->src in units of bytes * attach_block_device returns true if all went well,
* meaning either a block device was attached or was not
* needed. It returns false if something went wrong and
* container startup should be stopped.
*/ */
int blk_getsize(struct bdev *bdev, uint64_t *size) bool attach_block_device(struct lxc_conf *conf)
{ {
int fd, ret; char *path;
char *path = bdev->src;
if (strcmp(bdev->type, "loop") == 0) if (!conf->rootfs.path)
path = bdev->src + 5; return true;
path = conf->rootfs.path;
if (!requires_nbd(path))
return true;
path = strchr(path, ':');
if (!path)
return false;
path++;
if (!attach_nbd(path, conf))
return false;
return true;
}
fd = open(path, O_RDONLY); bool bdev_can_backup(struct lxc_conf *conf)
if (fd < 0) {
return -1; struct bdev *bdev = bdev_init(conf, NULL, NULL, NULL);
bool ret;
ret = ioctl(fd, BLKGETSIZE64, size); // size of device in bytes if (!bdev)
close(fd); return false;
ret = bdev->ops->can_backup;
bdev_put(bdev);
return ret; return ret;
} }
/* /*
* These are copied from conf.c. However as conf.c will be moved to using * If we're not snaphotting, then bdev_copy becomes a simple case of mount
* the callback system, they can be pulled from there eventually, so we * the original, mount the new, and rsync the contents.
* don't need to pollute utils.c with these low level functions
*/ */
static int find_fstype_cb(char* buffer, void *data) struct bdev *bdev_copy(struct lxc_container *c0, const char *cname,
const char *lxcpath, const char *bdevtype, int flags,
const char *bdevdata, uint64_t newsize, int *needs_rdep)
{ {
struct cbarg { struct bdev *orig, *new;
const char *rootfs; pid_t pid;
const char *target; int ret;
const char *options; bool snap = flags & LXC_CLONE_SNAPSHOT;
} *cbarg = data; bool maybe_snap = flags & LXC_CLONE_MAYBE_SNAPSHOT;
bool keepbdevtype = flags & LXC_CLONE_KEEPBDEVTYPE;
const char *src = c0->lxc_conf->rootfs.path;
const char *oldname = c0->name;
const char *oldpath = c0->config_path;
struct rsync_data data;
unsigned long mntflags; /* if the container name doesn't show up in the rootfs path, then
char *mntdata; * we don't know how to come up with a new name
char *fstype; */
if (strstr(src, oldname) == NULL) {
ERROR("original rootfs path %s doesn't include container name %s",
src, oldname);
return NULL;
}
/* we don't try 'nodev' entries */ orig = bdev_init(c0->lxc_conf, src, NULL, NULL);
if (strstr(buffer, "nodev")) if (!orig) {
return 0; ERROR("failed to detect blockdev type for %s", src);
return NULL;
}
fstype = buffer; if (!orig->dest) {
fstype += lxc_char_left_gc(fstype, strlen(fstype)); int ret;
fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0'; size_t len;
struct stat sb;
DEBUG("trying to mount '%s'->'%s' with fstype '%s'", len = strlen(oldpath) + strlen(oldname) + strlen("/rootfs") + 2;
cbarg->rootfs, cbarg->target, fstype); orig->dest = malloc(len);
if (!orig->dest) {
ERROR("out of memory");
bdev_put(orig);
return NULL;
}
ret = snprintf(orig->dest, len, "%s/%s/rootfs", oldpath, oldname);
if (ret < 0 || (size_t)ret >= len) {
ERROR("rootfs path too long");
bdev_put(orig);
return NULL;
}
ret = stat(orig->dest, &sb);
if (ret < 0 && errno == ENOENT)
if (mkdir_p(orig->dest, 0755) < 0)
WARN("Error creating '%s', continuing.", orig->dest);
}
if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) { /*
free(mntdata); * special case for snapshot - if caller requested maybe_snapshot and
return 0; * keepbdevtype and backing store is directory, then proceed with a copy
* clone rather than returning error
*/
if (maybe_snap && keepbdevtype && !bdevtype && !orig->ops->can_snapshot)
snap = false;
/*
* If newtype is NULL and snapshot is set, then use overlayfs
*/
if (!bdevtype && !keepbdevtype && snap && strcmp(orig->type , "dir") == 0)
bdevtype = "overlayfs";
if (am_unpriv() && !unpriv_snap_allowed(orig, bdevtype, snap, maybe_snap)) {
ERROR("Unsupported snapshot type for unprivileged users");
bdev_put(orig);
return NULL;
} }
if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) { *needs_rdep = 0;
DEBUG("mount failed with error: %s", strerror(errno)); if (bdevtype && strcmp(orig->type, "dir") == 0 &&
free(mntdata); (strcmp(bdevtype, "aufs") == 0 ||
return 0; strcmp(bdevtype, "overlayfs") == 0)) {
*needs_rdep = 1;
} else if (snap && strcmp(orig->type, "lvm") == 0 &&
!lvm_is_thin_volume(orig->src)) {
*needs_rdep = 1;
} }
free(mntdata); new = bdev_get(bdevtype ? bdevtype : orig->type);
if (!new) {
ERROR("no such block device type: %s", bdevtype ? bdevtype : orig->type);
bdev_put(orig);
return NULL;
}
INFO("mounted '%s' on '%s', with fstype '%s'", if (new->ops->clone_paths(orig, new, oldname, cname, oldpath, lxcpath,
cbarg->rootfs, cbarg->target, fstype); snap, newsize, c0->lxc_conf) < 0) {
ERROR("failed getting pathnames for cloned storage: %s", src);
goto err;
}
return 1; if (am_unpriv() && chown_mapped_root(new->src, c0->lxc_conf) < 0)
} WARN("Failed to update ownership of %s", new->dest);
int mount_unknown_fs(const char *rootfs, const char *target, if (snap)
const char *options) return new;
{
struct cbarg {
const char *rootfs;
const char *target;
const char *options;
} cbarg = {
.rootfs = rootfs,
.target = target,
.options = options,
};
/* /*
* find the filesystem type with brute force: * https://github.com/lxc/lxc/issues/131
* first we check with /etc/filesystems, in case the modules * Use btrfs snapshot feature instead of rsync to restore if both orig and new are btrfs
* are auto-loaded and fall back to the supported kernel fs
*/ */
char *fsfile[] = { if (bdevtype &&
"/etc/filesystems", strcmp(orig->type, "btrfs") == 0 && strcmp(new->type, "btrfs") == 0 &&
"/proc/filesystems", btrfs_same_fs(orig->dest, new->dest) == 0) {
}; if (btrfs_destroy(new) < 0) {
ERROR("Error destroying %s subvolume", new->dest);
size_t i; goto err;
for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) { }
if (mkdir_p(new->dest, 0755) < 0) {
int ret; ERROR("Error creating %s directory", new->dest);
goto err;
}
if (btrfs_snapshot(orig->dest, new->dest) < 0) {
ERROR("Error restoring %s to %s", orig->dest, new->dest);
goto err;
}
bdev_put(orig);
return new;
}
if (access(fsfile[i], F_OK)) pid = fork();
continue; if (pid < 0) {
SYSERROR("fork");
goto err;
}
ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg); if (pid > 0) {
int ret = wait_for_pid(pid);
bdev_put(orig);
if (ret < 0) { if (ret < 0) {
ERROR("failed to parse '%s'", fsfile[i]); bdev_put(new);
return -1; return NULL;
} }
return new;
if (ret)
return 0;
} }
ERROR("failed to determine fs type for '%s'", rootfs); data.orig = orig;
return -1; data.new = new;
} if (am_unpriv())
ret = userns_exec_1(c0->lxc_conf, rsync_rootfs_wrapper, &data);
else
ret = rsync_rootfs(&data);
int do_mkfs(const char *path, const char *fstype) exit(ret == 0 ? 0 : 1);
err:
bdev_put(orig);
bdev_put(new);
return NULL;
}
/*
* bdev_create:
* Create a backing store for a container.
* If successful, return a struct bdev *, with the bdev mounted and ready
* for use. Before completing, the caller will need to call the
* umount operation and bdev_put().
* @dest: the mountpoint (i.e. /var/lib/lxc/$name/rootfs)
* @type: the bdevtype (dir, btrfs, zfs, rbd, etc)
* @cname: the container name
* @specs: details about the backing store to create, like fstype
*/
struct bdev *bdev_create(const char *dest, const char *type, const char *cname,
struct bdev_specs *specs)
{ {
pid_t pid; struct bdev *bdev;
char *best_options[] = {"btrfs", "zfs", "lvm", "dir", "rbd", NULL};
if ((pid = fork()) < 0) { if (!type)
ERROR("error forking"); return do_bdev_create(dest, "dir", cname, specs);
return -1;
if (strcmp(type, "best") == 0) {
int i;
// try for the best backing store type, according to our
// opinionated preferences
for (i = 0; best_options[i]; i++) {
if ((bdev = do_bdev_create(dest, best_options[i], cname, specs)))
return bdev;
}
return NULL; // 'dir' should never fail, so this shouldn't happen
} }
if (pid > 0)
return wait_for_pid(pid);
// If the file is not a block device, we don't want mkfs to ask // -B lvm,dir
// us about whether to proceed. if (strchr(type, ',') != NULL) {
if (null_stdfds() < 0) char *dup = alloca(strlen(type) + 1), *saveptr = NULL, *token;
exit(1); strcpy(dup, type);
execlp("mkfs", "mkfs", "-t", fstype, path, NULL); for (token = strtok_r(dup, ",", &saveptr); token;
exit(1); token = strtok_r(NULL, ",", &saveptr)) {
if ((bdev = do_bdev_create(dest, token, cname, specs)))
return bdev;
}
}
return do_bdev_create(dest, type, cname, specs);
} }
static char *linkderef(char *path, char *dest) bool bdev_destroy(struct lxc_conf *conf)
{ {
struct stat sbuf; struct bdev *r;
ssize_t ret; bool ret = false;
ret = stat(path, &sbuf); r = bdev_init(conf, conf->rootfs.path, conf->rootfs.mount, NULL);
if (ret < 0) if (!r)
return ret;
if (r->ops->destroy(r) == 0)
ret = true;
bdev_put(r);
return ret;
}
int bdev_destroy_wrapper(void *data)
{
struct lxc_conf *conf = data;
if (setgid(0) < 0) {
ERROR("Failed to setgid to 0");
return -1;
}
if (setgroups(0, NULL) < 0)
WARN("Failed to clear groups");
if (setuid(0) < 0) {
ERROR("Failed to setuid to 0");
return -1;
}
if (!bdev_destroy(conf))
return -1;
else
return 0;
}
struct bdev *bdev_init(struct lxc_conf *conf, const char *src, const char *dst,
const char *mntopts)
{
struct bdev *bdev;
const struct bdev_type *q;
if (!src)
src = conf->rootfs.path;
if (!src)
return NULL; return NULL;
if (!S_ISLNK(sbuf.st_mode))
return path; q = bdev_query(src);
ret = readlink(path, dest, MAXPATHLEN); if (!q)
if (ret < 0) {
SYSERROR("error reading link %s", path);
return NULL; return NULL;
} else if (ret >= MAXPATHLEN) {
ERROR("link in %s too long", path); bdev = malloc(sizeof(struct bdev));
if (!bdev)
return NULL; return NULL;
} memset(bdev, 0, sizeof(struct bdev));
dest[ret] = '\0'; bdev->ops = q->ops;
return dest; bdev->type = q->name;
if (mntopts)
bdev->mntopts = strdup(mntopts);
if (src)
bdev->src = strdup(src);
if (dst)
bdev->dest = strdup(dst);
if (strcmp(bdev->type, "nbd") == 0)
bdev->nbd_idx = conf->nbd_idx;
return bdev;
}
bool bdev_is_dir(struct lxc_conf *conf, const char *path)
{
struct bdev *orig = bdev_init(conf, path, NULL, NULL);
bool ret = false;
if (!orig)
return ret;
if (strcmp(orig->type, "dir") == 0)
ret = true;
bdev_put(orig);
return ret;
}
void bdev_put(struct bdev *bdev)
{
free(bdev->mntopts);
free(bdev->src);
free(bdev->dest);
free(bdev);
}
/*
* return block size of dev->src in units of bytes
*/
int blk_getsize(struct bdev *bdev, uint64_t *size)
{
int fd, ret;
char *path = bdev->src;
if (strcmp(bdev->type, "loop") == 0)
path = bdev->src + 5;
fd = open(path, O_RDONLY);
if (fd < 0)
return -1;
ret = ioctl(fd, BLKGETSIZE64, size); // size of device in bytes
close(fd);
return ret;
}
void detach_block_device(struct lxc_conf *conf)
{
if (conf->nbd_idx != -1)
detach_nbd_idx(conf->nbd_idx);
} }
/* /*
...@@ -428,7 +651,7 @@ int detect_fs(struct bdev *bdev, char *type, int len) ...@@ -428,7 +651,7 @@ int detect_fs(struct bdev *bdev, char *type, int len)
int status; int status;
close(p[1]); close(p[1]);
memset(type, 0, len); memset(type, 0, len);
ret = read(p[0], type, len-1); ret = read(p[0], type, len - 1);
close(p[0]); close(p[0]);
if (ret < 0) { if (ret < 0) {
SYSERROR("error reading from pipe"); SYSERROR("error reading from pipe");
...@@ -440,7 +663,7 @@ int detect_fs(struct bdev *bdev, char *type, int len) ...@@ -440,7 +663,7 @@ int detect_fs(struct bdev *bdev, char *type, int len)
return -1; return -1;
} }
wait(&status); wait(&status);
type[len-1] = '\0'; type[len - 1] = '\0';
INFO("detected fstype %s for %s", type, srcdev); INFO("detected fstype %s for %s", type, srcdev);
return ret; return ret;
} }
...@@ -475,11 +698,11 @@ int detect_fs(struct bdev *bdev, char *type, int len) ...@@ -475,11 +698,11 @@ int detect_fs(struct bdev *bdev, char *type, int len)
*sp1 = '\0'; *sp1 = '\0';
if (strcmp(line, l)) if (strcmp(line, l))
continue; continue;
sp2 = strchr(sp1+1, ' '); sp2 = strchr(sp1 + 1, ' ');
if (!sp2) if (!sp2)
exit(1); exit(1);
*sp2 = '\0'; *sp2 = '\0';
sp3 = strchr(sp2+1, ' '); sp3 = strchr(sp2 + 1, ' ');
if (!sp3) if (!sp3)
exit(1); exit(1);
*sp3 = '\0'; *sp3 = '\0';
...@@ -491,13 +714,29 @@ int detect_fs(struct bdev *bdev, char *type, int len) ...@@ -491,13 +714,29 @@ int detect_fs(struct bdev *bdev, char *type, int len)
exit(1); exit(1);
} }
struct bdev_type { int do_mkfs(const char *path, const char *fstype)
const char *name; {
const struct bdev_ops *ops; pid_t pid;
};
if ((pid = fork()) < 0) {
ERROR("error forking");
return -1;
}
if (pid > 0)
return wait_for_pid(pid);
// If the file is not a block device, we don't want mkfs to ask
// us about whether to proceed.
if (null_stdfds() < 0)
exit(1);
execlp("mkfs", "mkfs", "-t", fstype, path, NULL);
exit(1);
}
// this will return 1 for physical disks, qemu-nbd, loop, etc /*
// right now only lvm is a block device * This will return 1 for physical disks, qemu-nbd, loop, etc right now only lvm
* is a block device.
*/
int is_blktype(struct bdev *b) int is_blktype(struct bdev *b)
{ {
if (strcmp(b->type, "lvm") == 0) if (strcmp(b->type, "lvm") == 0)
...@@ -505,39 +744,102 @@ int is_blktype(struct bdev *b) ...@@ -505,39 +744,102 @@ int is_blktype(struct bdev *b)
return 0; return 0;
} }
static const struct bdev_type bdevs[] = { int mount_unknown_fs(const char *rootfs, const char *target,
{.name = "zfs", .ops = &zfs_ops,}, const char *options)
{.name = "lvm", .ops = &lvm_ops,}, {
{.name = "rbd", .ops = &rbd_ops,}, struct cbarg {
{.name = "btrfs", .ops = &btrfs_ops,}, const char *rootfs;
{.name = "dir", .ops = &dir_ops,}, const char *target;
{.name = "aufs", .ops = &aufs_ops,}, const char *options;
{.name = "overlayfs", .ops = &ovl_ops,}, } cbarg = {
{.name = "loop", .ops = &loop_ops,}, .rootfs = rootfs,
{.name = "nbd", .ops = &nbd_ops,}, .target = target,
}; .options = options,
};
static const size_t numbdevs = sizeof(bdevs) / sizeof(struct bdev_type); /*
* find the filesystem type with brute force:
* first we check with /etc/filesystems, in case the modules
* are auto-loaded and fall back to the supported kernel fs
*/
char *fsfile[] = {
"/etc/filesystems",
"/proc/filesystems",
};
void bdev_put(struct bdev *bdev) size_t i;
{ for (i = 0; i < sizeof(fsfile) / sizeof(fsfile[0]); i++) {
free(bdev->mntopts);
free(bdev->src);
free(bdev->dest);
free(bdev);
}
struct bdev *bdev_get(const char *type) int ret;
{
int i;
struct bdev *bdev;
for (i=0; i<numbdevs; i++) { if (access(fsfile[i], F_OK))
if (strcmp(bdevs[i].name, type) == 0) continue;
break;
} ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
if (i == numbdevs) if (ret < 0) {
return NULL; ERROR("failed to parse '%s'", fsfile[i]);
return -1;
}
if (ret)
return 0;
}
ERROR("failed to determine fs type for '%s'", rootfs);
return -1;
}
bool rootfs_is_blockdev(struct lxc_conf *conf)
{
const struct bdev_type *q;
struct stat st;
int ret;
if (!conf->rootfs.path || strcmp(conf->rootfs.path, "/") == 0 ||
strlen(conf->rootfs.path) == 0)
return false;
ret = stat(conf->rootfs.path, &st);
if (ret == 0 && S_ISBLK(st.st_mode))
return true;
q = bdev_query(conf->rootfs.path);
if (!q)
return false;
if (strcmp(q->name, "lvm") == 0 ||
strcmp(q->name, "loop") == 0 ||
strcmp(q->name, "nbd") == 0)
return true;
return false;
}
static struct bdev *do_bdev_create(const char *dest, const char *type,
const char *cname, struct bdev_specs *specs)
{
struct bdev *bdev = bdev_get(type);
if (!bdev) {
return NULL;
}
if (bdev->ops->create(bdev, dest, cname, specs) < 0) {
bdev_put(bdev);
return NULL;
}
return bdev;
}
static struct bdev *bdev_get(const char *type)
{
int i;
struct bdev *bdev;
for (i = 0; i < numbdevs; i++) {
if (strcmp(bdevs[i].name, type) == 0)
break;
}
if (i == numbdevs)
return NULL;
bdev = malloc(sizeof(struct bdev)); bdev = malloc(sizeof(struct bdev));
if (!bdev) if (!bdev)
return NULL; return NULL;
...@@ -550,7 +852,7 @@ struct bdev *bdev_get(const char *type) ...@@ -550,7 +852,7 @@ struct bdev *bdev_get(const char *type)
static const struct bdev_type *bdev_query(const char *src) static const struct bdev_type *bdev_query(const char *src)
{ {
int i; int i;
for (i=0; i<numbdevs; i++) { for (i = 0; i < numbdevs; i++) {
int r; int r;
r = bdevs[i].ops->detect(src); r = bdevs[i].ops->detect(src);
if (r) if (r)
...@@ -562,62 +864,73 @@ static const struct bdev_type *bdev_query(const char *src) ...@@ -562,62 +864,73 @@ static const struct bdev_type *bdev_query(const char *src)
return &bdevs[i]; return &bdevs[i];
} }
struct bdev *bdev_init(struct lxc_conf *conf, const char *src, const char *dst, /*
const char *mntopts) * These are copied from conf.c. However as conf.c will be moved to using
* the callback system, they can be pulled from there eventually, so we
* don't need to pollute utils.c with these low level functions
*/
static int find_fstype_cb(char* buffer, void *data)
{ {
struct bdev *bdev; struct cbarg {
const struct bdev_type *q; const char *rootfs;
const char *target;
const char *options;
} *cbarg = data;
if (!src) unsigned long mntflags;
src = conf->rootfs.path; char *mntdata;
char *fstype;
if (!src) /* we don't try 'nodev' entries */
return NULL; if (strstr(buffer, "nodev"))
return 0;
q = bdev_query(src); fstype = buffer;
if (!q) fstype += lxc_char_left_gc(fstype, strlen(fstype));
return NULL; fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';
bdev = malloc(sizeof(struct bdev)); DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
if (!bdev) cbarg->rootfs, cbarg->target, fstype);
return NULL;
memset(bdev, 0, sizeof(struct bdev));
bdev->ops = q->ops;
bdev->type = q->name;
if (mntopts)
bdev->mntopts = strdup(mntopts);
if (src)
bdev->src = strdup(src);
if (dst)
bdev->dest = strdup(dst);
if (strcmp(bdev->type, "nbd") == 0)
bdev->nbd_idx = conf->nbd_idx;
return bdev; if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
} free(mntdata);
return 0;
}
bool bdev_is_dir(struct lxc_conf *conf, const char *path) if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
{ DEBUG("mount failed with error: %s", strerror(errno));
struct bdev *orig = bdev_init(conf, path, NULL, NULL); free(mntdata);
bool ret = false; return 0;
if (!orig) }
return ret;
if (strcmp(orig->type, "dir") == 0) free(mntdata);
ret = true;
bdev_put(orig); INFO("mounted '%s' on '%s', with fstype '%s'",
return ret; cbarg->rootfs, cbarg->target, fstype);
return 1;
} }
bool bdev_can_backup(struct lxc_conf *conf) static char *linkderef(char *path, char *dest)
{ {
struct bdev *bdev = bdev_init(conf, NULL, NULL, NULL); struct stat sbuf;
bool ret; ssize_t ret;
if (!bdev) ret = stat(path, &sbuf);
return false; if (ret < 0)
ret = bdev->ops->can_backup; return NULL;
bdev_put(bdev); if (!S_ISLNK(sbuf.st_mode))
return ret; return path;
ret = readlink(path, dest, MAXPATHLEN);
if (ret < 0) {
SYSERROR("error reading link %s", path);
return NULL;
} else if (ret >= MAXPATHLEN) {
ERROR("link in %s too long", path);
return NULL;
}
dest[ret] = '\0';
return dest;
} }
/* /*
...@@ -649,315 +962,3 @@ static bool unpriv_snap_allowed(struct bdev *b, const char *t, bool snap, ...@@ -649,315 +962,3 @@ static bool unpriv_snap_allowed(struct bdev *b, const char *t, bool snap,
return true; return true;
return false; return false;
} }
/*
* If we're not snaphotting, then bdev_copy becomes a simple case of mount
* the original, mount the new, and rsync the contents.
*/
struct bdev *bdev_copy(struct lxc_container *c0, const char *cname,
const char *lxcpath, const char *bdevtype, int flags,
const char *bdevdata, uint64_t newsize, int *needs_rdep)
{
struct bdev *orig, *new;
pid_t pid;
int ret;
bool snap = flags & LXC_CLONE_SNAPSHOT;
bool maybe_snap = flags & LXC_CLONE_MAYBE_SNAPSHOT;
bool keepbdevtype = flags & LXC_CLONE_KEEPBDEVTYPE;
const char *src = c0->lxc_conf->rootfs.path;
const char *oldname = c0->name;
const char *oldpath = c0->config_path;
struct rsync_data data;
/* if the container name doesn't show up in the rootfs path, then
* we don't know how to come up with a new name
*/
if (strstr(src, oldname) == NULL) {
ERROR("original rootfs path %s doesn't include container name %s",
src, oldname);
return NULL;
}
orig = bdev_init(c0->lxc_conf, src, NULL, NULL);
if (!orig) {
ERROR("failed to detect blockdev type for %s", src);
return NULL;
}
if (!orig->dest) {
int ret;
size_t len;
struct stat sb;
len = strlen(oldpath) + strlen(oldname) + strlen("/rootfs") + 2;
orig->dest = malloc(len);
if (!orig->dest) {
ERROR("out of memory");
bdev_put(orig);
return NULL;
}
ret = snprintf(orig->dest, len, "%s/%s/rootfs", oldpath, oldname);
if (ret < 0 || ret >= len) {
ERROR("rootfs path too long");
bdev_put(orig);
return NULL;
}
ret = stat(orig->dest, &sb);
if (ret < 0 && errno == ENOENT)
if (mkdir_p(orig->dest, 0755) < 0)
WARN("Error creating '%s', continuing.", orig->dest);
}
/*
* special case for snapshot - if caller requested maybe_snapshot and
* keepbdevtype and backing store is directory, then proceed with a copy
* clone rather than returning error
*/
if (maybe_snap && keepbdevtype && !bdevtype && !orig->ops->can_snapshot)
snap = false;
/*
* If newtype is NULL and snapshot is set, then use overlayfs
*/
if (!bdevtype && !keepbdevtype && snap && strcmp(orig->type , "dir") == 0)
bdevtype = "overlayfs";
if (am_unpriv() && !unpriv_snap_allowed(orig, bdevtype, snap, maybe_snap)) {
ERROR("Unsupported snapshot type for unprivileged users");
bdev_put(orig);
return NULL;
}
*needs_rdep = 0;
if (bdevtype && strcmp(orig->type, "dir") == 0 &&
(strcmp(bdevtype, "aufs") == 0 ||
strcmp(bdevtype, "overlayfs") == 0)) {
*needs_rdep = 1;
} else if (snap && strcmp(orig->type, "lvm") == 0 &&
!lvm_is_thin_volume(orig->src)) {
*needs_rdep = 1;
}
new = bdev_get(bdevtype ? bdevtype : orig->type);
if (!new) {
ERROR("no such block device type: %s", bdevtype ? bdevtype : orig->type);
bdev_put(orig);
return NULL;
}
if (new->ops->clone_paths(orig, new, oldname, cname, oldpath, lxcpath,
snap, newsize, c0->lxc_conf) < 0) {
ERROR("failed getting pathnames for cloned storage: %s", src);
goto err;
}
if (am_unpriv() && chown_mapped_root(new->src, c0->lxc_conf) < 0)
WARN("Failed to update ownership of %s", new->dest);
if (snap)
return new;
/*
* https://github.com/lxc/lxc/issues/131
* Use btrfs snapshot feature instead of rsync to restore if both orig and new are btrfs
*/
if (bdevtype &&
strcmp(orig->type, "btrfs") == 0 && strcmp(new->type, "btrfs") == 0 &&
btrfs_same_fs(orig->dest, new->dest) == 0) {
if (btrfs_destroy(new) < 0) {
ERROR("Error destroying %s subvolume", new->dest);
goto err;
}
if (mkdir_p(new->dest, 0755) < 0) {
ERROR("Error creating %s directory", new->dest);
goto err;
}
if (btrfs_snapshot(orig->dest, new->dest) < 0) {
ERROR("Error restoring %s to %s", orig->dest, new->dest);
goto err;
}
bdev_put(orig);
return new;
}
pid = fork();
if (pid < 0) {
SYSERROR("fork");
goto err;
}
if (pid > 0) {
int ret = wait_for_pid(pid);
bdev_put(orig);
if (ret < 0) {
bdev_put(new);
return NULL;
}
return new;
}
data.orig = orig;
data.new = new;
if (am_unpriv())
ret = userns_exec_1(c0->lxc_conf, rsync_rootfs_wrapper, &data);
else
ret = rsync_rootfs(&data);
exit(ret == 0 ? 0 : 1);
err:
bdev_put(orig);
bdev_put(new);
return NULL;
}
/*
 * Obtain a bdev of the given type from bdev_get() and run that type's
 * create operation on it. Returns the bdev on success. If no bdev could be
 * obtained, or the create operation reports an error (< 0), NULL is
 * returned and any bdev obtained is released with bdev_put().
 */
static struct bdev *do_bdev_create(const char *dest, const char *type,
				   const char *cname, struct bdev_specs *specs)
{
	struct bdev *store;

	store = bdev_get(type);
	if (store && store->ops->create(store, dest, cname, specs) < 0) {
		bdev_put(store);
		store = NULL;
	}

	return store;
}
/*
 * bdev_create:
 * Create a backing store for a container.
 * If successful, return a struct bdev *, with the bdev mounted and ready
 * for use. Before completing, the caller will need to call the
 * umount operation and bdev_put().
 * @dest: the mountpoint (i.e. /var/lib/lxc/$name/rootfs)
 * @type: the bdevtype (dir, btrfs, zfs, rbd, etc). NULL selects "dir",
 *        "best" tries a built-in preference list, and a comma-separated
 *        list (e.g. "lvm,dir") tries each entry in order.
 * @cname: the container name
 * @specs: details about the backing store to create, like fstype
 *
 * Returns NULL if no backing store could be created.
 */
struct bdev *bdev_create(const char *dest, const char *type, const char *cname,
			 struct bdev_specs *specs)
{
	struct bdev *bdev;
	static const char *const best_options[] = {"btrfs", "zfs", "lvm", "dir", "rbd", NULL};

	if (!type)
		return do_bdev_create(dest, "dir", cname, specs);

	if (strcmp(type, "best") == 0) {
		size_t i;

		// try for the best backing store type, according to our
		// opinionated preferences
		for (i = 0; best_options[i]; i++) {
			bdev = do_bdev_create(dest, best_options[i], cname, specs);
			if (bdev)
				return bdev;
		}

		return NULL; // 'dir' should never fail, so this shouldn't happen
	}

	// -B lvm,dir
	if (strchr(type, ',') != NULL) {
		size_t len = strlen(type) + 1;
		char *dup, *saveptr = NULL, *token;

		/*
		 * strtok_r() modifies its input, so work on a private copy.
		 * The copy lives on the heap because the length of type is
		 * not bounded here.
		 */
		dup = malloc(len);
		if (!dup)
			return NULL;
		memcpy(dup, type, len);

		bdev = NULL;
		for (token = strtok_r(dup, ",", &saveptr); token;
		     token = strtok_r(NULL, ",", &saveptr)) {
			bdev = do_bdev_create(dest, token, cname, specs);
			if (bdev)
				break;
		}
		free(dup);

		/*
		 * When every entry failed, the result is NULL. Retrying with
		 * the whole comma-separated string cannot succeed because it
		 * is not the name of any backend.
		 */
		return bdev;
	}

	return do_bdev_create(dest, type, cname, specs);
}
/*
 * Report whether the container's rootfs is backed by a block device.
 *
 * Returns false when no rootfs path is configured, or when it is empty or "/".
 * Returns true when the path stats as a block device node, or when
 * bdev_query() classifies it as one of the "lvm", "loop" or "nbd" backends.
 */
bool rootfs_is_blockdev(struct lxc_conf *conf)
{
	static const char *const blk_backends[] = { "lvm", "loop", "nbd" };
	const char *rootfs = conf->rootfs.path;
	const struct bdev_type *match;
	struct stat sb;
	size_t k;

	if (!rootfs || strcmp(rootfs, "/") == 0 || rootfs[0] == '\0')
		return false;

	/* an actual block device node needs no further classification */
	if (stat(rootfs, &sb) == 0 && S_ISBLK(sb.st_mode))
		return true;

	match = bdev_query(rootfs);
	if (!match)
		return false;

	for (k = 0; k < sizeof(blk_backends) / sizeof(blk_backends[0]); k++) {
		if (strcmp(match->name, blk_backends[k]) == 0)
			return true;
	}

	return false;
}
/*
 * Destroy the backing store described by conf->rootfs.
 *
 * Builds a bdev from the rootfs path and mount point, invokes the backend's
 * destroy operation and releases the bdev again. Returns true only if a
 * bdev could be initialised and its destroy operation returned 0.
 */
bool bdev_destroy(struct lxc_conf *conf)
{
	struct bdev *store;
	bool destroyed;

	store = bdev_init(conf, conf->rootfs.path, conf->rootfs.mount, NULL);
	if (!store)
		return false;

	destroyed = (store->ops->destroy(store) == 0);
	bdev_put(store);

	return destroyed;
}
/*
 * Callback-style wrapper around bdev_destroy().
 *
 * @data: pointer to the struct lxc_conf whose backing store is destroyed
 *
 * Switches to gid 0, clears the supplementary groups (a failure there is
 * only warned about) and switches to uid 0 before destroying the store.
 * Returns 0 on success and -1 if changing ids or the destroy itself fails.
 */
int bdev_destroy_wrapper(void *data)
{
	struct lxc_conf *lxc_conf = data;

	if (setgid(0) < 0) {
		ERROR("Failed to setgid to 0");
		return -1;
	}

	if (setgroups(0, NULL) < 0)
		WARN("Failed to clear groups");

	if (setuid(0) < 0) {
		ERROR("Failed to setuid to 0");
		return -1;
	}

	return bdev_destroy(lxc_conf) ? 0 : -1;
}
/*
 * attach_block_device: attach the block device backing the rootfs, if the
 * rootfs needs one.
 *
 * Returns true when nothing had to be attached (no rootfs path configured,
 * or requires_nbd() says the path needs no nbd device) or when attach_nbd()
 * succeeded. Returns false when the path lacks the ':' separator or the
 * attach failed, in which case container startup should be stopped.
 */
bool attach_block_device(struct lxc_conf *conf)
{
	char *rootfs = conf->rootfs.path;
	char *sep;

	if (!rootfs || !requires_nbd(rootfs))
		return true;

	/* everything after the first ':' is handed to attach_nbd() */
	sep = strchr(rootfs, ':');
	if (!sep)
		return false;

	if (attach_nbd(sep + 1, conf))
		return true;

	return false;
}
/*
 * Detach the nbd device recorded in conf->nbd_idx.
 * An index of -1 is skipped (presumably "nothing attached" - confirm
 * against where nbd_idx is initialised).
 */
void detach_block_device(struct lxc_conf *conf)
{
	if (conf->nbd_idx == -1)
		return;

	detach_nbd_idx(conf->nbd_idx);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment