Commit cdb4e53a by Christian Brauner

Cleanup bdev.c after splitting into modules

The function - bdev_get(); becomes static. It is called from nowhere else so far and never appeared in any header. Minor changes - Avoid comparisons between int and size_t types. Use size_t where possible else cast to size_t when it makes sense. - insert missing spaces between operators - put declarations for all static functions at the top Signed-off-by: 's avatarChristian Brauner <christian.brauner@mailbox.org>
parent bf76c012
......@@ -189,12 +189,30 @@ static const struct bdev_ops zfs_ops = {
.can_backup = true,
};
struct bdev_type {
const char *name;
const struct bdev_ops *ops;
};
static const struct bdev_type bdevs[] = {
{.name = "zfs", .ops = &zfs_ops,},
{.name = "lvm", .ops = &lvm_ops,},
{.name = "rbd", .ops = &rbd_ops,},
{.name = "btrfs", .ops = &btrfs_ops,},
{.name = "dir", .ops = &dir_ops,},
{.name = "aufs", .ops = &aufs_ops,},
{.name = "overlayfs", .ops = &ovl_ops,},
{.name = "loop", .ops = &loop_ops,},
{.name = "nbd", .ops = &nbd_ops,},
};
static const size_t numbdevs = sizeof(bdevs) / sizeof(struct bdev_type);
/* helpers */
/*
* These are copied from conf.c. However as conf.c will be moved to using
* the callback system, they can be pulled from there eventually, so we
* don't need to pollute utils.c with these low level functions
*/
static const struct bdev_type *bdev_query(const char *src);
static struct bdev *bdev_get(const char *type);
static struct bdev *do_bdev_create(const char *dest, const char *type,
const char *cname, struct bdev_specs *specs);
static int find_fstype_cb(char *buffer, void *data);
static char *linkderef(char *path, char *dest);
static bool unpriv_snap_allowed(struct bdev *b, const char *t, bool snap,
......@@ -233,8 +251,8 @@ char *dir_new_path(char *src, const char *oldname, const char *name,
}
while ((p2 = strstr(src, oldname)) != NULL) {
strncpy(p, src, p2-src); // copy text up to oldname
p += p2-src; // move target pointer (p)
strncpy(p, src, p2 - src); // copy text up to oldname
p += p2 - src; // move target pointer (p)
p += sprintf(p, "%s", name); // print new name in place of oldname
src = p2 + l2; // move src to end of oldname
}
......@@ -243,156 +261,361 @@ char *dir_new_path(char *src, const char *oldname, const char *name,
}
/*
* return block size of dev->src in units of bytes
* attach_block_device returns true if all went well,
* meaning either a block device was attached or was not
* needed. It returns false if something went wrong and
* container startup should be stopped.
*/
int blk_getsize(struct bdev *bdev, uint64_t *size)
bool attach_block_device(struct lxc_conf *conf)
{
int fd, ret;
char *path = bdev->src;
char *path;
if (strcmp(bdev->type, "loop") == 0)
path = bdev->src + 5;
if (!conf->rootfs.path)
return true;
path = conf->rootfs.path;
if (!requires_nbd(path))
return true;
path = strchr(path, ':');
if (!path)
return false;
path++;
if (!attach_nbd(path, conf))
return false;
return true;
}
fd = open(path, O_RDONLY);
if (fd < 0)
return -1;
bool bdev_can_backup(struct lxc_conf *conf)
{
struct bdev *bdev = bdev_init(conf, NULL, NULL, NULL);
bool ret;
ret = ioctl(fd, BLKGETSIZE64, size); // size of device in bytes
close(fd);
if (!bdev)
return false;
ret = bdev->ops->can_backup;
bdev_put(bdev);
return ret;
}
/*
* These are copied from conf.c. However as conf.c will be moved to using
* the callback system, they can be pulled from there eventually, so we
* don't need to pollute utils.c with these low level functions
* If we're not snaphotting, then bdev_copy becomes a simple case of mount
* the original, mount the new, and rsync the contents.
*/
static int find_fstype_cb(char* buffer, void *data)
struct bdev *bdev_copy(struct lxc_container *c0, const char *cname,
const char *lxcpath, const char *bdevtype, int flags,
const char *bdevdata, uint64_t newsize, int *needs_rdep)
{
struct cbarg {
const char *rootfs;
const char *target;
const char *options;
} *cbarg = data;
struct bdev *orig, *new;
pid_t pid;
int ret;
bool snap = flags & LXC_CLONE_SNAPSHOT;
bool maybe_snap = flags & LXC_CLONE_MAYBE_SNAPSHOT;
bool keepbdevtype = flags & LXC_CLONE_KEEPBDEVTYPE;
const char *src = c0->lxc_conf->rootfs.path;
const char *oldname = c0->name;
const char *oldpath = c0->config_path;
struct rsync_data data;
unsigned long mntflags;
char *mntdata;
char *fstype;
/* if the container name doesn't show up in the rootfs path, then
* we don't know how to come up with a new name
*/
if (strstr(src, oldname) == NULL) {
ERROR("original rootfs path %s doesn't include container name %s",
src, oldname);
return NULL;
}
/* we don't try 'nodev' entries */
if (strstr(buffer, "nodev"))
return 0;
orig = bdev_init(c0->lxc_conf, src, NULL, NULL);
if (!orig) {
ERROR("failed to detect blockdev type for %s", src);
return NULL;
}
fstype = buffer;
fstype += lxc_char_left_gc(fstype, strlen(fstype));
fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';
if (!orig->dest) {
int ret;
size_t len;
struct stat sb;
DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
cbarg->rootfs, cbarg->target, fstype);
len = strlen(oldpath) + strlen(oldname) + strlen("/rootfs") + 2;
orig->dest = malloc(len);
if (!orig->dest) {
ERROR("out of memory");
bdev_put(orig);
return NULL;
}
ret = snprintf(orig->dest, len, "%s/%s/rootfs", oldpath, oldname);
if (ret < 0 || (size_t)ret >= len) {
ERROR("rootfs path too long");
bdev_put(orig);
return NULL;
}
ret = stat(orig->dest, &sb);
if (ret < 0 && errno == ENOENT)
if (mkdir_p(orig->dest, 0755) < 0)
WARN("Error creating '%s', continuing.", orig->dest);
}
if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
free(mntdata);
return 0;
/*
* special case for snapshot - if caller requested maybe_snapshot and
* keepbdevtype and backing store is directory, then proceed with a copy
* clone rather than returning error
*/
if (maybe_snap && keepbdevtype && !bdevtype && !orig->ops->can_snapshot)
snap = false;
/*
* If newtype is NULL and snapshot is set, then use overlayfs
*/
if (!bdevtype && !keepbdevtype && snap && strcmp(orig->type , "dir") == 0)
bdevtype = "overlayfs";
if (am_unpriv() && !unpriv_snap_allowed(orig, bdevtype, snap, maybe_snap)) {
ERROR("Unsupported snapshot type for unprivileged users");
bdev_put(orig);
return NULL;
}
if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
DEBUG("mount failed with error: %s", strerror(errno));
free(mntdata);
return 0;
*needs_rdep = 0;
if (bdevtype && strcmp(orig->type, "dir") == 0 &&
(strcmp(bdevtype, "aufs") == 0 ||
strcmp(bdevtype, "overlayfs") == 0)) {
*needs_rdep = 1;
} else if (snap && strcmp(orig->type, "lvm") == 0 &&
!lvm_is_thin_volume(orig->src)) {
*needs_rdep = 1;
}
free(mntdata);
new = bdev_get(bdevtype ? bdevtype : orig->type);
if (!new) {
ERROR("no such block device type: %s", bdevtype ? bdevtype : orig->type);
bdev_put(orig);
return NULL;
}
INFO("mounted '%s' on '%s', with fstype '%s'",
cbarg->rootfs, cbarg->target, fstype);
if (new->ops->clone_paths(orig, new, oldname, cname, oldpath, lxcpath,
snap, newsize, c0->lxc_conf) < 0) {
ERROR("failed getting pathnames for cloned storage: %s", src);
goto err;
}
return 1;
}
if (am_unpriv() && chown_mapped_root(new->src, c0->lxc_conf) < 0)
WARN("Failed to update ownership of %s", new->dest);
int mount_unknown_fs(const char *rootfs, const char *target,
const char *options)
{
struct cbarg {
const char *rootfs;
const char *target;
const char *options;
} cbarg = {
.rootfs = rootfs,
.target = target,
.options = options,
};
if (snap)
return new;
/*
* find the filesystem type with brute force:
* first we check with /etc/filesystems, in case the modules
* are auto-loaded and fall back to the supported kernel fs
* https://github.com/lxc/lxc/issues/131
* Use btrfs snapshot feature instead of rsync to restore if both orig and new are btrfs
*/
char *fsfile[] = {
"/etc/filesystems",
"/proc/filesystems",
};
size_t i;
for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
int ret;
if (bdevtype &&
strcmp(orig->type, "btrfs") == 0 && strcmp(new->type, "btrfs") == 0 &&
btrfs_same_fs(orig->dest, new->dest) == 0) {
if (btrfs_destroy(new) < 0) {
ERROR("Error destroying %s subvolume", new->dest);
goto err;
}
if (mkdir_p(new->dest, 0755) < 0) {
ERROR("Error creating %s directory", new->dest);
goto err;
}
if (btrfs_snapshot(orig->dest, new->dest) < 0) {
ERROR("Error restoring %s to %s", orig->dest, new->dest);
goto err;
}
bdev_put(orig);
return new;
}
if (access(fsfile[i], F_OK))
continue;
pid = fork();
if (pid < 0) {
SYSERROR("fork");
goto err;
}
ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
if (pid > 0) {
int ret = wait_for_pid(pid);
bdev_put(orig);
if (ret < 0) {
ERROR("failed to parse '%s'", fsfile[i]);
return -1;
bdev_put(new);
return NULL;
}
if (ret)
return 0;
return new;
}
ERROR("failed to determine fs type for '%s'", rootfs);
return -1;
}
data.orig = orig;
data.new = new;
if (am_unpriv())
ret = userns_exec_1(c0->lxc_conf, rsync_rootfs_wrapper, &data);
else
ret = rsync_rootfs(&data);
int do_mkfs(const char *path, const char *fstype)
exit(ret == 0 ? 0 : 1);
err:
bdev_put(orig);
bdev_put(new);
return NULL;
}
/*
* bdev_create:
* Create a backing store for a container.
* If successful, return a struct bdev *, with the bdev mounted and ready
* for use. Before completing, the caller will need to call the
* umount operation and bdev_put().
* @dest: the mountpoint (i.e. /var/lib/lxc/$name/rootfs)
* @type: the bdevtype (dir, btrfs, zfs, rbd, etc)
* @cname: the container name
* @specs: details about the backing store to create, like fstype
*/
struct bdev *bdev_create(const char *dest, const char *type, const char *cname,
struct bdev_specs *specs)
{
pid_t pid;
struct bdev *bdev;
char *best_options[] = {"btrfs", "zfs", "lvm", "dir", "rbd", NULL};
if ((pid = fork()) < 0) {
ERROR("error forking");
return -1;
if (!type)
return do_bdev_create(dest, "dir", cname, specs);
if (strcmp(type, "best") == 0) {
int i;
// try for the best backing store type, according to our
// opinionated preferences
for (i = 0; best_options[i]; i++) {
if ((bdev = do_bdev_create(dest, best_options[i], cname, specs)))
return bdev;
}
return NULL; // 'dir' should never fail, so this shouldn't happen
}
if (pid > 0)
return wait_for_pid(pid);
// If the file is not a block device, we don't want mkfs to ask
// us about whether to proceed.
if (null_stdfds() < 0)
exit(1);
execlp("mkfs", "mkfs", "-t", fstype, path, NULL);
exit(1);
// -B lvm,dir
if (strchr(type, ',') != NULL) {
char *dup = alloca(strlen(type) + 1), *saveptr = NULL, *token;
strcpy(dup, type);
for (token = strtok_r(dup, ",", &saveptr); token;
token = strtok_r(NULL, ",", &saveptr)) {
if ((bdev = do_bdev_create(dest, token, cname, specs)))
return bdev;
}
}
return do_bdev_create(dest, type, cname, specs);
}
static char *linkderef(char *path, char *dest)
bool bdev_destroy(struct lxc_conf *conf)
{
struct stat sbuf;
ssize_t ret;
struct bdev *r;
bool ret = false;
ret = stat(path, &sbuf);
if (ret < 0)
r = bdev_init(conf, conf->rootfs.path, conf->rootfs.mount, NULL);
if (!r)
return ret;
if (r->ops->destroy(r) == 0)
ret = true;
bdev_put(r);
return ret;
}
int bdev_destroy_wrapper(void *data)
{
struct lxc_conf *conf = data;
if (setgid(0) < 0) {
ERROR("Failed to setgid to 0");
return -1;
}
if (setgroups(0, NULL) < 0)
WARN("Failed to clear groups");
if (setuid(0) < 0) {
ERROR("Failed to setuid to 0");
return -1;
}
if (!bdev_destroy(conf))
return -1;
else
return 0;
}
struct bdev *bdev_init(struct lxc_conf *conf, const char *src, const char *dst,
const char *mntopts)
{
struct bdev *bdev;
const struct bdev_type *q;
if (!src)
src = conf->rootfs.path;
if (!src)
return NULL;
if (!S_ISLNK(sbuf.st_mode))
return path;
ret = readlink(path, dest, MAXPATHLEN);
if (ret < 0) {
SYSERROR("error reading link %s", path);
q = bdev_query(src);
if (!q)
return NULL;
} else if (ret >= MAXPATHLEN) {
ERROR("link in %s too long", path);
bdev = malloc(sizeof(struct bdev));
if (!bdev)
return NULL;
}
dest[ret] = '\0';
return dest;
memset(bdev, 0, sizeof(struct bdev));
bdev->ops = q->ops;
bdev->type = q->name;
if (mntopts)
bdev->mntopts = strdup(mntopts);
if (src)
bdev->src = strdup(src);
if (dst)
bdev->dest = strdup(dst);
if (strcmp(bdev->type, "nbd") == 0)
bdev->nbd_idx = conf->nbd_idx;
return bdev;
}
bool bdev_is_dir(struct lxc_conf *conf, const char *path)
{
struct bdev *orig = bdev_init(conf, path, NULL, NULL);
bool ret = false;
if (!orig)
return ret;
if (strcmp(orig->type, "dir") == 0)
ret = true;
bdev_put(orig);
return ret;
}
void bdev_put(struct bdev *bdev)
{
free(bdev->mntopts);
free(bdev->src);
free(bdev->dest);
free(bdev);
}
/*
* return block size of dev->src in units of bytes
*/
int blk_getsize(struct bdev *bdev, uint64_t *size)
{
int fd, ret;
char *path = bdev->src;
if (strcmp(bdev->type, "loop") == 0)
path = bdev->src + 5;
fd = open(path, O_RDONLY);
if (fd < 0)
return -1;
ret = ioctl(fd, BLKGETSIZE64, size); // size of device in bytes
close(fd);
return ret;
}
void detach_block_device(struct lxc_conf *conf)
{
if (conf->nbd_idx != -1)
detach_nbd_idx(conf->nbd_idx);
}
/*
......@@ -428,7 +651,7 @@ int detect_fs(struct bdev *bdev, char *type, int len)
int status;
close(p[1]);
memset(type, 0, len);
ret = read(p[0], type, len-1);
ret = read(p[0], type, len - 1);
close(p[0]);
if (ret < 0) {
SYSERROR("error reading from pipe");
......@@ -440,7 +663,7 @@ int detect_fs(struct bdev *bdev, char *type, int len)
return -1;
}
wait(&status);
type[len-1] = '\0';
type[len - 1] = '\0';
INFO("detected fstype %s for %s", type, srcdev);
return ret;
}
......@@ -475,11 +698,11 @@ int detect_fs(struct bdev *bdev, char *type, int len)
*sp1 = '\0';
if (strcmp(line, l))
continue;
sp2 = strchr(sp1+1, ' ');
sp2 = strchr(sp1 + 1, ' ');
if (!sp2)
exit(1);
*sp2 = '\0';
sp3 = strchr(sp2+1, ' ');
sp3 = strchr(sp2 + 1, ' ');
if (!sp3)
exit(1);
*sp3 = '\0';
......@@ -491,13 +714,29 @@ int detect_fs(struct bdev *bdev, char *type, int len)
exit(1);
}
struct bdev_type {
const char *name;
const struct bdev_ops *ops;
};
int do_mkfs(const char *path, const char *fstype)
{
pid_t pid;
if ((pid = fork()) < 0) {
ERROR("error forking");
return -1;
}
if (pid > 0)
return wait_for_pid(pid);
// If the file is not a block device, we don't want mkfs to ask
// us about whether to proceed.
if (null_stdfds() < 0)
exit(1);
execlp("mkfs", "mkfs", "-t", fstype, path, NULL);
exit(1);
}
// this will return 1 for physical disks, qemu-nbd, loop, etc
// right now only lvm is a block device
/*
* This will return 1 for physical disks, qemu-nbd, loop, etc right now only lvm
* is a block device.
*/
int is_blktype(struct bdev *b)
{
if (strcmp(b->type, "lvm") == 0)
......@@ -505,39 +744,102 @@ int is_blktype(struct bdev *b)
return 0;
}
static const struct bdev_type bdevs[] = {
{.name = "zfs", .ops = &zfs_ops,},
{.name = "lvm", .ops = &lvm_ops,},
{.name = "rbd", .ops = &rbd_ops,},
{.name = "btrfs", .ops = &btrfs_ops,},
{.name = "dir", .ops = &dir_ops,},
{.name = "aufs", .ops = &aufs_ops,},
{.name = "overlayfs", .ops = &ovl_ops,},
{.name = "loop", .ops = &loop_ops,},
{.name = "nbd", .ops = &nbd_ops,},
};
int mount_unknown_fs(const char *rootfs, const char *target,
const char *options)
{
struct cbarg {
const char *rootfs;
const char *target;
const char *options;
} cbarg = {
.rootfs = rootfs,
.target = target,
.options = options,
};
static const size_t numbdevs = sizeof(bdevs) / sizeof(struct bdev_type);
/*
* find the filesystem type with brute force:
* first we check with /etc/filesystems, in case the modules
* are auto-loaded and fall back to the supported kernel fs
*/
char *fsfile[] = {
"/etc/filesystems",
"/proc/filesystems",
};
void bdev_put(struct bdev *bdev)
{
free(bdev->mntopts);
free(bdev->src);
free(bdev->dest);
free(bdev);
}
size_t i;
for (i = 0; i < sizeof(fsfile) / sizeof(fsfile[0]); i++) {
struct bdev *bdev_get(const char *type)
{
int i;
struct bdev *bdev;
int ret;
for (i=0; i<numbdevs; i++) {
if (strcmp(bdevs[i].name, type) == 0)
break;
}
if (i == numbdevs)
return NULL;
if (access(fsfile[i], F_OK))
continue;
ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
if (ret < 0) {
ERROR("failed to parse '%s'", fsfile[i]);
return -1;
}
if (ret)
return 0;
}
ERROR("failed to determine fs type for '%s'", rootfs);
return -1;
}
bool rootfs_is_blockdev(struct lxc_conf *conf)
{
const struct bdev_type *q;
struct stat st;
int ret;
if (!conf->rootfs.path || strcmp(conf->rootfs.path, "/") == 0 ||
strlen(conf->rootfs.path) == 0)
return false;
ret = stat(conf->rootfs.path, &st);
if (ret == 0 && S_ISBLK(st.st_mode))
return true;
q = bdev_query(conf->rootfs.path);
if (!q)
return false;
if (strcmp(q->name, "lvm") == 0 ||
strcmp(q->name, "loop") == 0 ||
strcmp(q->name, "nbd") == 0)
return true;
return false;
}
static struct bdev *do_bdev_create(const char *dest, const char *type,
const char *cname, struct bdev_specs *specs)
{
struct bdev *bdev = bdev_get(type);
if (!bdev) {
return NULL;
}
if (bdev->ops->create(bdev, dest, cname, specs) < 0) {
bdev_put(bdev);
return NULL;
}
return bdev;
}
static struct bdev *bdev_get(const char *type)
{
int i;
struct bdev *bdev;
for (i = 0; i < numbdevs; i++) {
if (strcmp(bdevs[i].name, type) == 0)
break;
}
if (i == numbdevs)
return NULL;
bdev = malloc(sizeof(struct bdev));
if (!bdev)
return NULL;
......@@ -550,7 +852,7 @@ struct bdev *bdev_get(const char *type)
static const struct bdev_type *bdev_query(const char *src)
{
int i;
for (i=0; i<numbdevs; i++) {
for (i = 0; i < numbdevs; i++) {
int r;
r = bdevs[i].ops->detect(src);
if (r)
......@@ -562,62 +864,73 @@ static const struct bdev_type *bdev_query(const char *src)
return &bdevs[i];
}
struct bdev *bdev_init(struct lxc_conf *conf, const char *src, const char *dst,
const char *mntopts)
/*
* These are copied from conf.c. However as conf.c will be moved to using
* the callback system, they can be pulled from there eventually, so we
* don't need to pollute utils.c with these low level functions
*/
static int find_fstype_cb(char* buffer, void *data)
{
struct bdev *bdev;
const struct bdev_type *q;
struct cbarg {
const char *rootfs;
const char *target;
const char *options;
} *cbarg = data;
if (!src)
src = conf->rootfs.path;
unsigned long mntflags;
char *mntdata;
char *fstype;
if (!src)
return NULL;
/* we don't try 'nodev' entries */
if (strstr(buffer, "nodev"))
return 0;
q = bdev_query(src);
if (!q)
return NULL;
fstype = buffer;
fstype += lxc_char_left_gc(fstype, strlen(fstype));
fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';
bdev = malloc(sizeof(struct bdev));
if (!bdev)
return NULL;
memset(bdev, 0, sizeof(struct bdev));
bdev->ops = q->ops;
bdev->type = q->name;
if (mntopts)
bdev->mntopts = strdup(mntopts);
if (src)
bdev->src = strdup(src);
if (dst)
bdev->dest = strdup(dst);
if (strcmp(bdev->type, "nbd") == 0)
bdev->nbd_idx = conf->nbd_idx;
DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
cbarg->rootfs, cbarg->target, fstype);
return bdev;
}
if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
free(mntdata);
return 0;
}
bool bdev_is_dir(struct lxc_conf *conf, const char *path)
{
struct bdev *orig = bdev_init(conf, path, NULL, NULL);
bool ret = false;
if (!orig)
return ret;
if (strcmp(orig->type, "dir") == 0)
ret = true;
bdev_put(orig);
return ret;
if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
DEBUG("mount failed with error: %s", strerror(errno));
free(mntdata);
return 0;
}
free(mntdata);
INFO("mounted '%s' on '%s', with fstype '%s'",
cbarg->rootfs, cbarg->target, fstype);
return 1;
}
bool bdev_can_backup(struct lxc_conf *conf)
static char *linkderef(char *path, char *dest)
{
struct bdev *bdev = bdev_init(conf, NULL, NULL, NULL);
bool ret;
struct stat sbuf;
ssize_t ret;
if (!bdev)
return false;
ret = bdev->ops->can_backup;
bdev_put(bdev);
return ret;
ret = stat(path, &sbuf);
if (ret < 0)
return NULL;
if (!S_ISLNK(sbuf.st_mode))
return path;
ret = readlink(path, dest, MAXPATHLEN);
if (ret < 0) {
SYSERROR("error reading link %s", path);
return NULL;
} else if (ret >= MAXPATHLEN) {
ERROR("link in %s too long", path);
return NULL;
}
dest[ret] = '\0';
return dest;
}
/*
......@@ -649,315 +962,3 @@ static bool unpriv_snap_allowed(struct bdev *b, const char *t, bool snap,
return true;
return false;
}
/*
* If we're not snaphotting, then bdev_copy becomes a simple case of mount
* the original, mount the new, and rsync the contents.
*/
struct bdev *bdev_copy(struct lxc_container *c0, const char *cname,
const char *lxcpath, const char *bdevtype, int flags,
const char *bdevdata, uint64_t newsize, int *needs_rdep)
{
struct bdev *orig, *new;
pid_t pid;
int ret;
bool snap = flags & LXC_CLONE_SNAPSHOT;
bool maybe_snap = flags & LXC_CLONE_MAYBE_SNAPSHOT;
bool keepbdevtype = flags & LXC_CLONE_KEEPBDEVTYPE;
const char *src = c0->lxc_conf->rootfs.path;
const char *oldname = c0->name;
const char *oldpath = c0->config_path;
struct rsync_data data;
/* if the container name doesn't show up in the rootfs path, then
* we don't know how to come up with a new name
*/
if (strstr(src, oldname) == NULL) {
ERROR("original rootfs path %s doesn't include container name %s",
src, oldname);
return NULL;
}
orig = bdev_init(c0->lxc_conf, src, NULL, NULL);
if (!orig) {
ERROR("failed to detect blockdev type for %s", src);
return NULL;
}
if (!orig->dest) {
int ret;
size_t len;
struct stat sb;
len = strlen(oldpath) + strlen(oldname) + strlen("/rootfs") + 2;
orig->dest = malloc(len);
if (!orig->dest) {
ERROR("out of memory");
bdev_put(orig);
return NULL;
}
ret = snprintf(orig->dest, len, "%s/%s/rootfs", oldpath, oldname);
if (ret < 0 || ret >= len) {
ERROR("rootfs path too long");
bdev_put(orig);
return NULL;
}
ret = stat(orig->dest, &sb);
if (ret < 0 && errno == ENOENT)
if (mkdir_p(orig->dest, 0755) < 0)
WARN("Error creating '%s', continuing.", orig->dest);
}
/*
* special case for snapshot - if caller requested maybe_snapshot and
* keepbdevtype and backing store is directory, then proceed with a copy
* clone rather than returning error
*/
if (maybe_snap && keepbdevtype && !bdevtype && !orig->ops->can_snapshot)
snap = false;
/*
* If newtype is NULL and snapshot is set, then use overlayfs
*/
if (!bdevtype && !keepbdevtype && snap && strcmp(orig->type , "dir") == 0)
bdevtype = "overlayfs";
if (am_unpriv() && !unpriv_snap_allowed(orig, bdevtype, snap, maybe_snap)) {
ERROR("Unsupported snapshot type for unprivileged users");
bdev_put(orig);
return NULL;
}
*needs_rdep = 0;
if (bdevtype && strcmp(orig->type, "dir") == 0 &&
(strcmp(bdevtype, "aufs") == 0 ||
strcmp(bdevtype, "overlayfs") == 0)) {
*needs_rdep = 1;
} else if (snap && strcmp(orig->type, "lvm") == 0 &&
!lvm_is_thin_volume(orig->src)) {
*needs_rdep = 1;
}
new = bdev_get(bdevtype ? bdevtype : orig->type);
if (!new) {
ERROR("no such block device type: %s", bdevtype ? bdevtype : orig->type);
bdev_put(orig);
return NULL;
}
if (new->ops->clone_paths(orig, new, oldname, cname, oldpath, lxcpath,
snap, newsize, c0->lxc_conf) < 0) {
ERROR("failed getting pathnames for cloned storage: %s", src);
goto err;
}
if (am_unpriv() && chown_mapped_root(new->src, c0->lxc_conf) < 0)
WARN("Failed to update ownership of %s", new->dest);
if (snap)
return new;
/*
* https://github.com/lxc/lxc/issues/131
* Use btrfs snapshot feature instead of rsync to restore if both orig and new are btrfs
*/
if (bdevtype &&
strcmp(orig->type, "btrfs") == 0 && strcmp(new->type, "btrfs") == 0 &&
btrfs_same_fs(orig->dest, new->dest) == 0) {
if (btrfs_destroy(new) < 0) {
ERROR("Error destroying %s subvolume", new->dest);
goto err;
}
if (mkdir_p(new->dest, 0755) < 0) {
ERROR("Error creating %s directory", new->dest);
goto err;
}
if (btrfs_snapshot(orig->dest, new->dest) < 0) {
ERROR("Error restoring %s to %s", orig->dest, new->dest);
goto err;
}
bdev_put(orig);
return new;
}
pid = fork();
if (pid < 0) {
SYSERROR("fork");
goto err;
}
if (pid > 0) {
int ret = wait_for_pid(pid);
bdev_put(orig);
if (ret < 0) {
bdev_put(new);
return NULL;
}
return new;
}
data.orig = orig;
data.new = new;
if (am_unpriv())
ret = userns_exec_1(c0->lxc_conf, rsync_rootfs_wrapper, &data);
else
ret = rsync_rootfs(&data);
exit(ret == 0 ? 0 : 1);
err:
bdev_put(orig);
bdev_put(new);
return NULL;
}
static struct bdev *do_bdev_create(const char *dest, const char *type,
const char *cname, struct bdev_specs *specs)
{
struct bdev *bdev = bdev_get(type);
if (!bdev) {
return NULL;
}
if (bdev->ops->create(bdev, dest, cname, specs) < 0) {
bdev_put(bdev);
return NULL;
}
return bdev;
}
/*
* bdev_create:
* Create a backing store for a container.
* If successful, return a struct bdev *, with the bdev mounted and ready
* for use. Before completing, the caller will need to call the
* umount operation and bdev_put().
* @dest: the mountpoint (i.e. /var/lib/lxc/$name/rootfs)
* @type: the bdevtype (dir, btrfs, zfs, rbd, etc)
* @cname: the container name
* @specs: details about the backing store to create, like fstype
*/
struct bdev *bdev_create(const char *dest, const char *type, const char *cname,
struct bdev_specs *specs)
{
struct bdev *bdev;
char *best_options[] = {"btrfs", "zfs", "lvm", "dir", "rbd", NULL};
if (!type)
return do_bdev_create(dest, "dir", cname, specs);
if (strcmp(type, "best") == 0) {
int i;
// try for the best backing store type, according to our
// opinionated preferences
for (i=0; best_options[i]; i++) {
if ((bdev = do_bdev_create(dest, best_options[i], cname, specs)))
return bdev;
}
return NULL; // 'dir' should never fail, so this shouldn't happen
}
// -B lvm,dir
if (strchr(type, ',') != NULL) {
char *dup = alloca(strlen(type)+1), *saveptr = NULL, *token;
strcpy(dup, type);
for (token = strtok_r(dup, ",", &saveptr); token;
token = strtok_r(NULL, ",", &saveptr)) {
if ((bdev = do_bdev_create(dest, token, cname, specs)))
return bdev;
}
}
return do_bdev_create(dest, type, cname, specs);
}
bool rootfs_is_blockdev(struct lxc_conf *conf)
{
const struct bdev_type *q;
struct stat st;
int ret;
if (!conf->rootfs.path || strcmp(conf->rootfs.path, "/") == 0 ||
strlen(conf->rootfs.path) == 0)
return false;
ret = stat(conf->rootfs.path, &st);
if (ret == 0 && S_ISBLK(st.st_mode))
return true;
q = bdev_query(conf->rootfs.path);
if (!q)
return false;
if (strcmp(q->name, "lvm") == 0 ||
strcmp(q->name, "loop") == 0 ||
strcmp(q->name, "nbd") == 0)
return true;
return false;
}
bool bdev_destroy(struct lxc_conf *conf)
{
struct bdev *r;
bool ret = false;
r = bdev_init(conf, conf->rootfs.path, conf->rootfs.mount, NULL);
if (!r)
return ret;
if (r->ops->destroy(r) == 0)
ret = true;
bdev_put(r);
return ret;
}
int bdev_destroy_wrapper(void *data)
{
struct lxc_conf *conf = data;
if (setgid(0) < 0) {
ERROR("Failed to setgid to 0");
return -1;
}
if (setgroups(0, NULL) < 0)
WARN("Failed to clear groups");
if (setuid(0) < 0) {
ERROR("Failed to setuid to 0");
return -1;
}
if (!bdev_destroy(conf))
return -1;
else
return 0;
}
/*
* attach_block_device returns true if all went well,
* meaning either a block device was attached or was not
* needed. It returns false if something went wrong and
* container startup should be stopped.
*/
bool attach_block_device(struct lxc_conf *conf)
{
char *path;
if (!conf->rootfs.path)
return true;
path = conf->rootfs.path;
if (!requires_nbd(path))
return true;
path = strchr(path, ':');
if (!path)
return false;
path++;
if (!attach_nbd(path, conf))
return false;
return true;
}
void detach_block_device(struct lxc_conf *conf)
{
if (conf->nbd_idx != -1)
detach_nbd_idx(conf->nbd_idx);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment