Commit ab572367 by Stéphane Graber

Merge pull request #670 from ksperis/master

Add Ceph RBD backingstore.
parents c2613927 93068560
......@@ -126,7 +126,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
</term>
<listitem>
<para>
'backingstore' is one of 'dir', 'lvm', 'loop', 'btrfs', 'zfs', or 'best'. The
'backingstore' is one of 'dir', 'lvm', 'loop', 'btrfs', 'zfs', 'rbd', or 'best'. The
default is 'dir', meaning that the container root filesystem
will be a directory under <filename>@LXCPATH@/container/rootfs</filename>.
This backing store type allows the optional
......@@ -158,6 +158,12 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
If backingstore is 'loop', you can use <replaceable>--fstype FSTYPE</replaceable> and <replaceable>--fssize SIZE</replaceable> as 'lvm'. The default values for these options are the same as 'lvm'.
</para>
<para>
If backingstore is 'rbd', then you will need to have a valid configuration in <filename>ceph.conf</filename> and a <filename>ceph.client.admin.keyring</filename> defined.
You can specify the following options :
<replaceable>--rbdname RBDNAME</replaceable> will create a blockdevice named RBDNAME rather than the default, which is the container name.
<replaceable>--rbdpool POOL</replaceable> will create the blockdevice in the pool named POOL, rather than the default, which is 'lxc'.
</para>
<para>
If backingstore is 'best', then lxc will try, in order, btrfs,
zfs, lvm, and finally a directory backing store.
</para>
......
......@@ -86,6 +86,7 @@ struct lxc_arguments {
char *fstype;
uint64_t fssize;
char *lvname, *vgname, *thinpool;
char *rbdname, *rbdpool;
char *zfsroot, *lowerdir, *dir;
/* lxc-execute */
......
......@@ -1181,6 +1181,168 @@ static const struct bdev_ops lvm_ops = {
.can_backup = false,
};
/*
* CEPH RBD ops
*/
static int rbd_detect(const char *path)
{
if ( memcmp(path, "/dev/rbd/", 9) == 0)
return 1;
return 0;
}
static int rbd_mount(struct bdev *bdev)
{
if (strcmp(bdev->type, "rbd"))
return -22;
if (!bdev->src || !bdev->dest)
return -22;
if ( !file_exists(bdev->src) ) {
// if blkdev does not exist it should be mapped, because it is not persistent on reboot
ERROR("Block device %s is not mapped.", bdev->src);
return -1;
}
return mount_unknown_fs(bdev->src, bdev->dest, bdev->mntopts);
}
static int rbd_umount(struct bdev *bdev)
{
if (strcmp(bdev->type, "rbd"))
return -22;
if (!bdev->src || !bdev->dest)
return -22;
return umount(bdev->dest);
}
static int rbd_clonepaths(struct bdev *orig, struct bdev *new, const char *oldname,
const char *cname, const char *oldpath, const char *lxcpath, int snap,
uint64_t newsize, struct lxc_conf *conf)
{
ERROR("rbd clonepaths not implemented");
return -1;
}
static int rbd_destroy(struct bdev *orig)
{
pid_t pid;
char *rbdfullname;
if ( file_exists(orig->src) ) {
if ((pid = fork()) < 0)
return -1;
if (!pid) {
execlp("rbd", "rbd", "unmap" , orig->src, NULL);
exit(1);
}
if (wait_for_pid(pid) < 0)
return -1;
}
if ((pid = fork()) < 0)
return -1;
if (!pid) {
rbdfullname = alloca(strlen(orig->src) - 8);
strcpy( rbdfullname, &orig->src[9] );
execlp("rbd", "rbd", "rm" , rbdfullname, NULL);
exit(1);
}
return wait_for_pid(pid);
}
static int rbd_create(struct bdev *bdev, const char *dest, const char *n,
struct bdev_specs *specs)
{
const char *rbdpool, *rbdname = n, *fstype;
uint64_t size;
int ret, len;
char sz[24];
pid_t pid;
if (!specs)
return -1;
rbdpool = specs->rbd.rbdpool;
if (!rbdpool)
rbdpool = lxc_global_config_value("lxc.bdev.rbd.rbdpool");
if (specs->rbd.rbdname)
rbdname = specs->rbd.rbdname;
/* source device /dev/rbd/lxc/ctn */
len = strlen(rbdpool) + strlen(rbdname) + 11;
bdev->src = malloc(len);
if (!bdev->src)
return -1;
ret = snprintf(bdev->src, len, "/dev/rbd/%s/%s", rbdpool, rbdname);
if (ret < 0 || ret >= len)
return -1;
// fssize is in bytes.
size = specs->fssize;
if (!size)
size = DEFAULT_FS_SIZE;
// in megabytes for rbd tool
ret = snprintf(sz, 24, "%"PRIu64, size / 1024 / 1024 );
if (ret < 0 || ret >= 24)
exit(1);
if ((pid = fork()) < 0)
return -1;
if (!pid) {
execlp("rbd", "rbd", "create" , "--pool", rbdpool, rbdname, "--size", sz, NULL);
exit(1);
}
if (wait_for_pid(pid) < 0)
return -1;
if ((pid = fork()) < 0)
return -1;
if (!pid) {
execlp("rbd", "rbd", "map", "--pool", rbdpool, rbdname, NULL);
exit(1);
}
if (wait_for_pid(pid) < 0)
return -1;
fstype = specs->fstype;
if (!fstype)
fstype = DEFAULT_FSTYPE;
if (do_mkfs(bdev->src, fstype) < 0) {
ERROR("Error creating filesystem type %s on %s", fstype,
bdev->src);
return -1;
}
if (!(bdev->dest = strdup(dest)))
return -1;
if (mkdir_p(bdev->dest, 0755) < 0 && errno != EEXIST) {
ERROR("Error creating %s", bdev->dest);
return -1;
}
return 0;
}
static const struct bdev_ops rbd_ops = {
.detect = &rbd_detect,
.mount = &rbd_mount,
.umount = &rbd_umount,
.clone_paths = &rbd_clonepaths,
.destroy = &rbd_destroy,
.create = &rbd_create,
.can_snapshot = false,
.can_backup = false,
};
/*
* Return the full path of objid under dirid. Let's say dirid is
* /lxc/c1/rootfs, and objid is /lxc/c1/rootfs/a/b/c. Then we will
......@@ -3237,6 +3399,7 @@ static const struct bdev_ops nbd_ops = {
static const struct bdev_type bdevs[] = {
{.name = "zfs", .ops = &zfs_ops,},
{.name = "lvm", .ops = &lvm_ops,},
{.name = "rbd", .ops = &rbd_ops,},
{.name = "btrfs", .ops = &btrfs_ops,},
{.name = "dir", .ops = &dir_ops,},
{.name = "aufs", .ops = &aufs_ops,},
......@@ -3596,6 +3759,7 @@ err:
static struct bdev * do_bdev_create(const char *dest, const char *type,
const char *cname, struct bdev_specs *specs)
{
struct bdev *bdev = bdev_get(type);
if (!bdev) {
return NULL;
......@@ -3616,7 +3780,7 @@ static struct bdev * do_bdev_create(const char *dest, const char *type,
* for use. Before completing, the caller will need to call the
* umount operation and bdev_put().
* @dest: the mountpoint (i.e. /var/lib/lxc/$name/rootfs)
* @type: the bdevtype (dir, btrfs, zfs, etc)
* @type: the bdevtype (dir, btrfs, zfs, rbd, etc)
* @cname: the container name
* @specs: details about the backing store to create, like fstype
*/
......@@ -3624,7 +3788,7 @@ struct bdev *bdev_create(const char *dest, const char *type,
const char *cname, struct bdev_specs *specs)
{
struct bdev *bdev;
char *best_options[] = {"btrfs", "zfs", "lvm", "dir", NULL};
char *best_options[] = {"btrfs", "zfs", "lvm", "dir", "rbd", NULL};
if (!type)
return do_bdev_create(dest, "dir", cname, specs);
......
......@@ -87,6 +87,7 @@ const char *lxc_global_config_value(const char *option_name)
{ "lxc.bdev.lvm.vg", DEFAULT_VG },
{ "lxc.bdev.lvm.thin_pool", DEFAULT_THIN_POOL },
{ "lxc.bdev.zfs.root", DEFAULT_ZFSROOT },
{ "lxc.bdev.rbd.rbdpool", DEFAULT_RBDPOOL },
{ "lxc.lxcpath", NULL },
{ "lxc.default_config", NULL },
{ "lxc.cgroup.pattern", NULL },
......
......@@ -42,6 +42,7 @@
#define DEFAULT_VG "lxc"
#define DEFAULT_THIN_POOL "lxc"
#define DEFAULT_ZFSROOT "lxc"
#define DEFAULT_RBDPOOL "lxc"
extern void lxc_setup_fs(void);
extern const char *lxc_global_config_value(const char *option_name);
......
......@@ -81,6 +81,8 @@ static int my_parser(struct lxc_arguments* args, int c, char* arg)
case '4': args->fssize = get_fssize(arg); break;
case '5': args->zfsroot = arg; break;
case '6': args->dir = arg; break;
case '7': args->rbdname = arg; break;
case '8': args->rbdpool = arg; break;
}
return 0;
}
......@@ -96,6 +98,8 @@ static const struct option my_longopts[] = {
{"fssize", required_argument, 0, '4'},
{"zfsroot", required_argument, 0, '5'},
{"dir", required_argument, 0, '6'},
{"rbdname", required_argument, 0, '7'},
{"rbdpool", required_argument, 0, '8'},
LXC_COMMON_OPTIONS
};
......@@ -142,6 +146,10 @@ Options :\n\
(Default: lxc)\n\
--thinpool=TP Use LVM thin pool called TP\n\
(Default: lxc)\n\
--rbdname=RBDNAME Use Ceph RBD name RBDNAME\n\
(Default: container name)\n\
--rbdpool=POOL Use Ceph RBD pool name POOL\n\
(Default: lxc)\n\
--fstype=TYPE Create fstype TYPE\n\
(Default: ext3)\n\
--fssize=SIZE[U] Create filesystem of size SIZE * unit U (bBkKmMgGtT)\n\
......@@ -159,7 +167,8 @@ static bool validate_bdev_args(struct lxc_arguments *a)
if (strcmp(a->bdevtype, "best") != 0) {
if (a->fstype || a->fssize) {
if (strcmp(a->bdevtype, "lvm") != 0 &&
strcmp(a->bdevtype, "loop") != 0) {
strcmp(a->bdevtype, "loop") != 0 &&
strcmp(a->bdevtype, "rbd") != 0) {
fprintf(stderr, "filesystem type and size are only valid with block devices\n");
return false;
}
......@@ -170,6 +179,12 @@ static bool validate_bdev_args(struct lxc_arguments *a)
return false;
}
}
if (strcmp(a->bdevtype, "rbd") != 0) {
if (a->rbdname || a->rbdpool) {
fprintf(stderr, "--rbdname and --rbdpool are only valid with -B rbd\n");
return false;
}
}
if (strcmp(a->bdevtype, "zfs") != 0) {
if (a->zfsroot) {
fprintf(stderr, "zfsroot is only valid with -B zfs\n");
......@@ -262,6 +277,12 @@ int main(int argc, char *argv[])
if (my_args.thinpool)
spec.lvm.thinpool = my_args.thinpool;
}
if (strcmp(my_args.bdevtype, "rbd") == 0 || strcmp(my_args.bdevtype, "best") == 0) {
if (my_args.rbdname)
spec.rbd.rbdname = my_args.rbdname;
if (my_args.rbdpool)
spec.rbd.rbdpool = my_args.rbdpool;
}
if (my_args.dir) {
spec.dir = my_args.dir;
}
......
......@@ -858,6 +858,10 @@ struct bdev_specs {
char *thinpool; /*!< LVM thin pool to use, if any */
} lvm;
char *dir; /*!< Directory path */
struct {
char *rbdname; /*!< RBD image name */
char *rbdpool; /*!< Ceph pool name */
} rbd;
};
/*!
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment