Merge commit 'refs/top-bases/linux-zfs-branch' into linux-zfs-branch

This commit is contained in:
Brian Behlendorf 2009-10-14 16:32:17 -07:00
commit 274717b31e
5 changed files with 344 additions and 61 deletions

View File

@ -406,17 +406,23 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
if (arg[0] == '/') {
/*
* Complete device or file path. Exact type is determined by
* examining the file descriptor afterwards.
* examining the file descriptor afterwards. Symbolic links
* are resolved to their real paths for the is_whole_disk()
* and S_ISBLK/S_ISREG type checks.
*/
wholedisk = is_whole_disk(arg);
if (!wholedisk && (stat64(arg, &statbuf) != 0)) {
if (realpath(arg, path) == NULL) {
(void) fprintf(stderr,
gettext("cannot open '%s': %s\n"),
arg, strerror(errno));
gettext("cannot resolve path '%s'\n"), arg);
return (NULL);
}
(void) strlcpy(path, arg, sizeof (path));
wholedisk = is_whole_disk(path);
if (!wholedisk && (stat64(path, &statbuf) != 0)) {
(void) fprintf(stderr,
gettext("cannot open '%s': %s\n"),
path, strerror(errno));
return (NULL);
}
} else {
/*
* This may be a short path for a device, or it could be total
@ -894,8 +900,10 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
uint64_t wholedisk;
int fd;
int ret;
#if defined(__sun__) || defined(__sun)
ddi_devid_t devid;
char *minor = NULL, *devid_str = NULL;
#endif
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
@ -924,7 +932,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
/*
* Fill in the devid, now that we've labeled the disk.
*/
(void) snprintf(buf, sizeof (buf), "%ss0", path);
(void) snprintf(buf, sizeof (buf), "%s%s", path, FIRST_SLICE);
if ((fd = open(buf, O_RDONLY)) < 0) {
(void) fprintf(stderr,
gettext("cannot open '%s': %s\n"),
@ -932,6 +940,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
return (-1);
}
#if defined(__sun__) || defined(__sun)
if (devid_get(fd, &devid) == 0) {
if (devid_get_minor_name(fd, &minor) == 0 &&
(devid_str = devid_str_encode(devid, minor)) !=
@ -945,6 +954,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
devid_str_free(minor);
devid_free(devid);
}
#endif
/*
* Update the path to refer to the 's0' slice. The presence of

View File

@ -30,6 +30,7 @@
#include <strings.h>
#include <unistd.h>
#include <uuid/uuid.h>
#include <zlib.h>
#include <libintl.h>
#include <sys/types.h>
#include <sys/dkio.h>
@ -39,7 +40,9 @@
#include <sys/dktp/fdisk.h>
#include <sys/efi_partition.h>
#include <sys/byteorder.h>
#include <sys/ddi.h>
#if defined(__linux__)
#include <linux/fs.h>
#endif
static struct uuid_to_ptag {
struct uuid uuid;
@ -50,11 +53,11 @@ static struct uuid_to_ptag {
{ EFI_SWAP },
{ EFI_USR },
{ EFI_BACKUP },
{ 0 }, /* STAND is never used */
{ EFI_UNUSED }, /* STAND is never used */
{ EFI_VAR },
{ EFI_HOME },
{ EFI_ALTSCTR },
{ 0 }, /* CACHE (cachefs) is never used */
{ EFI_UNUSED }, /* CACHE (cachefs) is never used */
{ EFI_RESERVED },
{ EFI_SYSTEM },
{ EFI_LEGACY_MBR },
@ -108,21 +111,115 @@ int efi_debug = 1;
int efi_debug = 0;
#endif
extern unsigned int efi_crc32(const unsigned char *, unsigned int);
static int efi_read(int, struct dk_gpt *);
/*
* Return a 32-bit CRC of the contents of the buffer. Pre-and-post
* one's conditioning will be handled by crc32() internally.
*/
static uint32_t
efi_crc32(const unsigned char *buf, unsigned int size)
{
uint32_t crc = crc32(0, Z_NULL, 0);
crc = crc32(crc, buf, size);
return (crc);
}
static int
read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize)
{
struct dk_minfo disk_info;
int sector_size;
unsigned long long capacity_size;
if (ioctl(fd, BLKSSZGET, &sector_size) < 0)
return (-1);
if (ioctl(fd, BLKGETSIZE64, &capacity_size) < 0)
return (-1);
*lbsize = (uint_t)sector_size;
*capacity = (diskaddr_t)(capacity_size / sector_size);
if ((ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info)) == -1)
return (errno);
*capacity = disk_info.dki_capacity;
*lbsize = disk_info.dki_lbsize;
return (0);
}
static int
efi_get_info(int fd, struct dk_cinfo *dki_info)
{
#if defined(__linux__)
char path[PATH_MAX];
char *dev_path;
int rval;
/*
* The simplest way to get the partition number under linux is
* to parse it out of the /dev/<disk><parition> block device name.
* The kernel creates this using the partition number when it
* populates /dev/ so it may be trusted. Another issue is that
* that the libefi API only provides the open fd and not the
* file path. To handle this realpath(3) is used to resolve
* the block device name from /proc/self/fd/<fd>. Aside from
* the partition number we collect some additional device info.
*/
memset(dki_info, 0, sizeof(*dki_info));
(void) sprintf(path, "/proc/self/fd/%d", fd);
if ((dev_path = realpath(path, NULL)) == NULL)
goto error;
rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
dki_info->dki_dname,
&dki_info->dki_partition);
switch (rval) {
case 0:
errno = EINVAL;
goto error;
case 1:
dki_info->dki_partition = 0;
}
if ((strncmp(dki_info->dki_dname, "sd", 2) == 0)) {
strcpy(dki_info->dki_cname, "sd");
dki_info->dki_ctype = DKC_SCSI_CCS;
} else if ((strncmp(dki_info->dki_dname, "hd", 2) == 0)) {
strcpy(dki_info->dki_cname, "hd");
dki_info->dki_ctype = DKC_DIRECT;
} else if ((strncmp(dki_info->dki_dname, "md", 2) == 0)) {
strcpy(dki_info->dki_cname, "pseudo");
dki_info->dki_ctype = DKC_MD;
} else if ((strncmp(dki_info->dki_dname, "ram", 3) == 0)) {
strcpy(dki_info->dki_cname, "pseudo");
dki_info->dki_ctype = DKC_PCMCIA_MEM;
} else if ((strncmp(dki_info->dki_dname, "loop", 4) == 0)) {
strcpy(dki_info->dki_cname, "pseudo");
dki_info->dki_ctype = DKC_VBD;
} else {
strcpy(dki_info->dki_cname, "unknown");
dki_info->dki_ctype = DKC_UNKNOWN;
}
free(dev_path);
#else
if (ioctl(fd, DKIOCINFO, (caddr_t)dki_info) == -1)
goto error;
#endif
return (0);
error:
if (efi_debug)
(void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
switch (errno) {
case EIO:
return (VT_EIO);
case EINVAL:
return (VT_EINVAL);
default:
return (VT_ERROR);
}
}
/*
* the number of blocks the EFI label takes up (round up to nearest
* block)
@ -136,12 +233,13 @@ read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize)
int
efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc)
{
diskaddr_t capacity;
uint_t lbsize;
diskaddr_t capacity = 0;
uint_t lbsize = 0;
uint_t nblocks;
size_t length;
struct dk_gpt *vptr;
struct uuid uuid;
struct dk_cinfo dki_info;
if (read_disk_info(fd, &capacity, &lbsize) != 0) {
if (efi_debug)
@ -149,6 +247,31 @@ efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc)
"couldn't read disk information\n");
return (-1);
}
#if defined(__linux__)
if (efi_get_info(fd, &dki_info) != 0) {
if (efi_debug)
(void) fprintf(stderr,
"couldn't read disk information\n");
return (-1);
}
if (dki_info.dki_partition != 0) {
if (efi_debug)
(void) fprintf(stderr,
"can only partition whole devices\n");
return (-1);
}
if ((dki_info.dki_ctype == DKC_PCMCIA_MEM) ||
(dki_info.dki_ctype == DKC_VBD) ||
(dki_info.dki_ctype == DKC_UNKNOWN)) {
if (efi_debug)
(void) fprintf(stderr,
"unpartitionable device type %d\n",
dki_info.dki_ctype);
return (-1);
}
#endif
nblocks = NBLOCKS(nparts, lbsize);
if ((nblocks * lbsize) < EFI_MIN_ARRAY_SIZE + lbsize) {
@ -244,14 +367,117 @@ efi_ioctl(int fd, int cmd, dk_efi_t *dk_ioc)
{
void *data = dk_ioc->dki_data;
int error;
#if defined(__linux__)
diskaddr_t capacity;
uint_t lbsize;
/*
* When the IO is not being performed in kernel as an ioctl we need
* to know the sector size so we can seek to the proper byte offset.
*/
if (read_disk_info(fd, &capacity, &lbsize) == -1) {
if (efi_debug)
fprintf(stderr,"unable to read disk info: %d",errno);
errno = EIO;
return -1;
}
switch (cmd) {
case DKIOCGETEFI:
if (lbsize == 0) {
if (efi_debug)
(void) fprintf(stderr, "DKIOCGETEFI assuming "
"LBA %d bytes\n", DEV_BSIZE);
lbsize = DEV_BSIZE;
}
error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET);
if (error == -1)
return error;
error = read(fd, data, dk_ioc->dki_length);
if (error == -1)
return error;
if (error != dk_ioc->dki_length) {
if (efi_debug)
(void) fprintf(stderr, "DKIOCGETEFI short "
"read of %d bytes\n", error);
errno = EIO;
return -1;
}
error = 0;
break;
case DKIOCSETEFI:
if (lbsize == 0) {
if (efi_debug)
(void) fprintf(stderr, "DKIOCSETEFI unknown "
"LBA size\n");
errno = EIO;
return -1;
}
error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET);
if (error == -1)
return error;
error = write(fd, data, dk_ioc->dki_length);
if (error == -1)
return error;
if (error != dk_ioc->dki_length) {
if (efi_debug)
(void) fprintf(stderr, "DKIOCSETEFI short "
"write of %d bytes\n", error);
errno = EIO;
return -1;
}
error = fdatasync(fd);
if (error == -1)
return error;
error = 0;
break;
default:
if (efi_debug)
(void) fprintf(stderr, "unsupported ioctl()\n");
errno = EIO;
return -1;
}
#else
dk_ioc->dki_data_64 = (uint64_t)(uintptr_t)data;
error = ioctl(fd, cmd, (void *)dk_ioc);
dk_ioc->dki_data = data;
#endif
return (error);
}
#if defined(__linux__)
static int
efi_rescan(int fd)
{
int retry = 5;
int error;
/* Notify the kernel a devices partition table has been updated */
while ((error = ioctl(fd, BLKRRPART)) != 0) {
if (--retry == 0) {
(void) fprintf(stderr, "the kernel failed to rescan "
"the partition table: %d\n", errno);
return (-1);
}
}
return (0);
}
#endif
static int
check_label(int fd, dk_efi_t *dk_ioc)
{
@ -306,6 +532,8 @@ efi_read(int fd, struct dk_gpt *vtoc)
int rval = 0;
int md_flag = 0;
int vdc_flag = 0;
diskaddr_t capacity = 0;
uint_t lbsize = 0;
struct dk_minfo disk_info;
dk_efi_t dk_ioc;
efi_gpt_t *efi;
@ -317,19 +545,9 @@ efi_read(int fd, struct dk_gpt *vtoc)
/*
* get the partition number for this file descriptor.
*/
if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) {
if (efi_debug) {
(void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
}
switch (errno) {
case EIO:
return (VT_EIO);
case EINVAL:
return (VT_EINVAL);
default:
return (VT_ERROR);
}
}
if ((rval = efi_get_info(fd, &dki_info)) != 0)
return rval;
if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
(strncmp(dki_info.dki_dname, "md", 3) == 0)) {
md_flag++;
@ -343,14 +561,18 @@ efi_read(int fd, struct dk_gpt *vtoc)
}
/* get the LBA size */
if (ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info) == -1) {
if (read_disk_info(fd, &capacity, &lbsize) == -1) {
if (efi_debug) {
(void) fprintf(stderr,
"assuming LBA 512 bytes %d\n",
"unable to read disk info: %d",
errno);
}
disk_info.dki_lbsize = DEV_BSIZE;
return (VT_EINVAL);
}
disk_info.dki_lbsize = lbsize;
disk_info.dki_capacity = capacity;
if (disk_info.dki_lbsize == 0) {
if (efi_debug) {
(void) fprintf(stderr,
@ -840,22 +1062,13 @@ efi_write(int fd, struct dk_gpt *vtoc)
efi_gpe_t *efi_parts;
int i, j;
struct dk_cinfo dki_info;
int rval;
int md_flag = 0;
int nblocks;
diskaddr_t lba_backup_gpt_hdr;
if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) {
if (efi_debug)
(void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
switch (errno) {
case EIO:
return (VT_EIO);
case EINVAL:
return (VT_EINVAL);
default:
return (VT_ERROR);
}
}
if ((rval = efi_get_info(fd, &dki_info)) != 0)
return rval;
/* check if we are dealing wih a metadevice */
if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
@ -942,6 +1155,10 @@ efi_write(int fd, struct dk_gpt *vtoc)
return (VT_EINVAL);
}
/* Zero's should be written for empty partitions */
if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED)
continue;
efi_parts[i].efi_gpe_StartingLBA =
LE_64(vtoc->efi_parts[i].p_start);
efi_parts[i].efi_gpe_EndingLBA =
@ -1033,6 +1250,13 @@ efi_write(int fd, struct dk_gpt *vtoc)
/* write the PMBR */
(void) write_pmbr(fd, vtoc);
free(dk_ioc.dki_data);
#if defined(__linux__)
rval = efi_rescan(fd);
if (rval)
return (VT_ERROR);
#endif
return (0);
}
@ -1050,6 +1274,7 @@ efi_free(struct dk_gpt *ptr)
int
efi_type(int fd)
{
#if 0
struct vtoc vtoc;
struct extvtoc extvtoc;
@ -1063,6 +1288,9 @@ efi_type(int fd)
}
}
return (0);
#else
return (ENOSYS);
#endif
}
void
@ -1176,7 +1404,7 @@ efi_auto_sense(int fd, struct dk_gpt **vtoc)
return (-1);
}
for (i = 0; i < min((*vtoc)->efi_nparts, V_NUMPAR); i++) {
for (i = 0; i < MIN((*vtoc)->efi_nparts, V_NUMPAR); i++) {
(*vtoc)->efi_parts[i].p_tag = default_vtoc_map[i].p_tag;
(*vtoc)->efi_parts[i].p_flag = default_vtoc_map[i].p_flag;
(*vtoc)->efi_parts[i].p_start = 0;

View File

@ -56,11 +56,15 @@ extern "C" {
#if defined(__sun__) || defined(__sun)
#define DISK_ROOT "/dev/dsk"
#define RDISK_ROOT "/dev/rdsk"
#define FIRST_SLICE "s0"
#define BACKUP_SLICE "s2"
#endif
#ifdef __linux__
#define DISK_ROOT "/dev"
#define RDISK_ROOT DISK_ROOT
#define FIRST_SLICE "1"
#define BACKUP_SLICE ""
#endif
/*

View File

@ -634,9 +634,12 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
/*
* Don't start the slice at the default block of 34; many storage
* devices will use a stripe width of 128k, so start there instead.
* devices will use a stripe width of 128k, other vendors prefer a 1m
* alignment. It is best to play it safe and ensure a 1m alignment
* give 512b blocks. When the block size is larger by a power of 2
* we will still be 1m aligned.
*/
#define NEW_START_BLOCK 256
#define NEW_START_BLOCK 2048
/*
* Validate the given pool name, optionally putting an extended error message in
@ -1758,6 +1761,7 @@ is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type)
static int
zpool_relabel_disk(libzfs_handle_t *hdl, const char *name)
{
#if 0
char path[MAXPATHLEN];
char errbuf[1024];
int fd, error;
@ -1788,6 +1792,12 @@ zpool_relabel_disk(libzfs_handle_t *hdl, const char *name)
return (zfs_error(hdl, EZFS_NOCAP, errbuf));
}
return (0);
#else
char errbuf[1024];
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
"relabel '%s/%s': libefi is unsupported"), DISK_ROOT, name);
return (zfs_error(hdl, EZFS_NOTSUP, errbuf));
#endif
}
/*
@ -3119,7 +3129,10 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
uint64_t slice_size;
diskaddr_t start_block;
char errbuf[1024];
#if defined(__linux__)
struct stat64 statbuf;
int i;
#endif
/* prepare an error message just in case */
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
@ -3155,6 +3168,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
* This shouldn't happen. We've long since verified that this
* is a valid device.
*/
printf("errno =%d\n", errno);
zfs_error_aux(hdl,
dgettext(TEXT_DOMAIN, "unable to open device"));
return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
@ -3216,6 +3230,24 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
(void) close(fd);
efi_free(vtoc);
#if defined(__linux__)
/*
* The efi partition table has been successfully written and the
* kernel notified. However, it still may take a moment for udev
* to notice the devfs update and properly populate /dev/. We will
* wait up to 3 seconds which is far far far longer than needed.
*/
(void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
FIRST_SLICE);
for (i = 0; i < 3000; i++) {
if (stat64(path, &statbuf) == 0 || errno != ENOENT)
break;
usleep(1000);
}
#endif
return (0);
}

View File

@ -76,6 +76,19 @@ vdev_bdev_mode(int smode)
}
#endif /* HAVE_OPEN_BDEV_EXCLUSIVE */
static uint64_t
bdev_capacity(struct block_device *bdev)
{
struct hd_struct *part = bdev->bd_part;
/* The partition capacity referenced by the block device */
if (part)
return part->nr_sects;
/* Otherwise assume the full device capacity */
return get_capacity(bdev->bd_disk);
}
static int
vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift)
{
@ -122,15 +135,11 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift)
/* Clear the nowritecache bit, causes vdev_reopen() to try again. */
v->vdev_nowritecache = B_FALSE;
/* Determine the actual size of the device (in bytes)
*
* XXX: SECTOR_SIZE is defined to 512b which may not be true for
* your device, we must use the actual hardware sector size.
*/
*psize = get_capacity(bdev->bd_disk) * SECTOR_SIZE;
/* Physical volume size in bytes */
*psize = bdev_capacity(bdev) * bdev_hardsect_size(bdev);
/* Based on the minimum sector size set the block size */
*ashift = highbit(MAX(SECTOR_SIZE, SPA_MINBLOCKSIZE)) - 1;
*ashift = highbit(MAX(bdev_hardsect_size(bdev), SPA_MINBLOCKSIZE)) - 1;
return 0;
}
@ -314,7 +323,7 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
uint64_t bio_offset;
int i, error = 0, bio_count, bio_size;
ASSERT3S(kbuf_offset % SECTOR_SIZE, ==, 0);
ASSERT3S(kbuf_offset % bdev_hardsect_size(bdev), ==, 0);
q = bdev_get_queue(bdev);
if (!q)
return ENXIO;
@ -558,7 +567,7 @@ vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config)
if (IS_ERR(bdev))
return -PTR_ERR(bdev);
s = get_capacity(bdev->bd_disk) * SECTOR_SIZE;
s = bdev_capacity(bdev) * bdev_hardsect_size(bdev);
if (s == 0) {
vdev_bdev_close(bdev, vdev_bdev_mode(FREAD));
return EIO;