Bug #7804
Closed: fdisk_read_master_part_table() causes 'format' to crash
100%
Description
We have a few NVMe SSDs formatted with a 4K block size:
# prtvtoc /dev/rdsk/c1t1d0p0 * /dev/rdsk/c1t1d0p0 partition map * * Dimensions: * 4096 bytes/sector * 56 sectors/track * 224 tracks/cylinder * 12544 sectors/cylinder * 38933 cylinders * 38933 accessible cylinders * * Flags: * 1: unmountable * 10: read-only * * First Sector Last * Partition Tag Flags Sector Count Sector Mount Directory 0 0 00 0 488378646 488378645 2 5 01 0 488378646 488378645 8 1 01 0 12544 12543
Selecting the drive in format causes it to crash:
# format Searching for disks...done AVAILABLE DISK SELECTIONS: 0. c1t1d0 <drive type unknown> /pci@0,0/pci8086,6f04@2/pci8086,3703@0/blkdev@1,0 1. c2t1d0 <drive type unknown> /pci@0,0/pci8086,6f05@2,1/pci8086,3703@0/blkdev@1,0 2. c3t1d0 <drive type unknown> /pci@0,0/pci8086,6f08@3/pci8086,3703@0/blkdev@1,0 3. c4t1d0 <drive type unknown> /pci@0,0/pci8086,6f09@3,1/pci8086,3703@0/blkdev@1,0 4. c6t4d0 <ATA-SuperMicro SSD-SOB20S-118.00GB> /pci@0,0/pci15d9,844@1f,2/disk@4,0 Specify disk (enter its number): 0 Segmentation Fault (core dumped)
With the following fix in usr/src/lib/libfdisk/common/libfdisk.c:
diff --git a/usr/src/lib/libfdisk/common/libfdisk.c b/usr/src/lib/libfdisk/common/libfdisk.c index 37581f5..602cbba 100644 --- a/usr/src/lib/libfdisk/common/libfdisk.c +++ b/usr/src/lib/libfdisk/common/libfdisk.c @@ -768,7 +768,8 @@ fdisk_alloc_part_table() static int fdisk_read_master_part_table(ext_part_t *epp) { - uchar_t buf[512]; + struct dk_minfo_ext dkmp_ext; + uchar_t *buf; int sectsize = 512; int size = sizeof (struct ipart); int cpcnt = FD_NUMPART * size; @@ -776,17 +777,27 @@ fdisk_read_master_part_table(ext_part_t *epp) if (lseek(epp->dev_fd, 0, SEEK_SET) < 0) { return (EIO); } + if (ioctl(epp->dev_fd, DKIOCGMEDIAINFOEXT, &dkmp_ext) < 0) { + return (EIO); + } + buf = calloc(dkmp_ext.dki_lbsize, sizeof(uchar_t)); + if (buf == NULL) { + return ENOMEM; + } if (read(epp->dev_fd, buf, sectsize) < sectsize) { + free(buf); return (EIO); } /*LINTED*/ if (LE_16((*(uint16_t *)&buf[510])) != MBB_MAGIC) { bzero(epp->mtable, cpcnt); + free(buf); return (FDISK_EBADMAGIC); } bcopy(&buf[FDISK_PART_TABLE_START], epp->mtable, cpcnt); + free(buf); return (FDISK_SUCCESS); }
format works:
[root@batfs9995 ~]# format Searching for disks...done AVAILABLE DISK SELECTIONS: 0. c1t1d0 <drive type unknown> /pci@0,0/pci8086,6f04@2/pci8086,3703@0/blkdev@1,0 1. c2t1d0 <drive type unknown> /pci@0,0/pci8086,6f05@2,1/pci8086,3703@0/blkdev@1,0 2. c3t1d0 <drive type unknown> /pci@0,0/pci8086,6f08@3/pci8086,3703@0/blkdev@1,0 3. c4t1d0 <drive type unknown> /pci@0,0/pci8086,6f09@3,1/pci8086,3703@0/blkdev@1,0 4. c6t4d0 <ATA-SuperMicro SSD-SOB20S-118.00GB> /pci@0,0/pci15d9,844@1f,2/disk@4,0 Specify disk (enter its number): 0 AVAILABLE DRIVE TYPES: 0. other Specify disk type (enter its number):
The idea of using ioctl to get block size was suggested by Toomas Soome.
Related issues
Updated by Youzhong Yang over 6 years ago
Updated by Youzhong Yang over 6 years ago
The following program can be used to reproduce the issue:
# cat rdisk.c
/*
 * rdisk: open the given device, seek to offset 0 and issue a single read()
 * of the requested number of bytes (512 by default) into a heap buffer of
 * exactly that size, then report how many bytes were returned.
 *
 * Usage: rdisk /dev/rdsk/... [bytes to read]
 *
 * Exits 1 on usage, allocation, open or seek errors; a failed read() is
 * reported with perror() but the summary line is still printed.
 */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
	const char *disk;
	unsigned char *buf;
	int requested_size = 512;
	ssize_t bytes;	/* read() returns ssize_t, not int */
	int fd;

	if (argc < 2) {
		/* The original passed no argument for %s (undefined behaviour). */
		printf("Usage: %s /dev/rdsk/... [bytes to read]\n",
		    argc > 0 ? argv[0] : "rdisk");
		exit(1);
	}

	disk = argv[1];
	if (argc > 2)
		requested_size = atoi(argv[2]);

	/* atoi() yields 0 for non-numeric input; reject that and negatives. */
	if (requested_size <= 0) {
		printf("Invalid size: %s\n", argv[2]);
		exit(1);
	}

	/* The buffer is deliberately no larger than the requested size. */
	buf = calloc((size_t)requested_size, 1);
	if (buf == NULL) {
		printf("No memory\n");
		exit(1);
	}

	fd = open(disk, O_RDONLY);
	if (fd == -1) {
		perror("open");
		free(buf);
		exit(1);
	}

	if (lseek(fd, 0, SEEK_SET) < 0) {
		perror("lseek");
		close(fd);
		free(buf);
		exit(1);
	}

	bytes = read(fd, buf, (size_t)requested_size);
	if (bytes == -1)
		perror("read");

	/* %p requires a void *; pid_t and ssize_t are cast to match %d. */
	printf("asked %d, got %d, buf ptr = %p, pid %d\n",
	    requested_size, (int)bytes, (void *)buf, (int)getpid());

	close(fd);
	free(buf);
	return (0);
}
Build it: gcc -o rdisk rdisk.c
In one terminal window, run
# dtrace -n 'nvme_create_nvm_cmd:entry /arg1 == 2 && args[2]->x_blkno == 0 && args[2]->x_nblks == 1/ {stack();printf("blkno %d nblks %d ndmac %d dmac_size %d dmac_laddress %p pid %d",args[2]->x_blkno, args[2]->x_nblks, args[2]->x_ndmac, args[2]->x_dmac.dmac_size, args[2]->x_dmac._dmu._dmac_ll, pid);}'
In another window, run the program; it crashes:
# LD_PRELOAD=libumem.so UMEM_DEBUG=audit /var/tmp/rdisk /dev/rdsk/c1t1d0p0 512 asked 512, got 512, buf ptr = 4ffbd0, pid 4124 Abort (core dumped)
In the dtrace window, you will see something like the following:
CPU ID FUNCTION:NAME 1 66976 nvme_create_nvm_cmd:entry nvme`nvme_bd_cmd+0x45 nvme`nvme_bd_read+0x1a blkdev`bd_sched+0x96 blkdev`bd_submit+0x57 blkdev`bd_tg_rdwr+0xee cmlb`cmlb_use_efi+0xc0 cmlb`cmlb_validate_geometry+0xcb cmlb`cmlb_validate+0x83 blkdev`bd_open+0xd2 genunix`dev_open+0x30 specfs`spec_open+0x206 genunix`fop_open+0x89 genunix`vn_openat+0x234 genunix`copen+0x1fc genunix`openat+0x2a genunix`open+0x25 unix`sys_syscall+0x1a2 blkno 0 nblks 1 ndmac 1 dmac_size 4096 dmac_laddress 5f7653f000 pid 4124 1 66976 nvme_create_nvm_cmd:entry nvme`nvme_bd_cmd+0x45 nvme`nvme_bd_read+0x1a blkdev`bd_sched+0x96 blkdev`bd_submit+0x57 blkdev`bd_tg_rdwr+0xee cmlb`cmlb_read_fdisk+0x7d cmlb`cmlb_validate_geometry+0x1e6 cmlb`cmlb_validate+0x83 blkdev`bd_open+0xd2 genunix`dev_open+0x30 specfs`spec_open+0x206 genunix`fop_open+0x89 genunix`vn_openat+0x234 genunix`copen+0x1fc genunix`openat+0x2a genunix`open+0x25 unix`sys_syscall+0x1a2 blkno 0 nblks 1 ndmac 1 dmac_size 4096 dmac_laddress 5f6e1c8000 pid 4124 1 66976 nvme_create_nvm_cmd:entry nvme`nvme_bd_cmd+0x45 nvme`nvme_bd_read+0x1a blkdev`bd_sched+0x96 blkdev`bd_submit+0x57 blkdev`bd_strategy+0x13c genunix`default_physio+0x2db genunix`physio+0x25 blkdev`bd_read+0x34 genunix`cdev_read+0x2d specfs`spec_read+0x2b9 genunix`fop_read+0xf3 genunix`read+0x2a7 unix`sys_syscall+0x1a2 blkno 0 nblks 1 ndmac 1 dmac_size 512 dmac_laddress 5e46c16bd0 pid 4124
Examining the core with mdb indicates that there was memory corruption:
# mdb /zones/global/cores/core.rdisk.4124 Loading modules: [ libumem.so.1 libc.so.1 ld.so.1 ] > $C ffffbf7fffdff890 libc.so.1`_lwp_kill+0xa() ffffbf7fffdff8c0 libc.so.1`raise+0x20(6) ffffbf7fffdff8d0 libumem.so.1`umem_do_abort+0x44() ffffbf7fffdff9d0 0xffffbf7ffec28025() ffffbf7fffdffa70 libumem.so.1`umem_error+0x1aa(1, 48e028, 4ffbc0) ffffbf7fffdffa90 libumem.so.1`umem_free+0xa8(4ffbc0, 210) ffffbf7fffdffae0 libumem.so.1`process_free+0x145(4ffbd0, 1, 0) ffffbf7fffdffb00 libumem.so.1`umem_malloc_free+0x1d(4ffbd0) ffffbf7fffdffb40 main+0x17d() ffffbf7fffdffb50 _start+0x6c()
Why does it crash? The 512-byte user-space buffer is DMA-mapped by bd_strategy() -> bd_xfer_alloc(), and the NVMe driver uses that DMA memory address to store the first block of the device. However, because the drive has been formatted with a 4K block size, the minimum size transferred by the NVMe device is 4K. So even though the application requests only 512 bytes, the NVMe driver still reads 4K of data into the buffer, overrunning the 512-byte allocation and corrupting the memory that follows it.
Updated by Electric Monk over 6 years ago
- Status changed from New to Closed
- % Done changed from 0 to 100
git commit 8533946bd264dca901fdf56bf3da1d81e728b423
commit 8533946bd264dca901fdf56bf3da1d81e728b423 Author: Youzhong Yang <yyang@mathworks.com> Date: 2017-02-27T13:17:39.000Z 7804 fdisk_read_master_part_table() causes 'format' to crash Reviewed by: Jason King <jason.brian.king@gmail.com> Reviewed by: Hans Rosenfeld <hans.rosenfeld@nexenta.com> Reviewed by: Robert Mustacchi <rm@joyent.com> Approved by: Dan McDonald <danmcd@omniti.com>
Updated by Nikola M. over 6 years ago
- Related to Bug #7999: 'format' command says "Failed to initialise libfdisk" added