Project

General

Profile

Actions

Bug #7804

closed

fdisk_read_master_part_table() causes 'format' to crash

Added by Youzhong Yang over 6 years ago. Updated over 6 years ago.

Status:
Closed
Priority:
Normal
Assignee:
-
Category:
cmd - userland programs
Start date:
2017-01-25
Due date:
% Done:

100%

Estimated time:
Difficulty:
Medium
Tags:
needs-triage
Gerrit CR:
External Bug:

Description

We have a few NVMe SSDs formatted to 4K block size,

# prtvtoc /dev/rdsk/c1t1d0p0
* /dev/rdsk/c1t1d0p0 partition map
*
* Dimensions:
*    4096 bytes/sector
*      56 sectors/track
*     224 tracks/cylinder
*   12544 sectors/cylinder
*   38933 cylinders
*   38933 accessible cylinders
*
* Flags:
*   1: unmountable
*  10: read-only
*
*                          First     Sector    Last
* Partition  Tag  Flags    Sector     Count    Sector  Mount Directory
       0      0    00          0 488378646 488378645
       2      5    01          0 488378646 488378645
       8      1    01          0     12544     12543

Selecting the drive in format causes it to crash:

# format
Searching for disks...done

AVAILABLE DISK SELECTIONS:
       0. c1t1d0 <drive type unknown>
          /pci@0,0/pci8086,6f04@2/pci8086,3703@0/blkdev@1,0
       1. c2t1d0 <drive type unknown>
          /pci@0,0/pci8086,6f05@2,1/pci8086,3703@0/blkdev@1,0
       2. c3t1d0 <drive type unknown>
          /pci@0,0/pci8086,6f08@3/pci8086,3703@0/blkdev@1,0
       3. c4t1d0 <drive type unknown>
          /pci@0,0/pci8086,6f09@3,1/pci8086,3703@0/blkdev@1,0
       4. c6t4d0 <ATA-SuperMicro SSD-SOB20S-118.00GB>
          /pci@0,0/pci15d9,844@1f,2/disk@4,0
Specify disk (enter its number): 0

Segmentation Fault (core dumped)

with the fix in usr/src/lib/libfdisk/common/libfdisk.c:

diff --git a/usr/src/lib/libfdisk/common/libfdisk.c b/usr/src/lib/libfdisk/common/libfdisk.c
index 37581f5..602cbba 100644
--- a/usr/src/lib/libfdisk/common/libfdisk.c
+++ b/usr/src/lib/libfdisk/common/libfdisk.c
@@ -768,7 +768,8 @@ fdisk_alloc_part_table()
 static int
 fdisk_read_master_part_table(ext_part_t *epp)
 {
-       uchar_t buf[512];
+       struct dk_minfo_ext dkmp_ext;
+       uchar_t *buf;
        int sectsize = 512;
        int size = sizeof (struct ipart);
        int cpcnt = FD_NUMPART * size;
@@ -776,17 +777,27 @@ fdisk_read_master_part_table(ext_part_t *epp)
        if (lseek(epp->dev_fd, 0, SEEK_SET) < 0) {
                return (EIO);
        }
+       if (ioctl(epp->dev_fd, DKIOCGMEDIAINFOEXT, &dkmp_ext) < 0) {
+               return (EIO);
+       }
+       buf = calloc(dkmp_ext.dki_lbsize, sizeof(uchar_t));
+       if (buf == NULL) {
+               return ENOMEM;
+       }
        if (read(epp->dev_fd, buf, sectsize) < sectsize) {
+               free(buf);
                return (EIO);
        }

        /*LINTED*/
        if (LE_16((*(uint16_t *)&buf[510])) != MBB_MAGIC) {
                bzero(epp->mtable, cpcnt);
+               free(buf);
                return (FDISK_EBADMAGIC);
        }

        bcopy(&buf[FDISK_PART_TABLE_START], epp->mtable, cpcnt);
+       free(buf);

        return (FDISK_SUCCESS);
 }

it works:

[root@batfs9995 ~]# format
Searching for disks...done

AVAILABLE DISK SELECTIONS:
       0. c1t1d0 <drive type unknown>
          /pci@0,0/pci8086,6f04@2/pci8086,3703@0/blkdev@1,0
       1. c2t1d0 <drive type unknown>
          /pci@0,0/pci8086,6f05@2,1/pci8086,3703@0/blkdev@1,0
       2. c3t1d0 <drive type unknown>
          /pci@0,0/pci8086,6f08@3/pci8086,3703@0/blkdev@1,0
       3. c4t1d0 <drive type unknown>
          /pci@0,0/pci8086,6f09@3,1/pci8086,3703@0/blkdev@1,0
       4. c6t4d0 <ATA-SuperMicro SSD-SOB20S-118.00GB>
          /pci@0,0/pci15d9,844@1f,2/disk@4,0
Specify disk (enter its number): 0

AVAILABLE DRIVE TYPES:
        0. other
Specify disk type (enter its number):

The idea of using ioctl to get block size was suggested by Toomas Soome.


Related issues

Related to illumos gate - Bug #7999: 'format' command says "Failed to initialise libfdisk"Closed2017-03-23

Actions
Actions #2

Updated by Youzhong Yang over 6 years ago

The following program can be used to reproduce the issue:

# cat rdisk.c
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Reproducer for the short-buffer read problem described in this report.
 *
 * Usage: rdisk /dev/rdsk/... [bytes to read]
 *
 * Opens the raw disk device named by argv[1], seeks to offset 0 and issues a
 * single read() of `requested_size` bytes (512 unless argv[2] overrides it)
 * into a heap buffer of exactly that size.  The buffer is deliberately NOT
 * rounded up to the device block size: the point is to let a debugging
 * allocator (e.g. libumem with UMEM_DEBUG=audit) notice at free() time if
 * more data than requested was written into the buffer.
 *
 * Exits with status 1 on usage, allocation, open or seek errors; a failing
 * read() is reported with perror() but the summary line is still printed.
 * Returns 0 otherwise.
 */
int main(int argc, char **argv)
{
        char *disk = NULL;
        int fd;
        unsigned char *buf;
        int requested_size = 512;
        ssize_t bytes;

        if (argc < 2) {
                /* The original omitted the argument for %s (undefined behaviour). */
                printf("Usage: %s /dev/rdsk/... [bytes to read]\n",
                    argc > 0 ? argv[0] : "rdisk");
                exit(1);
        }

        disk = argv[1];
        if (argc > 2) requested_size = atoi(argv[2]);
        /*
         * atoi() yields 0 for non-numeric input and may yield a negative
         * value; neither is a meaningful read size, and a negative value
         * would be converted to a huge size_t by calloc()/read().
         */
        if (requested_size <= 0) {
                printf("Invalid size: %s\n", argv[2]);
                exit(1);
        }

        /* Exactly requested_size bytes, so any overrun lands outside the buffer. */
        buf = calloc((size_t)requested_size, 1);
        if (buf == NULL) {
                printf("No memory\n");
                exit(1);
        }

        fd = open(disk, O_RDONLY);
        if (fd == -1) {
                perror("open");
                free(buf);
                exit(1);
        }
        if (lseek(fd, 0, SEEK_SET) < 0) {
                perror("lseek");
                close(fd);
                free(buf);
                exit(1);
        }

        /* read() returns ssize_t; keep the full width instead of narrowing to int. */
        bytes = read(fd, buf, (size_t)requested_size);
        if (bytes == -1) perror("read");
        /* %p requires a void *; pid_t is printed through an explicit int cast. */
        printf("asked %d, got %zd, buf ptr = %p, pid %d\n", requested_size, bytes,
            (void *)buf, (int)getpid());
        close(fd);
        /* With an auditing allocator, this free() is where corruption is detected. */
        free(buf);
        return(0);
}

Build it: gcc -o rdisk rdisk.c

In one terminal window, run

# dtrace -n 'nvme_create_nvm_cmd:entry /arg1 == 2 && args[2]->x_blkno == 0 && args[2]->x_nblks == 1/ {stack();printf("blkno %d nblks %d ndmac %d dmac_size %d dmac_laddress %p pid %d",args[2]->x_blkno, args[2]->x_nblks, args[2]->x_ndmac, args[2]->x_dmac.dmac_size, args[2]->x_dmac._dmu._dmac_ll, pid);}'

In another window, run the program; it crashes:

# LD_PRELOAD=libumem.so UMEM_DEBUG=audit /var/tmp/rdisk /dev/rdsk/c1t1d0p0 512
asked 512, got 512, buf ptr = 4ffbd0, pid 4124
Abort (core dumped)

and in dtrace window, you will get something like the following:

CPU     ID                    FUNCTION:NAME
  1  66976        nvme_create_nvm_cmd:entry
              nvme`nvme_bd_cmd+0x45
              nvme`nvme_bd_read+0x1a
              blkdev`bd_sched+0x96
              blkdev`bd_submit+0x57
              blkdev`bd_tg_rdwr+0xee
              cmlb`cmlb_use_efi+0xc0
              cmlb`cmlb_validate_geometry+0xcb
              cmlb`cmlb_validate+0x83
              blkdev`bd_open+0xd2
              genunix`dev_open+0x30
              specfs`spec_open+0x206
              genunix`fop_open+0x89
              genunix`vn_openat+0x234
              genunix`copen+0x1fc
              genunix`openat+0x2a
              genunix`open+0x25
              unix`sys_syscall+0x1a2
blkno 0 nblks 1 ndmac 1 dmac_size 4096 dmac_laddress 5f7653f000 pid 4124
  1  66976        nvme_create_nvm_cmd:entry
              nvme`nvme_bd_cmd+0x45
              nvme`nvme_bd_read+0x1a
              blkdev`bd_sched+0x96
              blkdev`bd_submit+0x57
              blkdev`bd_tg_rdwr+0xee
              cmlb`cmlb_read_fdisk+0x7d
              cmlb`cmlb_validate_geometry+0x1e6
              cmlb`cmlb_validate+0x83
              blkdev`bd_open+0xd2
              genunix`dev_open+0x30
              specfs`spec_open+0x206
              genunix`fop_open+0x89
              genunix`vn_openat+0x234
              genunix`copen+0x1fc
              genunix`openat+0x2a
              genunix`open+0x25
              unix`sys_syscall+0x1a2
blkno 0 nblks 1 ndmac 1 dmac_size 4096 dmac_laddress 5f6e1c8000 pid 4124
  1  66976        nvme_create_nvm_cmd:entry
              nvme`nvme_bd_cmd+0x45
              nvme`nvme_bd_read+0x1a
              blkdev`bd_sched+0x96
              blkdev`bd_submit+0x57
              blkdev`bd_strategy+0x13c
              genunix`default_physio+0x2db
              genunix`physio+0x25
              blkdev`bd_read+0x34
              genunix`cdev_read+0x2d
              specfs`spec_read+0x2b9
              genunix`fop_read+0xf3
              genunix`read+0x2a7
              unix`sys_syscall+0x1a2
blkno 0 nblks 1 ndmac 1 dmac_size 512 dmac_laddress 5e46c16bd0 pid 4124

Examining the core with mdb indicates there was memory corruption:

# mdb /zones/global/cores/core.rdisk.4124
Loading modules: [ libumem.so.1 libc.so.1 ld.so.1 ]
> $C
ffffbf7fffdff890 libc.so.1`_lwp_kill+0xa()
ffffbf7fffdff8c0 libc.so.1`raise+0x20(6)
ffffbf7fffdff8d0 libumem.so.1`umem_do_abort+0x44()
ffffbf7fffdff9d0 0xffffbf7ffec28025()
ffffbf7fffdffa70 libumem.so.1`umem_error+0x1aa(1, 48e028, 4ffbc0)
ffffbf7fffdffa90 libumem.so.1`umem_free+0xa8(4ffbc0, 210)
ffffbf7fffdffae0 libumem.so.1`process_free+0x145(4ffbd0, 1, 0)
ffffbf7fffdffb00 libumem.so.1`umem_malloc_free+0x1d(4ffbd0)
ffffbf7fffdffb40 main+0x17d()
ffffbf7fffdffb50 _start+0x6c()

Why does it crash? The 512-byte user-space buffer is DMA-mapped by bd_strategy() -> bd_xfer_alloc(), and that DMA address is used by the NVMe driver to store the first block of the device. However, because the drive has been formatted with a 4K block size, the minimum size transferred by the NVMe device is 4K. So even though the application requests only 512 bytes, the NVMe driver still reads 4K of data into the buffer, overrunning it.

Actions #3

Updated by Electric Monk over 6 years ago

  • Status changed from New to Closed
  • % Done changed from 0 to 100

git commit 8533946bd264dca901fdf56bf3da1d81e728b423

commit  8533946bd264dca901fdf56bf3da1d81e728b423
Author: Youzhong Yang <yyang@mathworks.com>
Date:   2017-02-27T13:17:39.000Z

    7804 fdisk_read_master_part_table() causes 'format' to crash
    Reviewed by: Jason King <jason.brian.king@gmail.com>
    Reviewed by: Hans Rosenfeld <hans.rosenfeld@nexenta.com>
    Reviewed by: Robert Mustacchi <rm@joyent.com>
    Approved by: Dan McDonald <danmcd@omniti.com>

Actions #4

Updated by Nikola M. over 6 years ago

  • Related to Bug #7999: 'format' command says "Failed to initialise libfdisk" added
Actions

Also available in: Atom PDF