Project

General

Profile

Bug #7193

deadlock between zfs_rename and zfs_rmdir

Added by Andriy Gapon almost 4 years ago.

Status:
New
Priority:
Normal
Assignee:
-
Category:
zfs - Zettabyte File System
Start date:
2016-07-18
Due date:
% Done:

0%

Estimated time:
Difficulty:
Medium
Tags:
needs-triage
Gerrit CR:

Description

I've encountered a system deadlock while running a number of concurrent POSIX filesystem operations on a ZFS filesystem.
I have a crashdump generated with reboot -d for further analysis.

Here is my initial analysis of the deadlock.

RMDIR X/A (this is thread ffffff02acc483c0 in the dump)
This thread holds a dirlock on X/A, i.e. a read lock on z_name_lock of X and an entry for A in X's z_dirlocks.
This thread wants a write lock on z_name_lock of X/A:
rw_enter(&zp->z_name_lock, RW_WRITER);

RENAME X/A/B => X/A (this is thread ffffff027b0054c0 in the dump)
[Yes, I am aware that this rename cannot succeed, but it does not seem to be rejected early enough.]
The z_id values are such that the source is locked before the target.
This thread holds a dirlock on X/A/B, i.e. a read lock on z_name_lock of X/A and an entry for B in X/A's z_dirlocks.
The read lock on z_name_lock of X/A blocks the RMDIR thread.
This thread wants a dirlock on X/A, the target, but the RMDIR thread already holds it, so each thread is waiting for a lock held by the other.

Some data from mdb:

stack pointer for thread ffffff027b0054c0: ffffff00099f4970
[ ffffff00099f4970 _resume_from_idle+0x112() ]
  ffffff00099f49a0 swtch+0x18a()
  ffffff00099f49d0 cv_wait+0x89(ffffff02ab60f6d0, ffffff0299803ac8)
  ffffff00099f4ab0 zfs_dirent_lock+0x1d1(ffffff00099f4b60,
ffffff0299803ab0, ffffff0e0a38ec80, ffffff00099f4b78, 10, 0, 0)
  ffffff00099f4bf0 zfs_rename+0x5aa(ffffff05332e0200, ffffff070d165b02,
ffffff06311a3400, ffffff0e0a38ec80, ffffff027af42368, 0, ffffff0000000000)
  ffffff00099f4c80 fop_rename+0x68(ffffff05332e0200, ffffff070d165b02,
ffffff06311a3400, ffffff0e0a38ec80, ffffff027af42368, 0, ffffff0200000000)
  ffffff00099f4e30 vn_renameat+0x3ff(0, 80628a8, 0, 80627f8, 0)
  ffffff00099f4e90 renameat+0xdc(ffd19553, 80628a8, ffd19553, 80627f8)
  ffffff00099f4eb0 rename+0x22(80628a8, 80627f8)
  ffffff00099f4f00 sys_syscall32+0x1f7()

zfs_rename+0x547:               movl   -0xd4(%rbp),%r8d
zfs_rename+0x54e:               movq   -0xb8(%rbp),%rdx
zfs_rename+0x555:               leaq   -0x70(%rbp),%rcx
zfs_rename+0x559:               leaq   -0x88(%rbp),%rdi
zfs_rename+0x560:               xorl   %r9d,%r9d
zfs_rename+0x563:               movq   %rbx,%rsi
zfs_rename+0x566:               movq   $0x0,(%rsp)
zfs_rename+0x56e:               orl    $0x2,%r8d
zfs_rename+0x572:               call   -0x1d017 <zfs_dirent_lock>
zfs_rename+0x577:               movl   -0xd4(%rbp),%r8d
zfs_rename+0x57e:               movq   -0xb0(%rbp),%rdx
zfs_rename+0x585:               leaq   -0x78(%rbp),%rcx
zfs_rename+0x589:               leaq   -0x90(%rbp),%rdi
zfs_rename+0x590:               xorl   %r9d,%r9d
zfs_rename+0x593:               movq   %r13,%rsi
zfs_rename+0x596:               movl   %eax,%r12d
zfs_rename+0x599:               movq   $0x0,(%rsp)
zfs_rename+0x5a1:               orl    $0x10,%r8d
zfs_rename+0x5a5:               call   -0x1d04a <zfs_dirent_lock>
zfs_rename+0x5aa:               movl   %eax,%r15d
zfs_rename+0x5ad:               jmp    -0x310   <zfs_rename+0x2a2>

stack pointer for thread ffffff02acc483c0: ffffff0008616ad0
[ ffffff0008616ad0 _resume_from_idle+0x112() ]
  ffffff0008616b00 swtch+0x18a()
  ffffff0008616ba0 turnstile_block+0x2ea(0, 0, ffffff044f37e8b0,
fffffffffbc0ae80, 0, 0)
  ffffff0008616c10 rw_enter_sleep+0x1a9(ffffff044f37e8b0, 0)
  ffffff0008616cf0 zfs_rmdir+0x12c(ffffff06311a3400, ffffff0fd71bc800,
ffffff06311a3400, ffffff028afc7030, 0, 0)
  ffffff0008616d70 fop_rmdir+0x63(ffffff06311a3400, ffffff0fd71bc800,
ffffff06311a3400, ffffff028afc7030, 0, 0)
  ffffff0008616e40 vn_removeat+0x428(0, 80628a8, 0, 1)
  ffffff0008616e90 unlinkat+0x59(ffd19553, 80628a8, 1)
  ffffff0008616eb0 rmdir+0x1e(80628a8)
  ffffff0008616f00 sys_syscall32+0x1f7()

> zfs_rmdir+0x12c::dis
zfs_rmdir+0x101:                je     +0x191   <zfs_rmdir+0x298>
zfs_rmdir+0x107:                movq   -0x98(%rbp),%rcx
zfs_rmdir+0x10e:                movq   -0x80(%rbp),%rsi
zfs_rmdir+0x112:                movq   %r15,%rdx
zfs_rmdir+0x115:                movq   %r14,%rdi
zfs_rmdir+0x118:                call   +0x455aa73       <vnevent_rmdir>
zfs_rmdir+0x11d:                movq   -0x68(%rbp),%rdi
zfs_rmdir+0x121:                xorl   %esi,%esi
zfs_rmdir+0x123:                addq   $0x28,%rdi
zfs_rmdir+0x127:                call   +0x3ed7214       <rw_enter>
zfs_rmdir+0x12c:                movq   -0x68(%rbp),%rdi
zfs_rmdir+0x130:                xorl   %esi,%esi
zfs_rmdir+0x132:                addq   $0x20,%rdi
zfs_rmdir+0x136:                call   +0x3ed7205       <rw_enter>
zfs_rmdir+0x13b:                movq   0x10(%r12),%rdi
zfs_rmdir+0x140:                call   -0xab105 <dmu_tx_create>
zfs_rmdir+0x145:                movq   0x10(%r13),%rsi
zfs_rmdir+0x149:                movq   %r15,%rcx
zfs_rmdir+0x14c:                xorl   %edx,%edx
zfs_rmdir+0x14e:                movq   %rax,%rdi
zfs_rmdir+0x151:                movq   %rax,%rbx

> ffffff06311a3400::print vnode_t
{
    v_lock = {
        _opaque = [ 0 ]
    }
    v_flag = 0
    v_count = 0x37
    v_data = 0xffffff0299803ab0
    v_vfsp = 0xffffff026f4debe8
    v_stream = 0
    v_type = 2 (VDIR)
    v_rdev = 0xffffffffffffffff
    v_vfsmountedhere = 0
    v_op = 0xffffff02563a2540
    v_pages = 0
    v_filocks = 0
    v_shrlocks = 0
    v_nbllock = {
        _opaque = [ 0 ]
    }
    v_cv = {
        _opaque = 0
    }
    v_locality = 0
    v_femhead = 0
    v_path = 0xffffff025c1d2e38 "/testpool/test" 
    v_rdcnt = 0
    v_wrcnt = 0
    v_mmap_read = 0
    v_mmap_write = 0
    v_mpssdata = 0
    v_fopdata = 0
    v_vsd_lock = {
        _opaque = [ 0 ]
    }
    v_vsd = 0
    v_xattrdir = 0
    v_count_dnlc = 0x9
}

> ffffff05332e0200::print vnode_t
{
    v_lock = {
        _opaque = [ 0 ]
    }
    v_flag = 0
    v_count = 0x12
    v_data = 0xffffff044f37e888
    v_vfsp = 0xffffff026f4debe8
    v_stream = 0
    v_type = 2 (VDIR)
    v_rdev = 0xffffffffffffffff
    v_vfsmountedhere = 0
    v_op = 0xffffff02563a2540
    v_pages = 0
    v_filocks = 0
    v_shrlocks = 0
    v_nbllock = {
        _opaque = [ 0x10 ]
    }
    v_cv = {
        _opaque = 0
    }
    v_locality = 0
    v_femhead = 0
    v_path = 0xffffff0c7fffa1b0 "/testpool/test/3" 
    v_rdcnt = 0
    v_wrcnt = 0
    v_mmap_read = 0
    v_mmap_write = 0
    v_mpssdata = 0
    v_fopdata = 0
    v_vsd_lock = {
        _opaque = [ 0 ]
    }
    v_vsd = 0
    v_xattrdir = 0
    v_count_dnlc = 0x5
}

> ffffff0fd71bc800/s
0xffffff0fd71bc800:             3
> ffffff070d165b02/s
0xffffff070d165b02:             2
> ffffff0e0a38ec80/s
0xffffff0e0a38ec80:             3

So, this looks like
rmdir("/testpool/test/3")
vs
rename("/testpool/test/3/2", "/testpool/test/3")

Also available in: Atom PDF