Project

General

Profile

Bug #10468

Updated by Joshua M. Clulow over 3 years ago

In booting COAL with a gcc7-compiled libc, some variants of sed no longer functioned in that they simply spin: 

 <pre> 
 [root@0a78c361-abaf-415c-9143-d6a2dafefbe8 (east-bay-1:cnapi0) ~]# cat /etc/pkgsrc_version  
 release: 2011Q4 
 architecture: i386 
 [root@0a78c361-abaf-415c-9143-d6a2dafefbe8 (east-bay-1:cnapi0) ~]# sed --version 
 GNU sed version 4.2.1 
 Copyright (C) 2009 Free Software Foundation, Inc. 
 This is free software; see the source for copying conditions.    There is NO 
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, 
 to the extent permitted by law. 

 GNU sed home page: <http://www.gnu.org/software/sed/>. 
 General help using GNU software: <http://www.gnu.org/gethelp/>. 
 E-mail bug reports to: <bug-gnu-utils@gnu.org>. 
 Be sure to include the word ``sed'' somewhere in the ``Subject:'' field. 
 [root@0a78c361-abaf-415c-9143-d6a2dafefbe8 (east-bay-1:cnapi0) ~]# echo foo | sed -e 's/foo/bar/' 
 </pre> 

 Here is the code that we're spinning in: 

 <pre> 
 in_nonblank:                      pushl    %ebp 
 in_nonblank+1:                    movl     %esp,%ebp 
 in_nonblank+3:                    subl     $0x8,%esp 
 in_nonblank+6:                    movl     %esi,%esi 
 in_nonblank+8:                    call     -0xad      <inchar> 
 in_nonblank+0xd:                  movl     0x80830cc,%edx     <gsed`__ctype_mask> 
 in_nonblank+0x13:                 testb    $0x40,(%edx,%eax,4) 
 in_nonblank+0x17:                 jne      -0x11      <in_nonblank+8> 
 in_nonblank+0x19:                 leave   
 in_nonblank+0x1a:                 ret     
 </pre> 

 The problem is that @inchar@ is (properly) returning EOF (-1), and we are using that to dereference off of @__ctype_mask@: 

 <pre> 
 > __ctype_mask/K  
 gsed`__ctype_mask: 
 gsed`__ctype_mask:                fee1b7a0         
 > fee1b7a0-4,4/Xna 
 libc.so.1`___trans_lower+0x3fc: ff               
 libc.so.1`___ctype_mask:          20               
 libc.so.1`___ctype_mask+4:        20               
 libc.so.1`___ctype_mask+8:        20               
 libc.so.1`___ctype_mask+0xc:     
 </pre> 

 The problem is that this code is (implicitly) relying on the padding that the compiler was providing in that it is expecting that @__ctype_mask[-1]@ is 0 -- but gcc7 has structured this differently, putting @___trans_lower@ ahead of @___ctype_mask@. 

 Ironically, the code in sed would have worked correctly (if accidentally) were it not for IRIX 4.0.5; here's the code in this ancient version of sed that defines @ISBLANK@: 

 <pre> 
 /* handle misdesigned <ctype.h> macros (snarfed from lib/regex.c) */ 
 /* Jim Meyering writes: 

    "... Some ctype macros are valid only for character codes that 
    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when 
    using /bin/cc or gcc but without giving an ansi option).    So, all 
    ctype uses should be through macros like ISPRINT...    If 
    STDC_HEADERS is defined, then autoconf has verified that the ctype 
    macros don't need to be guarded with references to isascii. ... 
    Defining isascii to 1 should let any compiler worth its salt 
    eliminate the && through constant folding." 
    Solaris defines some of these symbols so we must undefine them first. */ 

 #undef ISASCII 
 #if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) 
 # define ISASCII(c) 1 
 #else 
 # define ISASCII(c) isascii(c) 
 #endif 

 #if defined isblank || defined HAVE_ISBLANK 
 # define ISBLANK(c) (ISASCII (c) && isblank (c)) 
 #else 
 # define ISBLANK(c) ((c) == ' ' || (c) == '\t') 
 #endif 
 </pre> 

 @isascii@ -- even of this platform's vintage -- would have correctly returned 0 on EOF. 

 In any case, the fix is to expand @___ctype_mask@ and then point @__ctype_mask@ to be @&___ctype_mask[1]@: 

 <pre> 
 diff --git a/usr/src/lib/libc/port/locale/table.c b/usr/src/lib/libc/port/locale/table.c 
 index 3c0ce09901..2a6517a48f 100644 
 --- a/usr/src/lib/libc/port/locale/table.c 
 +++ b/usr/src/lib/libc/port/locale/table.c 
 @@ -39,7 +39,7 @@ 
  #include "mblocal.h" 
  #include "_ctype.h" 
 
 -#define 	 _DEFRUNETYPE { \ 
 +#define 	 _DEFRUNETYPE \ 
 	 /* 00 */ \ 
 	 _CTYPE_C, \ 
 	 _CTYPE_C, \ 
 @@ -183,10 +183,9 @@ 
 	 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 
 	 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 
 	 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 
 - 	 _CTYPE_C, \ 
 -} 
 + 	 _CTYPE_C 
 
 -#define 	 _DEFMAPLOWER { \ 
 +#define 	 _DEFMAPLOWER \ 
 	 0x00, 	 0x01, 	 0x02, 	 0x03, 	 0x04, 	 0x05, 	 0x06, 	 0x07, \ 
 	 0x08, 	 0x09, 	 0x0a, 	 0x0b, 	 0x0c, 	 0x0d, 	 0x0e, 	 0x0f, \ 
 	 0x10, 	 0x11, 	 0x12, 	 0x13, 	 0x14, 	 0x15, 	 0x16, 	 0x17, \ 
 @@ -218,10 +217,9 @@ 
 	 0xe0, 	 0xe1, 	 0xe2, 	 0xe3, 	 0xe4, 	 0xe5, 	 0xe6, 	 0xe7, \ 
 	 0xe8, 	 0xe9, 	 0xea, 	 0xeb, 	 0xec, 	 0xed, 	 0xee, 	 0xef, \ 
 	 0xf0, 	 0xf1, 	 0xf2, 	 0xf3, 	 0xf4, 	 0xf5, 	 0xf6, 	 0xf7, \ 
 - 	 0xf8, 	 0xf9, 	 0xfa, 	 0xfb, 	 0xfc, 	 0xfd, 	 0xfe, 	 0xff, \ 
 -} 
 + 	 0xf8, 	 0xf9, 	 0xfa, 	 0xfb, 	 0xfc, 	 0xfd, 	 0xfe, 	 0xff 
 
 -#define 	 _DEFMAPUPPER { \ 
 +#define 	 _DEFMAPUPPER \ 
 	 0x00, 	 0x01, 	 0x02, 	 0x03, 	 0x04, 	 0x05, 	 0x06, 	 0x07, \ 
 	 0x08, 	 0x09, 	 0x0a, 	 0x0b, 	 0x0c, 	 0x0d, 	 0x0e, 	 0x0f, \ 
 	 0x10, 	 0x11, 	 0x12, 	 0x13, 	 0x14, 	 0x15, 	 0x16, 	 0x17, \ 
 @@ -253,27 +251,32 @@ 
 	 0xe0, 	 0xe1, 	 0xe2, 	 0xe3, 	 0xe4, 	 0xe5, 	 0xe6, 	 0xe7, \ 
 	 0xe8, 	 0xe9, 	 0xea, 	 0xeb, 	 0xec, 	 0xed, 	 0xee, 	 0xef, \ 
 	 0xf0, 	 0xf1, 	 0xf2, 	 0xf3, 	 0xf4, 	 0xf5, 	 0xf6, 	 0xf7, \ 
 - 	 0xf8, 	 0xf9, 	 0xfa, 	 0xfb, 	 0xfc, 	 0xfd, 	 0xfe, 	 0xff, \ 
 -} 
 + 	 0xf8, 	 0xf9, 	 0xfa, 	 0xfb, 	 0xfc, 	 0xfd, 	 0xfe, 	 0xff, 
 
  _RuneLocale _DefaultRuneLocale = { 
 	 _RUNE_MAGIC_1, 
 	 "NONE", 
 - 	 _DEFRUNETYPE, 
 - 	 _DEFMAPLOWER, 
 - 	 _DEFMAPUPPER, 
 + 	 { _DEFRUNETYPE }, 
 + 	 { _DEFMAPLOWER }, 
 + 	 { _DEFMAPUPPER }, 
  }; 
 
  /* 
   * __ctype_mask, __trans_lower, and __trans_upper come from former _ctype.c and 
   * have to stay pointers for binary compatibility, so we provide separate 
 - * storage for them, initialized to "C" locale contents by default. 
 + * storage for them, initialized to "C" locale contents by default.    Note that 
 + * legacy code may dereference __ctype_mask[-1] when checking against EOF, 
 + * relying on that value to be 0.    To allow this, ___ctype_mask is expanded by 
 + * one value and prepended with a leading 0, with __ctype_mask being set to 
 + * point to ___ctype_mask[1].    (__trans_lower and __trans_upper do not suffer 
 + * from this as EOF access was prevented in legacy code by a check against 
 + * isascii(), which always returned 0 for EOF.) 
   */ 
 -static unsigned int ___ctype_mask[_CACHED_RUNES] = _DEFRUNETYPE; 
 -unsigned int *__ctype_mask = ___ctype_mask; 
 +static unsigned int ___ctype_mask[_CACHED_RUNES + 1] = { 0, _DEFRUNETYPE }; 
 +unsigned int *__ctype_mask = &___ctype_mask[1]; 
 
 -static int ___trans_lower[_CACHED_RUNES] = _DEFMAPLOWER; 
 +static int ___trans_lower[_CACHED_RUNES] = { _DEFMAPLOWER }; 
  int *__trans_lower = ___trans_lower; 
 
 -static int ___trans_upper[_CACHED_RUNES] = _DEFMAPUPPER; 
 +static int ___trans_upper[_CACHED_RUNES] = { _DEFMAPUPPER }; 
  int *__trans_upper = ___trans_upper; 
 </pre> 

 This bug mirrors As per Joyent upstream bug "OS-7498":https://smartos.org/bugview/OS-7498 OS-7498

Back