Project

General

Profile

Bug #10468

__ctype_mask[EOF] has been working by accident

Added by John Levon 5 months ago. Updated 5 months ago.

Status:
Closed
Priority:
Normal
Assignee:
Category:
-
Start date:
2019-02-27
Due date:
% Done:

100%

Estimated time:
Difficulty:
Medium
Tags:
needs-triage

Description

In booting COAL with a gcc7-compiled libc, some variants of sed no longer functioned in that they simply spin:

[root@0a78c361-abaf-415c-9143-d6a2dafefbe8 (east-bay-1:cnapi0) ~]# cat /etc/pkgsrc_version 
release: 2011Q4
architecture: i386
[root@0a78c361-abaf-415c-9143-d6a2dafefbe8 (east-bay-1:cnapi0) ~]# sed --version
GNU sed version 4.2.1
Copyright (C) 2009 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,
to the extent permitted by law.

GNU sed home page: <http://www.gnu.org/software/sed/>.
General help using GNU software: <http://www.gnu.org/gethelp/>.
E-mail bug reports to: <bug-gnu-utils@gnu.org>.
Be sure to include the word ``sed'' somewhere in the ``Subject:'' field.
[root@0a78c361-abaf-415c-9143-d6a2dafefbe8 (east-bay-1:cnapi0) ~]# echo foo | sed -e 's/foo/bar/'

Here is the code that we're spinning in:

in_nonblank:                    pushl  %ebp
in_nonblank+1:                  movl   %esp,%ebp
in_nonblank+3:                  subl   $0x8,%esp
in_nonblank+6:                  movl   %esi,%esi
in_nonblank+8:                  call   -0xad    <inchar>
in_nonblank+0xd:                movl   0x80830cc,%edx   <gsed`__ctype_mask>
in_nonblank+0x13:               testb  $0x40,(%edx,%eax,4)
in_nonblank+0x17:               jne    -0x11    <in_nonblank+8>
in_nonblank+0x19:               leave  
in_nonblank+0x1a:               ret    

The problem is that inchar is (properly) returning EOF (-1), and we are using that to dereference off of __ctype_mask:

> __ctype_mask/K 
gsed`__ctype_mask:
gsed`__ctype_mask:              fee1b7a0        
> fee1b7a0-4,4/Xna
libc.so.1`___trans_lower+0x3fc: ff              
libc.so.1`___ctype_mask:        20              
libc.so.1`___ctype_mask+4:      20              
libc.so.1`___ctype_mask+8:      20              
libc.so.1`___ctype_mask+0xc:    

The problem is that this code is (implicitly) relying on the padding that the compiler was providing in that it is expecting that __ctype_mask[-1] is 0 -- but gcc7 has structured this differently, putting ___trans_lower ahead of ___ctype_mask.

Ironically, the code in sed would have worked correctly (if accidentally) were it not for IRIX 4.0.5; here's the code in this ancient version of sed that defines ISBLANK:

/* handle misdesigned <ctype.h> macros (snarfed from lib/regex.c) */
/* Jim Meyering writes:

   "... Some ctype macros are valid only for character codes that
   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
   using /bin/cc or gcc but without giving an ansi option).  So, all
   ctype uses should be through macros like ISPRINT...  If
   STDC_HEADERS is defined, then autoconf has verified that the ctype
   macros don't need to be guarded with references to isascii. ...
   Defining isascii to 1 should let any compiler worth its salt
   eliminate the && through constant folding." 
   Solaris defines some of these symbols so we must undefine them first. */

#undef ISASCII
#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
# define ISASCII(c) 1
#else
# define ISASCII(c) isascii(c)
#endif

#if defined isblank || defined HAVE_ISBLANK
# define ISBLANK(c) (ISASCII (c) && isblank (c))
#else
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif

isascii -- even of this platform's vintage -- would have correctly returned 0 on EOF.

In any case, the fix is to expand ___ctype_mask and then point __ctype_mask to be &___ctype_mask[1]:

diff --git a/usr/src/lib/libc/port/locale/table.c b/usr/src/lib/libc/port/locale/table.c
index 3c0ce09901..2a6517a48f 100644
--- a/usr/src/lib/libc/port/locale/table.c
+++ b/usr/src/lib/libc/port/locale/table.c
@@ -39,7 +39,7 @@
 #include "mblocal.h" 
 #include "_ctype.h" 

-#define    _DEFRUNETYPE { \
+#define    _DEFRUNETYPE \
     /* 00 */ \
     _CTYPE_C, \
     _CTYPE_C, \
@@ -183,10 +183,9 @@
     _CTYPE_P|_CTYPE_R|_CTYPE_G, \
     _CTYPE_P|_CTYPE_R|_CTYPE_G, \
     _CTYPE_P|_CTYPE_R|_CTYPE_G, \
-    _CTYPE_C, \
-}
+    _CTYPE_C

-#define    _DEFMAPLOWER { \
+#define    _DEFMAPLOWER \
     0x00,    0x01,    0x02,    0x03,    0x04,    0x05,    0x06,    0x07, \
     0x08,    0x09,    0x0a,    0x0b,    0x0c,    0x0d,    0x0e,    0x0f, \
     0x10,    0x11,    0x12,    0x13,    0x14,    0x15,    0x16,    0x17, \
@@ -218,10 +217,9 @@
     0xe0,    0xe1,    0xe2,    0xe3,    0xe4,    0xe5,    0xe6,    0xe7, \
     0xe8,    0xe9,    0xea,    0xeb,    0xec,    0xed,    0xee,    0xef, \
     0xf0,    0xf1,    0xf2,    0xf3,    0xf4,    0xf5,    0xf6,    0xf7, \
-    0xf8,    0xf9,    0xfa,    0xfb,    0xfc,    0xfd,    0xfe,    0xff, \
-}
+    0xf8,    0xf9,    0xfa,    0xfb,    0xfc,    0xfd,    0xfe,    0xff

-#define    _DEFMAPUPPER { \
+#define    _DEFMAPUPPER \
     0x00,    0x01,    0x02,    0x03,    0x04,    0x05,    0x06,    0x07, \
     0x08,    0x09,    0x0a,    0x0b,    0x0c,    0x0d,    0x0e,    0x0f, \
     0x10,    0x11,    0x12,    0x13,    0x14,    0x15,    0x16,    0x17, \
@@ -253,27 +251,32 @@
     0xe0,    0xe1,    0xe2,    0xe3,    0xe4,    0xe5,    0xe6,    0xe7, \
     0xe8,    0xe9,    0xea,    0xeb,    0xec,    0xed,    0xee,    0xef, \
     0xf0,    0xf1,    0xf2,    0xf3,    0xf4,    0xf5,    0xf6,    0xf7, \
-    0xf8,    0xf9,    0xfa,    0xfb,    0xfc,    0xfd,    0xfe,    0xff, \
-}
+    0xf8,    0xf9,    0xfa,    0xfb,    0xfc,    0xfd,    0xfe,    0xff,

 _RuneLocale _DefaultRuneLocale = {
     _RUNE_MAGIC_1,
     "NONE",
-    _DEFRUNETYPE,
-    _DEFMAPLOWER,
-    _DEFMAPUPPER,
+    { _DEFRUNETYPE },
+    { _DEFMAPLOWER },
+    { _DEFMAPUPPER },
 };

 /*
  * __ctype_mask, __trans_lower, and __trans_upper come from former _ctype.c and
  * have to stay pointers for binary compatibility, so we provide separate
- * storage for them, initialized to "C" locale contents by default.
+ * storage for them, initialized to "C" locale contents by default.  Note that
+ * legacy code may dereference __ctype_mask[-1] when checking against EOF,
+ * relying on that value to be 0.  To allow this, ___ctype_mask is expanded by
+ * one value and prepended with a leading 0, with __ctype_mask being set to
+ * point to ___ctype_mask[1].  (__trans_lower and __trans_upper do not suffer
+ * from this as EOF access was prevented in legacy code by a check against
+ * isascii(), which always returned 0 for EOF.)
  */
-static unsigned int ___ctype_mask[_CACHED_RUNES] = _DEFRUNETYPE;
-unsigned int *__ctype_mask = ___ctype_mask;
+static unsigned int ___ctype_mask[_CACHED_RUNES + 1] = { 0, _DEFRUNETYPE };
+unsigned int *__ctype_mask = &___ctype_mask[1];

-static int ___trans_lower[_CACHED_RUNES] = _DEFMAPLOWER;
+static int ___trans_lower[_CACHED_RUNES] = { _DEFMAPLOWER };
 int *__trans_lower = ___trans_lower;

-static int ___trans_upper[_CACHED_RUNES] = _DEFMAPUPPER;
+static int ___trans_upper[_CACHED_RUNES] = { _DEFMAPUPPER };
 int *__trans_upper = ___trans_upper;

This bug mirrors Joyent upstream bug OS-7498


Related issues

Related to illumos gate - Feature #9996: use GCC 7 as default primary compilerClosed2018-11-21

Actions

History

#1

Updated by John Levon 5 months ago

#2

Updated by John Levon 5 months ago

  • Related to Feature #9996: use GCC 7 as default primary compiler added
#3

Updated by Joshua Clulow 5 months ago

  • Description updated (diff)
#4

Updated by Electric Monk 5 months ago

  • Status changed from New to Closed
  • % Done changed from 0 to 100

git commit 92c1a61163ff6a0655b27bd429856e171e7ce5f5

commit  92c1a61163ff6a0655b27bd429856e171e7ce5f5
Author: Bryan Cantrill <bryan@joyent.com>
Date:   2019-03-01T11:11:54.000Z

    10468 __ctype_mask[EOF] has been working by accident
    10469 GCC's -faggressive-loop-optimizations is too aggressive
    10470 array over-read in has_saved_fp()
    Reviewed by: Robert Mustacchi <rm@joyent.com>
    Reviewed by: John Levon <john.levon@joyent.com>
    Reviewed by: Toomas Soome <tsoome@me.com>
    Reviewed by: Igor Kozhukhov <igor@dilos.org>
    Reviewed by: Andy Fiddaman <andy@omniosce.org>
    Reviewed by: Gergő Doma <domag02@gmail.com>
    Reviewed by: Gary Mills <gary_mills@fastmail.fm>
    Approved by: Richard Lowe <richlowe@richlowe.net>

Also available in: Atom PDF