Project

General

Profile

Feature #7709

Updated by Patrick Mooney about 1 year ago

Upstreaming of "OS-5655":https://smartos.org/bugview/OS-5655 from SmartOS: 

 The family of functions for converted between time formats (hrt2ts, ts2hrt, etc) use a variety of tricks in an attempt to increase performance. While these techniques may have been valuable long ago, they appear to be harmful on modern x86_64 CPUs. 

 These routines should be checked and, when it would improve performance, the "optimizations" should be disabled via #ifdef. 

 --- 

 I wrote a crude little test program to benchmark the existing code against new "de-optimized" versions: 

 <pre> 
 #include <stdio.h> 
 #include <limits.h> 
 #include <sys/types.h> 
 #include <sys/time.h> 

 #define TESTCOUNT         10000000 

 void 
 hrt2ts_old(hrtime_t hrt, timespec_t *tsp) 
 { 
         uint32_t sec, nsec, tmp; 

         tmp = (uint32_t)(hrt >> 30); 
         sec = tmp - (tmp >> 2); 
         sec = tmp - (sec >> 5); 
         sec = tmp + (sec >> 1); 
         sec = tmp - (sec >> 6) + 7; 
         sec = tmp - (sec >> 3); 
         sec = tmp + (sec >> 1); 
         sec = tmp + (sec >> 3); 
         sec = tmp + (sec >> 4); 
         tmp = (sec << 7) - sec - sec - sec; 
         tmp = (tmp << 7) - tmp - tmp - tmp; 
         tmp = (tmp << 7) - tmp - tmp - tmp; 
         nsec = (uint32_t)hrt - (tmp << 9); 
         while (nsec >= NANOSEC) { 
                 nsec -= NANOSEC; 
                 sec++; 
         } 
         tsp->tv_sec = (time_t)sec; 
         tsp->tv_nsec = nsec; 
 } 
 void 
 hrt2ts_new(hrtime_t hrt, timespec_t *tsp) 
 { 
         tsp->tv_sec = (time_t)(hrt / NANOSEC); 
         tsp->tv_nsec = (hrt % NANOSEC); 
 } 

 void 
 do_test_hrt2ts(timespec_t *tsp, void (*tf)(hrtime_t, timespec_t *)) 
 { 
         int i; 
         hrtime_t hrt; 

         hrt = gethrtime(); 
         for (i = 0; i < TESTCOUNT; i++) { 
                 tf(hrt, tsp); 
         } 
 } 
 void 
 test_hrt2ts(void (*tf)(hrtime_t, timespec_t *), const char *name) 
 { 
         timespec_t start, end; 
         uint64_t diff; 
         timespec_t tsp; 

         clock_gettime(CLOCK_MONOTONIC, &start); 
         do_test_hrt2ts(&tsp, tf); 
         clock_gettime(CLOCK_MONOTONIC, &end); 
         if (end.tv_nsec < start.tv_nsec) { 
                 end.tv_sec -= 1; 
                 end.tv_nsec += NANOSEC; 
         } 
         diff = end.tv_nsec - start.tv_nsec; 
         diff += (end.tv_sec - start.tv_sec) * NANOSEC; 
         printf("%s out: %lld %lld\n", name, tsp.tv_sec, tsp.tv_nsec); 
         printf("%s: %lld for %d iterations (%lldns per)\n", name, diff, TESTCOUNT, diff/TESTCOUNT); 

 } 


 hrtime_t 
 ts2hrt_old(timespec_t *tsp) 
 { 
         hrtime_t hrt; 

         hrt = tsp->tv_sec; 
         hrt = (hrt << 7) - hrt - hrt - hrt; 
         hrt = (hrt << 7) - hrt - hrt - hrt; 
         hrt = (hrt << 7) - hrt - hrt - hrt; 
         hrt = (hrt << 9) + tsp->tv_nsec; 
         return (hrt); 
 } 
 hrtime_t 
 ts2hrt_new(timespec_t *tsp) 
 { 
         return ((tsp->tv_sec * NANOSEC) + tsp->tv_nsec); 
 } 

 hrtime_t do_test_ts2hrt(timespec_t *tsp, hrtime_t (*tf)(timespec_t *)) 
 { 
         int i; 
         hrtime_t hrt; 

         hrt = gethrtime(); 
         for (i = 0; i < TESTCOUNT; i++) { 
                 hrt = tf(tsp); 
         } 
         return (hrt); 
 } 
 void test_ts2hrt(hrtime_t (*tf)(timespec_t *), const char *name) 
 { 
         timespec_t start, end; 
         timestruc_t tsc; 
         uint64_t diff; 
         hrtime_t hrt; 

         clock_gettime(CLOCK_MONOTONIC, &start); 
         hrt = do_test_ts2hrt(&start, tf); 
         clock_gettime(CLOCK_MONOTONIC, &end); 
         if (end.tv_nsec < start.tv_nsec) { 
                 end.tv_sec -= 1; 
                 end.tv_nsec += NANOSEC; 
         } 
         diff = end.tv_nsec - start.tv_nsec; 
         diff += (end.tv_sec - start.tv_sec) * NANOSEC; 
         printf("%s out: %lld\n", name, hrt); 
         printf("%s: %lld for %d iterations (%lldns per)\n", name, diff, TESTCOUNT, diff/TESTCOUNT); 

 } 

 void 
 hrt2tv_old(hrtime_t hrt, struct timeval *tvp) 
 { 
         uint32_t sec, nsec, tmp; 
         uint32_t q, r, t; 

         tmp = (uint32_t)(hrt >> 30); 
         sec = tmp - (tmp >> 2); 
         sec = tmp - (sec >> 5); 
         sec = tmp + (sec >> 1); 
         sec = tmp - (sec >> 6) + 7; 
         sec = tmp - (sec >> 3); 
         sec = tmp + (sec >> 1); 
         sec = tmp + (sec >> 3); 
         sec = tmp + (sec >> 4); 
         tmp = (sec << 7) - sec - sec - sec; 
         tmp = (tmp << 7) - tmp - tmp - tmp; 
         tmp = (tmp << 7) - tmp - tmp - tmp; 
         nsec = (uint32_t)hrt - (tmp << 9); 
         while (nsec >= NANOSEC) { 
                 nsec -= NANOSEC; 
                 sec++; 
         } 
         tvp->tv_sec = (time_t)sec; 

         t = (nsec >> 7) + (nsec >> 8) + (nsec >> 12); 
         q = (nsec >> 1) + t + (nsec >> 15) + (t >> 11) + (t >> 14); 
         q = q >> 9; 
         r = nsec - q*1000; 
         tvp->tv_usec = q + ((r + 24) >> 10); 

 } 
 void 
 hrt2tv_new(hrtime_t hrt, struct timeval *tvp) 
 { 
         tvp->tv_sec = (time_t)(hrt / NANOSEC); 
         tvp->tv_usec = ((hrt % NANOSEC) / 1000); 
 } 

 void 
 do_test_hrt2tv(struct timeval *tvp, void (*tf)(hrtime_t, struct timeval *)) 
 { 
         int i; 
         hrtime_t hrt; 

         hrt = gethrtime(); 
         for (i = 0; i < TESTCOUNT; i++) { 
                 tf(hrt, tvp); 
         } 
 } 
 void 
 test_hrt2tv(void (*tf)(hrtime_t, struct timeval *), const char *name) 
 { 
         timespec_t start, end; 
         uint64_t diff; 
         struct timeval tvp; 

         clock_gettime(CLOCK_MONOTONIC, &start); 
         do_test_hrt2tv(&tvp, tf); 
         clock_gettime(CLOCK_MONOTONIC, &end); 
         if (end.tv_nsec < start.tv_nsec) { 
                 end.tv_sec -= 1; 
                 end.tv_nsec += NANOSEC; 
         } 
         diff = end.tv_nsec - start.tv_nsec; 
         diff += (end.tv_sec - start.tv_sec) * NANOSEC; 
         printf("%s out: %ld %ld\n", name, tvp.tv_sec, tvp.tv_usec); 
         printf("%s: %lld for %d iterations (%lldns per)\n", name, diff, TESTCOUNT, diff/TESTCOUNT); 

 } 

 int main() 
 { 
         int count; 

         test_hrt2ts(hrt2ts_old, "hrt2ts_old"); 
         test_hrt2ts(hrt2ts_new, "hrt2ts_new"); 

         test_ts2hrt(ts2hrt_old, "ts2hrt_old"); 
         test_ts2hrt(ts2hrt_new, "ts2hrt_new"); 

         test_hrt2tv(hrt2tv_old, "hrt2tv_old"); 
         test_hrt2tv(hrt2tv_new, "hrt2tv_new"); 
 } 
 </pre> 

 Here are the results for 64-bit: 

 <pre> 
 [pmooney@fbf78115-e35d-41ad-8de1-c7729e7ef507 ~]$ gcc -O2 -m64 test.c 
 [pmooney@fbf78115-e35d-41ad-8de1-c7729e7ef507 ~]$ ./a.out 
 hrt2ts_old out: 3999579 457906879 
 hrt2ts_old: 65166214 for 10000000 iterations (6ns per) 
 hrt2ts_new out: 3999579 523236995 
 hrt2ts_new: 27384771 for 10000000 iterations (2ns per) 
 ts2hrt_old out: 3999579550630130 
 ts2hrt_old: 37698537 for 10000000 iterations (3ns per) 
 ts2hrt_new out: 3999579588336809 
 ts2hrt_new: 20534698 for 10000000 iterations (2ns per) 
 hrt2tv_old out: 3999579 608879 
 hrt2tv_old: 132647645 for 10000000 iterations (13ns per) 
 hrt2tv_new out: 3999579 741541 
 hrt2tv_new: 42581307 for 10000000 iterations (4ns per) 
 </pre> 

 ... and 32-bit: 

 <pre> 
 [pmooney@fbf78115-e35d-41ad-8de1-c7729e7ef507 ~]$ gcc -O2 -m32 test.c 
 [pmooney@fbf78115-e35d-41ad-8de1-c7729e7ef507 ~]$ ./a.out 
 hrt2ts_old out: 505795097040587133 -81064690078906408 
 hrt2ts_old: 93591836 for 10000000 iterations (9ns per) 
 hrt2ts_new out: 908324255269980541 38664705664 
 hrt2ts_new: 208671683 for 10000000 iterations (20ns per) 
 ts2hrt_old out: 4000125420170843 
 ts2hrt_old: 121201904 for 10000000 iterations (12ns per) 
 ts2hrt_new out: 4000125541384366 
 ts2hrt_new: 30899755 for 10000000 iterations (3ns per) 
 hrt2tv_old out: 4000125 572295 
 hrt2tv_old: 161604889 for 10000000 iterations (16ns per) 
 hrt2tv_new out: 4000125 733913 
 hrt2tv_new: 310336290 for 10000000 iterations (31ns per) 
 </pre>

Back