Project

General

Profile

Actions

Feature #7709

closed

hrt2ts and friends are too clever for their own good

Added by Robert Mustacchi over 4 years ago. Updated about 1 year ago.

Status:
Closed
Priority:
Normal
Category:
kernel
Start date:
2016-12-30
Due date:
% Done:

100%

Estimated time:
Difficulty:
Medium
Tags:
Gerrit CR:

Description

Upstreaming of OS-5655 from SmartOS:

The family of functions for converting between time formats (hrt2ts, ts2hrt, etc.) uses a variety of tricks in an attempt to increase performance. While these techniques may have been valuable long ago, they appear to be harmful on modern x86_64 CPUs.

These routines should be checked and, when it would improve performance, the "optimizations" should be disabled via #ifdef.


I wrote a crude little test program to benchmark the existing code against new "de-optimized" versions:

#include <stdio.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/time.h>

#define TESTCOUNT       10000000

/*
 * Legacy conversion of a nanosecond count `hrt` into the seconds and
 * nanoseconds fields of *tsp, written using only shifts, additions and
 * subtractions (there is no division or multiplication operator here).
 * It is the "optimized" variant that the benchmark compares against the
 * plain divide/modulo in hrt2ts_new().
 *
 * Every intermediate value is a uint32_t, so the arithmetic wraps modulo
 * 2^32 and the seconds result is limited to 32 bits.
 */
void
hrt2ts_old(hrtime_t hrt, timespec_t *tsp)
{
        uint32_t sec, nsec, tmp;

        /* tmp = hrt / 2^30, truncated to its low 32 bits. */
        tmp = (uint32_t)(hrt >> 30);
        /*
         * Shift-and-add refinement that turns tmp into an estimate of
         * hrt / 10^9.  Each line feeds the previous estimate back in, so
         * the statement order matters.
         * NOTE(review): the fix-up loop below only ever increments sec, so
         * this estimate is presumably never above the true quotient --
         * confirm against the original derivation.
         */
        sec = tmp - (tmp >> 2);
        sec = tmp - (sec >> 5);
        sec = tmp + (sec >> 1);
        sec = tmp - (sec >> 6) + 7;
        sec = tmp - (sec >> 3);
        sec = tmp + (sec >> 1);
        sec = tmp + (sec >> 3);
        sec = tmp + (sec >> 4);
        /*
         * (x << 7) - x - x - x is x * 125.  Three rounds give
         * sec * 125^3 = sec * 1953125, and the later << 9 (* 512) makes
         * that sec * 10^9, all modulo 2^32.
         */
        tmp = (sec << 7) - sec - sec - sec;
        tmp = (tmp << 7) - tmp - tmp - tmp;
        tmp = (tmp << 7) - tmp - tmp - tmp;
        /* Leftover nanoseconds: low 32 bits of hrt minus sec * 10^9. */
        nsec = (uint32_t)hrt - (tmp << 9);
        /* Fix-up: move whole seconds still left in nsec over to sec. */
        while (nsec >= NANOSEC) {
                nsec -= NANOSEC;
                sec++;
        }
        tsp->tv_sec = (time_t)sec;
        tsp->tv_nsec = nsec;
}
/*
 * Straightforward conversion of a nanosecond count into a timespec:
 * the quotient by NANOSEC becomes tv_sec, the remainder becomes tv_nsec.
 */
void
hrt2ts_new(hrtime_t hrt, timespec_t *tsp)
{
        const hrtime_t whole_secs = hrt / NANOSEC;
        const hrtime_t leftover_ns = hrt % NANOSEC;

        tsp->tv_sec = (time_t)whole_secs;
        tsp->tv_nsec = leftover_ns;
}

/*
 * Benchmark driver: sample gethrtime() once, then invoke the conversion
 * routine `tf` TESTCOUNT times on that same sample, writing into *tsp.
 */
void
do_test_hrt2ts(timespec_t *tsp, void (*tf)(hrtime_t, timespec_t *))
{
        const hrtime_t sample = gethrtime();
        int remaining = TESTCOUNT;

        while (remaining-- > 0) {
                tf(sample, tsp);
        }
}
/*
 * Time TESTCOUNT calls of the hrt2ts implementation `tf` and print, tagged
 * with `name`, the last converted value followed by the total elapsed
 * nanoseconds and the per-iteration average.
 *
 * tf:   conversion routine under test
 * name: label used as the prefix of both output lines
 */
void
test_hrt2ts(void (*tf)(hrtime_t, timespec_t *), const char *name)
{
        timespec_t start, end;
        uint64_t diff;
        timespec_t tsp;

        clock_gettime(CLOCK_MONOTONIC, &start);
        do_test_hrt2ts(&tsp, tf);
        clock_gettime(CLOCK_MONOTONIC, &end);
        /* Borrow one second so the nanosecond subtraction cannot go negative. */
        if (end.tv_nsec < start.tv_nsec) {
                end.tv_sec -= 1;
                end.tv_nsec += NANOSEC;
        }
        diff = (uint64_t)(end.tv_nsec - start.tv_nsec);
        /* Widen before multiplying so the product is 64-bit however NANOSEC is typed. */
        diff += (uint64_t)(end.tv_sec - start.tv_sec) * NANOSEC;
        /*
         * tv_sec and tv_nsec are not long long on every ABI (they are 32-bit
         * in a 32-bit build), so cast them to match the %lld conversions;
         * passing them uncast makes printf read the wrong number of bytes.
         */
        printf("%s out: %lld %lld\n", name, (long long)tsp.tv_sec,
            (long long)tsp.tv_nsec);
        printf("%s: %llu for %d iterations (%lluns per)\n", name,
            (unsigned long long)diff, TESTCOUNT,
            (unsigned long long)(diff / TESTCOUNT));
}

/*
 * Legacy timespec-to-nanoseconds conversion that scales tv_sec by 10^9
 * using shifts and subtractions only, then adds tv_nsec.
 *
 * (x << 7) - x - x - x is x * 125; three such rounds followed by << 9
 * multiply by 125^3 * 512 = 10^9.
 */
hrtime_t
ts2hrt_old(timespec_t *tsp)
{
        hrtime_t acc = tsp->tv_sec;
        int round;

        for (round = 0; round < 3; round++) {
                acc = (acc << 7) - acc - acc - acc;
        }
        acc = (acc << 9) + tsp->tv_nsec;
        return (acc);
}
/*
 * Straightforward timespec-to-nanoseconds conversion:
 * tv_sec * NANOSEC + tv_nsec.
 */
hrtime_t
ts2hrt_new(timespec_t *tsp)
{
        const time_t secs = tsp->tv_sec;
        const long nanos = tsp->tv_nsec;

        return (secs * NANOSEC + nanos);
}

/*
 * Benchmark driver: call the conversion routine `tf` on *tsp TESTCOUNT
 * times and return the value produced by the final call.  The result is
 * seeded from gethrtime() so it is defined even before the first call.
 */
hrtime_t
do_test_ts2hrt(timespec_t *tsp, hrtime_t (*tf)(timespec_t *))
{
        hrtime_t last = gethrtime();
        int remaining;

        for (remaining = TESTCOUNT; remaining > 0; remaining--) {
                last = tf(tsp);
        }
        return (last);
}
/*
 * Time TESTCOUNT calls of the ts2hrt implementation `tf` and print, tagged
 * with `name`, the last converted value followed by the total elapsed
 * nanoseconds and the per-iteration average.
 *
 * tf:   conversion routine under test
 * name: label used as the prefix of both output lines
 */
void
test_ts2hrt(hrtime_t (*tf)(timespec_t *), const char *name)
{
        timespec_t start, end;
        uint64_t diff;
        hrtime_t hrt;

        clock_gettime(CLOCK_MONOTONIC, &start);
        /* The start timestamp doubles as the input value being converted. */
        hrt = do_test_ts2hrt(&start, tf);
        clock_gettime(CLOCK_MONOTONIC, &end);
        /* Borrow one second so the nanosecond subtraction cannot go negative. */
        if (end.tv_nsec < start.tv_nsec) {
                end.tv_sec -= 1;
                end.tv_nsec += NANOSEC;
        }
        diff = (uint64_t)(end.tv_nsec - start.tv_nsec);
        /* Widen before multiplying so the product is 64-bit however NANOSEC is typed. */
        diff += (uint64_t)(end.tv_sec - start.tv_sec) * NANOSEC;
        /* Casts make each argument match its conversion specifier exactly. */
        printf("%s out: %lld\n", name, (long long)hrt);
        printf("%s: %llu for %d iterations (%lluns per)\n", name,
            (unsigned long long)diff, TESTCOUNT,
            (unsigned long long)(diff / TESTCOUNT));
}

/*
 * Legacy conversion of a nanosecond count `hrt` into the seconds and
 * microseconds fields of *tvp, avoiding any division operator.  It is the
 * "optimized" variant that the benchmark compares against the plain
 * divide/modulo in hrt2tv_new().
 *
 * Every intermediate value is a uint32_t, so the arithmetic wraps modulo
 * 2^32 and the seconds result is limited to 32 bits.
 */
void
hrt2tv_old(hrtime_t hrt, struct timeval *tvp)
{
        uint32_t sec, nsec, tmp;
        uint32_t q, r, t;

        /* tmp = hrt / 2^30, truncated to its low 32 bits. */
        tmp = (uint32_t)(hrt >> 30);
        /*
         * Shift-and-add refinement that turns tmp into an estimate of
         * hrt / 10^9.  Each line feeds the previous estimate back in, so
         * the statement order matters.
         * NOTE(review): the fix-up loop below only ever increments sec, so
         * this estimate is presumably never above the true quotient --
         * confirm against the original derivation.
         */
        sec = tmp - (tmp >> 2);
        sec = tmp - (sec >> 5);
        sec = tmp + (sec >> 1);
        sec = tmp - (sec >> 6) + 7;
        sec = tmp - (sec >> 3);
        sec = tmp + (sec >> 1);
        sec = tmp + (sec >> 3);
        sec = tmp + (sec >> 4);
        /*
         * (x << 7) - x - x - x is x * 125.  Three rounds give
         * sec * 125^3 = sec * 1953125, and the later << 9 (* 512) makes
         * that sec * 10^9, all modulo 2^32.
         */
        tmp = (sec << 7) - sec - sec - sec;
        tmp = (tmp << 7) - tmp - tmp - tmp;
        tmp = (tmp << 7) - tmp - tmp - tmp;
        /* Leftover nanoseconds: low 32 bits of hrt minus sec * 10^9. */
        nsec = (uint32_t)hrt - (tmp << 9);
        /* Fix-up: move whole seconds still left in nsec over to sec. */
        while (nsec >= NANOSEC) {
                nsec -= NANOSEC;
                sec++;
        }
        tvp->tv_sec = (time_t)sec;

        /*
         * Estimate nsec / 1000 with shifts: the summed terms, once shifted
         * right by 9, weight nsec by roughly 1/1000.
         */
        t = (nsec >> 7) + (nsec >> 8) + (nsec >> 12);
        q = (nsec >> 1) + t + (nsec >> 15) + (t >> 11) + (t >> 14);
        q = q >> 9;
        /* r is what remains of nsec after removing q whole microseconds. */
        r = nsec - q*1000;
        /*
         * (r + 24) >> 10 is 0 for r < 1000 and 1 for 1000 <= r < 2024, so
         * this bumps q by one when the estimate came out one short.
         */
        tvp->tv_usec = q + ((r + 24) >> 10);

}
/*
 * Straightforward conversion of a nanosecond count into a timeval:
 * whole seconds go to tv_sec, and the sub-second remainder, scaled from
 * nanoseconds down to microseconds, goes to tv_usec.
 */
void
hrt2tv_new(hrtime_t hrt, struct timeval *tvp)
{
        const hrtime_t whole_secs = hrt / NANOSEC;
        const hrtime_t leftover_ns = hrt % NANOSEC;

        tvp->tv_sec = (time_t)whole_secs;
        tvp->tv_usec = leftover_ns / 1000;
}

/*
 * Benchmark driver: sample gethrtime() once, then invoke the conversion
 * routine `tf` TESTCOUNT times on that same sample, writing into *tvp.
 */
void
do_test_hrt2tv(struct timeval *tvp, void (*tf)(hrtime_t, struct timeval *))
{
        const hrtime_t sample = gethrtime();
        int remaining = TESTCOUNT;

        while (remaining-- > 0) {
                tf(sample, tvp);
        }
}
/*
 * Time TESTCOUNT calls of the hrt2tv implementation `tf` and print, tagged
 * with `name`, the last converted value followed by the total elapsed
 * nanoseconds and the per-iteration average.
 *
 * tf:   conversion routine under test
 * name: label used as the prefix of both output lines
 */
void
test_hrt2tv(void (*tf)(hrtime_t, struct timeval *), const char *name)
{
        timespec_t start, end;
        uint64_t diff;
        struct timeval tvp;

        clock_gettime(CLOCK_MONOTONIC, &start);
        do_test_hrt2tv(&tvp, tf);
        clock_gettime(CLOCK_MONOTONIC, &end);
        /* Borrow one second so the nanosecond subtraction cannot go negative. */
        if (end.tv_nsec < start.tv_nsec) {
                end.tv_sec -= 1;
                end.tv_nsec += NANOSEC;
        }
        diff = (uint64_t)(end.tv_nsec - start.tv_nsec);
        /* Widen before multiplying so the product is 64-bit however NANOSEC is typed. */
        diff += (uint64_t)(end.tv_sec - start.tv_sec) * NANOSEC;
        /* Casts make each argument match its conversion specifier exactly. */
        printf("%s out: %ld %ld\n", name, (long)tvp.tv_sec, (long)tvp.tv_usec);
        printf("%s: %llu for %d iterations (%lluns per)\n", name,
            (unsigned long long)diff, TESTCOUNT,
            (unsigned long long)(diff / TESTCOUNT));
}

/*
 * Run each old/new pair of conversion benchmarks in turn; every call
 * prints its own result lines.
 */
int
main(void)
{
        test_hrt2ts(hrt2ts_old, "hrt2ts_old");
        test_hrt2ts(hrt2ts_new, "hrt2ts_new");

        test_ts2hrt(ts2hrt_old, "ts2hrt_old");
        test_ts2hrt(ts2hrt_new, "ts2hrt_new");

        test_hrt2tv(hrt2tv_old, "hrt2tv_old");
        test_hrt2tv(hrt2tv_new, "hrt2tv_new");

        return (0);
}

Here are the results for 64-bit:

[pmooney@fbf78115-e35d-41ad-8de1-c7729e7ef507 ~]$ gcc -O2 -m64 test.c
[pmooney@fbf78115-e35d-41ad-8de1-c7729e7ef507 ~]$ ./a.out
hrt2ts_old out: 3999579 457906879
hrt2ts_old: 65166214 for 10000000 iterations (6ns per)
hrt2ts_new out: 3999579 523236995
hrt2ts_new: 27384771 for 10000000 iterations (2ns per)
ts2hrt_old out: 3999579550630130
ts2hrt_old: 37698537 for 10000000 iterations (3ns per)
ts2hrt_new out: 3999579588336809
ts2hrt_new: 20534698 for 10000000 iterations (2ns per)
hrt2tv_old out: 3999579 608879
hrt2tv_old: 132647645 for 10000000 iterations (13ns per)
hrt2tv_new out: 3999579 741541
hrt2tv_new: 42581307 for 10000000 iterations (4ns per)

... and 32-bit:

[pmooney@fbf78115-e35d-41ad-8de1-c7729e7ef507 ~]$ gcc -O2 -m32 test.c
[pmooney@fbf78115-e35d-41ad-8de1-c7729e7ef507 ~]$ ./a.out
hrt2ts_old out: 505795097040587133 -81064690078906408
hrt2ts_old: 93591836 for 10000000 iterations (9ns per)
hrt2ts_new out: 908324255269980541 38664705664
hrt2ts_new: 208671683 for 10000000 iterations (20ns per)
ts2hrt_old out: 4000125420170843
ts2hrt_old: 121201904 for 10000000 iterations (12ns per)
ts2hrt_new out: 4000125541384366
ts2hrt_new: 30899755 for 10000000 iterations (3ns per)
hrt2tv_old out: 4000125 572295
hrt2tv_old: 161604889 for 10000000 iterations (16ns per)
hrt2tv_new out: 4000125 733913
hrt2tv_new: 310336290 for 10000000 iterations (31ns per)

Related issues

Related to illumos gate - Bug #13088: uniqtime() is too clever for its own good (New)

Actions
Actions

Also available in: Atom PDF