/* Exhaustive search for potential TwoSum-like algorithms with at most * n operations amongst the addition, the subtraction, minNum, maxNum, * minNumMag and maxNumMag (see IEEE 754-2008 standard, Section 5.3.1); * no constants or other operations. By default the test is performed * on 4 well-chosen pairs of values (the 4th pair is necessary to prove * that minNum/maxNum is useless for 5 and 6 operations). These pairs * are built from integer constants and the nextabove function (in C, * nextafter towards +inf). Except in precisions 2 and 3, one has: * a1 = 8 + 8 eps and b1 = 1 + 3 eps * a2 = 1 + 5 eps and b2 = 8 + 8 eps * a3 = 3 and b3 = 3 + 2 eps * a4 = - a1 and b4 = - b1 * * Compile with -DMPFR or with -DSIPE= to use MPFR or SIPE in the * given precision, respectively. This is necessary for the search in * other precisions than 53, or if the machine has extended precision. * Note that the SIPE precision is limited to 15 on 32-bit machines and * 31 on 64-bit machines, but this is sufficient for the proof. * * For instance, with gcc: * gcc -Wall -O3 -march=native -std=c99 minasm.c -o minasm -lm * gcc -Wall -O3 -march=native -std=c99 minasm.c -o minasm -DMPFR -lmpfr * gcc -Wall -O3 -march=native -std=c99 minasm.c -o minasm -DSIPE=12 * To run the version built against MPFR, e.g.: * ./minasm (for the help) * ./minasm 1 2 6 53 * ./minasm 1 4 6 53 * ./minasm 1 6 5 53 * * This program may need some C99 features. * * Copyright (C) 2008-2012 Vincent Lefevre, INRIA / LIP (ENS-Lyon), * for the article "On the computation of correctly-rounded sums" * by P. Kornerup, V. Lefevre, N. Louvet, and J.-M. Muller. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 3 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see http://www.gnu.org/licenses/ * or write to the Free Software Foundation, Inc., 51 Franklin St, * Fifth Floor, Boston, MA 02110-1301, USA. */ #define SVNID "$Id: minasm.c 56954 2012-12-11 15:07:51Z vinc17/ypig $" #include #include #include #include #include #include #ifdef SIPE #ifdef MPFR #error "MPFR and SIPE must not be defined at the same time." #endif #if !(SIPE > 1) #error "Bad SIPE value (precision)" #endif #endif /* Number of results */ #ifndef NR #define NR 4 #endif /* Node numbers of arguments x and y */ #define NNX (-2) #define NNY (-1) #define NVALMAX 16 #define NOPSBOUND (NVALMAX - 2) #define EVEN(I) (((I) & 1) == 0) typedef enum { OPADD, OPSUB, OPMIN, OPMAX, OPMINMAG, OPMAXMAG, NOP } OP; char opstr[NOP][4] = { "add", "sub", "min", "max", "MIN", "MAX" }; #if defined MPFR #include typedef mpfr_t num_t; static void my_minmag (mpfr_ptr m, mpfr_ptr x, mpfr_ptr y, mpfr_rnd_t r) { int c = mpfr_cmpabs (x, y); c < 0 ? mpfr_set (m, x, GMP_RNDN) : c > 0 ? mpfr_set (m, y, GMP_RNDN) : mpfr_min (m, x, y, GMP_RNDN); } static void my_maxmag (mpfr_ptr m, mpfr_ptr x, mpfr_ptr y, mpfr_rnd_t r) { int c = mpfr_cmpabs (x, y); c < 0 ? mpfr_set (m, y, GMP_RNDN) : c > 0 ? mpfr_set (m, x, GMP_RNDN) : mpfr_max (m, x, y, GMP_RNDN); } #define NEQ(X,Y) (! mpfr_equal_p ((X), (Y))) #define ARGC 5 #define ARGPREC " " #elif defined SIPE #include typedef sipe_t num_t; #define NEQ(X,Y) (sipe_ne ((X), (Y))) #define ARGC 4 #define ARGPREC "" #define ABOVE(X) (sipe_nextabove ((X), SIPE)) #else typedef double num_t; #define NEQ(X,Y) ((X) != (Y)) #define ARGC 4 #define ARGPREC "" #endif typedef struct { int i; /* node number of 1st operand */ int j; /* node number of 2nd operand */ int o; /* operation */ int d; /* depth */ num_t r[NR]; /* results */ } NODE; #define SWAP_CHILDREN(NODE) \ do \ { \ int _temp = (NODE).i; \ (NODE).i = (NODE).j; \ (NODE).j = _temp; \ } \ while (0) NODE *fulldag, *dag; num_t res[NR]; long ctr = 0, algo = 0; int verbosity, mode, nops; int var (int k) { return k == NNX ? 'x' : k == NNY ? 'y' : 'a' + k; } /* Check that after the first node, an x child node is used before the first y child node (due to the symmetry between x and y). */ static int xfirst (int last) { #ifdef NOXFIRST return 1; #else int k; assert (NNX < NNY); /* since i <= j is required for the addition */ for (k = 1; k <= last; k++) { if (dag[k].i == NNX) return 1; if (dag[k].i == NNY) return 0; if (dag[k].j == NNX) return 1; if (dag[k].j == NNY) return 0; } exit (3); /* One should never reach that. */ #endif } /* Check that all intermediate results are used. */ static int allused (int last) { int *used; int ret; int nint, k; if (last < 2) return 1; nint = last - 1; /* intermediate results to check: index 1 to last - 1 */ used = calloc (nint, sizeof (int)); if (used == NULL) { fprintf (stderr, "minasm: can't allocate memory\n"); exit (2); } for (k = 2; k <= last; k++) { if (dag[k].i >= 1) used[dag[k].i - 1] = 1; if (dag[k].j >= 1) used[dag[k].j - 1] = 1; } ret = 1; for (k = 0; k < nint; k++) if (!used[k]) { ret = 0; break; } free (used); return ret; } /* Check that the operations (o,i,j) are all different and sorted in the (max(i,j), min(i,j), o) lexicographic order. Note: the first operation "add x y" is not taken into account in order to allow "add x x". */ static int sorted (int last) { #ifndef NOSORTED int i, j, k, u, v; const int sh = 3; assert (NOP <= 1 << sh); for (k = 0; k <= last; k++) { i = dag[k].i <= dag[k].j ? dag[k].i : dag[k].j; j = dag[k].i <= dag[k].j ? dag[k].j : dag[k].i; v = (((j + 2) * NVALMAX + (i + 2)) << sh) + dag[k].o; if (k > 0 && (u == v || (k > 1 && u > v))) return 0; u = v; } #endif return 1; } static void printalgo (char *s, NODE *p, int last) { int k; printf ("%s Algorithm in %d operations (depth = %d)\n", s, last + 1, p[last].d); for (k = 0; k <= last; k++) printf ("%c = %s %c %c (depth %d)\n", var(k), opstr[p[k].o], var(p[k].i), var(p[k].j), p[k].d); } /* Try to change some signs of intermediate results and see if one obtains a preferred equivalent algorithm. */ static int symsubcheck (int last) { int ret = 1; #ifndef NOSYMSUBCHECK size_t size; NODE *tdag; int *fneg, *neg; int m; if (last < 2) return 1; size = (last + 1) * sizeof (NODE); tdag = malloc (size); fneg = calloc (last + 3, sizeof (int)); if (tdag == NULL || fneg == NULL) { fprintf (stderr, "minasm: can't allocate memory\n"); exit (2); } neg = fneg + 2; for (m = 1; m < 1 << (last - 1); m++) { int k, q; k = 1; q = m; while (EVEN (q)) { k++; q >>= 1; } assert (k < last); neg[k] ^= 1; /* Let's find the node index of the first sign change. */ k = 1; while (!neg[k]) k++; /* It must be a subtraction dag[k].i - dag[k].j, otherwise the transformation is not possible. And one must have i > j, as the form i < j is preferred. */ if (dag[k].o != OPSUB || dag[k].i < dag[k].j) goto next_m; memcpy (tdag, dag, size); for ( ; k <= last ; k++) if (neg[dag[k].i] & neg[dag[k].j]) { /* Both arguments have their sign changed. */ if (neg[k]) { if (dag[k].o == OPMIN) tdag[k].o = OPMAX; else if (dag[k].o == OPMAX) tdag[k].o = OPMIN; else if (dag[k].o != OPADD) goto next_m; } else if (dag[k].o == OPSUB) SWAP_CHILDREN (tdag[k]); else goto next_m; } else if (neg[dag[k].i] | neg[dag[k].j]) { /* Only one argument has its sign changed. The operation must be either an addition or a subtraction. */ if (dag[k].o == OPADD) { tdag[k].o = OPSUB; if (neg[dag[k].i] ^ neg[k]) SWAP_CHILDREN (tdag[k]); } else if (dag[k].o == OPSUB && neg[dag[k].i] == neg[k]) { tdag[k].o = OPADD; if (dag[k].i > dag[k].j) SWAP_CHILDREN (tdag[k]); } else goto next_m; } if (verbosity > 1) { /* Output source and transformed algorithms, for checking. */ printalgo ("", dag, last); printalgo ("", tdag, last); } ret = 0; break; next_m: ; } /* for m */ free (fneg); free (tdag); #endif return ret; } /* Search for all DAG's, where cur is the next node number to determine and last is the last node number to determine (result). */ static void dagsearch (int cur, int last) { int i, j, r; OP op; for (i = -2; i < cur; i++) for (j = -2; j < cur; j++) for (op = 0; op < mode; op++) { /* Require i != j except for the addition since using the same argument x twice is useless (one gets 0 for the subtraction and x for the min/max operations). Require i <= j except for the subtraction because of commutativity. */ if ((op != OPADD && i == j) || (op != OPSUB && i > j)) continue; dag[cur].i = i; dag[cur].j = j; dag[cur].o = op; dag[cur].d = (dag[i].d < dag[j].d ? dag[j].d : dag[i].d) + 1; switch (op) { #define DOOP2(S) for (r = 0; r < NR; r++) { S; } break; #if defined MPFR #define DOOP(F) DOOP2(F(dag[cur].r[r], dag[i].r[r], dag[j].r[r], GMP_RNDN)) case OPADD: DOOP (mpfr_add) case OPSUB: DOOP (mpfr_sub) case OPMIN: DOOP (mpfr_min) case OPMAX: DOOP (mpfr_max) case OPMINMAG: DOOP (my_minmag) case OPMAXMAG: DOOP (my_maxmag) #elif defined SIPE #define DOOP(X) DOOP2(dag[cur].r[r] = (X)) case OPADD: DOOP (sipe_add (dag[i].r[r], dag[j].r[r], SIPE)) case OPSUB: DOOP (sipe_sub (dag[i].r[r], dag[j].r[r], SIPE)) case OPMIN: DOOP (sipe_min (dag[i].r[r], dag[j].r[r])) case OPMAX: DOOP (sipe_max (dag[i].r[r], dag[j].r[r])) case OPMINMAG: DOOP (sipe_minmag (dag[i].r[r], dag[j].r[r])) case OPMAXMAG: DOOP (sipe_maxmag (dag[i].r[r], dag[j].r[r])) #else /* neither MPFR nor SIPE */ #define DOOP(X) DOOP2(dag[cur].r[r] = (X)) #define MINMAG(X,Y) \ (fabs(X) < fabs(Y) ? (X) : fabs(X) > fabs(Y) ? (Y) : fmin((X),(Y))) #define MAXMAG(X,Y) \ (fabs(X) < fabs(Y) ? (Y) : fabs(X) > fabs(Y) ? (X) : fmax((X),(Y))) case OPADD: DOOP (dag[i].r[r] + dag[j].r[r]) case OPSUB: DOOP (dag[i].r[r] - dag[j].r[r]) case OPMIN: DOOP (fmin (dag[i].r[r], dag[j].r[r])) case OPMAX: DOOP (fmax (dag[i].r[r], dag[j].r[r])) case OPMINMAG: DOOP (MINMAG (dag[i].r[r], dag[j].r[r])) case OPMAXMAG: DOOP (MAXMAG (dag[i].r[r], dag[j].r[r])) #endif default: assert (0); /* should not occur */ } if (cur < last) dagsearch (cur + 1, last); else { ctr++; for (r = 0; r < NR; r++) if (NEQ (dag[cur].r[r], res[r])) goto next; if (xfirst (cur) && allused (cur) && sorted (cur) && symsubcheck (cur)) { algo++; if (verbosity) printalgo ("***", dag, cur); } next: ; } } } int main (int argc, char **argv) { char *end; int i, r; long tmp; #ifdef MPFR mpfr_prec_t prec; #endif fprintf (stderr, "%s\n\n", SVNID); #ifdef MPFR fprintf (stderr, "GMP ..... Library: %-12s Header: %d.%d.%d\n", gmp_version, __GNU_MP_VERSION, __GNU_MP_VERSION_MINOR, __GNU_MP_VERSION_PATCHLEVEL); fprintf (stderr, "MPFR .... Library: %-12s Header: %s (based on %d.%d.%d)\n", mpfr_get_version (), MPFR_VERSION_STRING, MPFR_VERSION_MAJOR, MPFR_VERSION_MINOR, MPFR_VERSION_PATCHLEVEL); # if MPFR_VERSION_MAJOR >= 3 fprintf (stderr, "MPFR features: TLS = %s, decimal = %s", mpfr_buildopt_tls_p () ? "yes" : "no", mpfr_buildopt_decimal_p () ? "yes" : "no"); # if MPFR_VERSION_MAJOR > 3 || MPFR_VERSION_MINOR >= 1 fprintf (stderr, ", GMP internals = %s\nMPFR tuning: %s", mpfr_buildopt_gmpinternals_p () ? "yes" : "no", mpfr_buildopt_tune_case ()); # endif fprintf (stderr, "\n"); # endif # if MPFR_VERSION >= MPFR_VERSION_NUM(2,3,0) fprintf (stderr, "MPFR patches: %s\n", mpfr_get_patches ()); # endif fprintf (stderr, "\n"); #endif #ifdef SIPE fprintf (stderr, "%s\n\n", SIPE_SVNID); if (SIPE < 2 || SIPE > SIPE_PREC_MAX) { fprintf (stderr, "minasm was compiled with an incorrect SIPE value" " (precision).\n"); exit (1); } #endif if (argc != ARGC) { fprintf (stderr, "Usage: minasm " ARGPREC "\n"); fprintf (stderr, "Verbosity:\n" " 0: output the number of DAG's and algorithms.\n" " 1: also output the algorithms.\n" " 2: also output source and transformed algorithms.\n"); fprintf (stderr, "Mode:\n" " 2: consider additions and subtractions only.\n" " 4: also consider minNum and maxNum.\n" " 6: also consider minNumMag and maxNumMag.\n"); exit (1); } tmp = strtol (argv[1], &end, 10); if (*end != '\0' || tmp < 0 || tmp > 2) { fprintf (stderr, "minasm: verbosity must be 0 or 1\n"); exit (1); } verbosity = tmp; tmp = strtol (argv[2], &end, 10); if (*end != '\0' || tmp < 2 || tmp > 6 || (tmp & 1) != 0) { fprintf (stderr, "minasm: mode must be 2, 4 or 6\n"); exit (1); } mode = tmp; tmp = strtol (argv[3], &end, 10); if (*end != '\0' || tmp < 1 || tmp > NOPSBOUND) { fprintf (stderr, "minasm: bad number of operations\n"); exit (1); } nops = tmp; #ifdef MPFR assert (ARGC >= 4); tmp = strtol (argv[4], &end, 10); if (*end != '\0' || tmp < MPFR_PREC_MIN || tmp > MPFR_PREC_MAX) { fprintf (stderr, "minasm: bad precision\n"); exit (1); } prec = tmp; #endif fulldag = calloc (2 + nops, sizeof (NODE)); if (fulldag == NULL) { fprintf (stderr, "minasm: can't allocate memory for the DAG's\n"); exit (2); } dag = fulldag + 2; assert (NR >= 2 && NR <= 4); #if defined MPFR for (r = 0; r < NR; r++) { int k; for (k = NNX; k < nops; k++) mpfr_init2 (dag[k].r[r], prec); mpfr_init2 (res[r], prec); } mpfr_set_ui (dag[NNX].r[0], 8, GMP_RNDN); mpfr_nextabove (dag[NNX].r[0]); /* a[0] = 8 + 8 eps */ mpfr_set_ui (dag[NNY].r[0], 1, GMP_RNDN); mpfr_nextabove (dag[NNY].r[0]); mpfr_nextabove (dag[NNY].r[0]); mpfr_nextabove (dag[NNY].r[0]); /* b[0] = 1 + 3 eps */ mpfr_set (dag[NNX].r[1], dag[NNY].r[0], GMP_RNDN); mpfr_nextabove (dag[NNX].r[1]); mpfr_nextabove (dag[NNX].r[1]); /* a[1] = 1 + 5 eps */ mpfr_set (dag[NNY].r[1], dag[NNX].r[0], GMP_RNDN); /* b[1] = 8 + 8 eps */ if (NR >= 3) { mpfr_set_ui (dag[NNX].r[2], 3, GMP_RNDN); /* a[2] = 3 */ mpfr_set (dag[NNY].r[2], dag[NNX].r[2], GMP_RNDN); mpfr_nextabove (dag[NNY].r[2]); /* b[2] = 3 + 2 eps */ } if (NR >= 4) { mpfr_neg (dag[NNX].r[3], dag[NNX].r[0], GMP_RNDN); mpfr_neg (dag[NNY].r[3], dag[NNY].r[0], GMP_RNDN); } #elif defined SIPE dag[NNX].r[0] = ABOVE (sipe_set_si (8, SIPE)); dag[NNY].r[0] = ABOVE (ABOVE (ABOVE (sipe_set_si (1, SIPE)))); dag[NNX].r[1] = ABOVE (ABOVE (dag[NNY].r[0])); dag[NNY].r[1] = dag[NNX].r[0]; if (NR >= 3) { dag[NNX].r[2] = sipe_set_si (3, SIPE); dag[NNY].r[2] = ABOVE (dag[NNX].r[2]); } if (NR >= 4) { dag[NNX].r[3] = sipe_neg (dag[NNX].r[0], SIPE); dag[NNY].r[3] = sipe_neg (dag[NNY].r[0], SIPE); } #else /* neither MPFR nor SIPE */ dag[NNX].r[0] = nextafter (8.0, 9.0); dag[NNY].r[0] = nextafter (nextafter (nextafter (1.0, 2.0), 2.0), 2.0); dag[NNX].r[1] = nextafter (nextafter (dag[NNY].r[0], 2.0), 2.0); dag[NNY].r[1] = dag[NNX].r[0]; if (NR >= 3) { dag[NNX].r[2] = 3.0; dag[NNY].r[2] = nextafter (dag[NNX].r[2], 4.0); } if (NR >= 4) { dag[NNX].r[3] = -dag[NNX].r[0]; dag[NNY].r[3] = -dag[NNY].r[0]; } #endif /* We can start by computing s = o(x + y), since this value must be returned. The error term will correspond to the last node of the DAG, and will also be called "result". */ dag[0].i = NNX; dag[0].j = NNY; dag[0].o = OPADD; dag[0].d = 1; for (r = 0; r < NR; r++) { int k1, k2; #if defined MPFR mpfr_add (dag[0].r[r], dag[NNX].r[r], dag[NNY].r[r], GMP_RNDN); k1 = mpfr_cmpabs (dag[NNX].r[r], dag[NNY].r[r]) < 0 ? NNY : NNX; k2 = NNX + NNY - k1; mpfr_sub (res[r], dag[k1].r[r], dag[0].r[r], GMP_RNDN); mpfr_add (res[r], res[r], dag[k2].r[r], GMP_RNDN); #elif defined SIPE dag[0].r[r] = sipe_add (dag[NNX].r[r], dag[NNY].r[r], SIPE); k1 = sipe_cmpmag (dag[NNX].r[r], dag[NNY].r[r]) < 0 ? NNY : NNX; k2 = NNX + NNY - k1; res[r] = sipe_sub (dag[k1].r[r], dag[0].r[r], SIPE); res[r] = sipe_add (res[r], dag[k2].r[r], SIPE); #else /* neither MPFR nor SIPE */ dag[0].r[r] = dag[NNX].r[r] + dag[NNY].r[r]; k1 = fabs (dag[NNX].r[r]) < fabs (dag[NNY].r[r]) ? NNY : NNX; k2 = NNX + NNY - k1; res[r] = dag[k1].r[r] - dag[0].r[r]; res[r] += dag[k2].r[r]; #endif } printf ("Searching for algorithms with up to %d operations," " using %d pairs...\n", nops, NR); for (i = 1; i < nops; i++) dagsearch (1, i); printf ("%ld DAG's and %ld algorithm(s)\n", ctr, algo); return 0; }