123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246 |
- /*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #include "config.h"
- #include "cryptobox.h"
- #include "curve25519.h"
- #include "platform_config.h"
-
- typedef struct {
- guint64 v[5];
- } fe51;
- typedef guint64 fe[10];
-
- extern void ladder_avx (fe *var, const guchar *p);
- extern void ladder_base_avx (fe *var, const guchar *p);
- extern void fe51_mul_avx (fe51 *a, const fe51 *b, const fe51 *c);
- extern void fe51_pack_avx (guchar *out, const fe51 *var);
- extern void fe51_nsquare_avx (fe51 *a, const fe51 *b, gint n);
-
-
/* Read three bytes at `in` as an unsigned little-endian integer. */
static guint64 load_3 (const unsigned char *in)
{
	guint64 acc = 0;
	unsigned int i;

	for (i = 0; i < 3; i++) {
		/* widen before shifting so no bits are lost */
		acc |= ((guint64) in[i]) << (8 * i);
	}

	return acc;
}
-
/* Read four bytes at `in` as an unsigned little-endian integer. */
static guint64 load_4 (const unsigned char *in)
{
	guint64 acc = 0;
	unsigned int i;

	for (i = 0; i < 4; i++) {
		/* widen before shifting so byte 3 never touches a sign bit */
		acc |= ((guint64) in[i]) << (8 * i);
	}

	return acc;
}
-
- static void
- fe_frombytes (fe h, const unsigned char *s)
- {
- guint64 h0 = load_4 (s);
- guint64 h1 = load_3 (s + 4) << 6;
- guint64 h2 = load_3 (s + 7) << 5;
- guint64 h3 = load_3 (s + 10) << 3;
- guint64 h4 = load_3 (s + 13) << 2;
- guint64 h5 = load_4 (s + 16);
- guint64 h6 = load_3 (s + 20) << 7;
- guint64 h7 = load_3 (s + 23) << 5;
- guint64 h8 = load_3 (s + 26) << 4;
- guint64 h9 = load_3 (s + 29) << 2;
- guint64 carry0;
- guint64 carry1;
- guint64 carry2;
- guint64 carry3;
- guint64 carry4;
- guint64 carry5;
- guint64 carry6;
- guint64 carry7;
- guint64 carry8;
- guint64 carry9;
-
- carry9 = h9 >> 25;
- h0 += carry9 * 19;
- h9 &= 0x1FFFFFF;
- carry1 = h1 >> 25;
- h2 += carry1;
- h1 &= 0x1FFFFFF;
- carry3 = h3 >> 25;
- h4 += carry3;
- h3 &= 0x1FFFFFF;
- carry5 = h5 >> 25;
- h6 += carry5;
- h5 &= 0x1FFFFFF;
- carry7 = h7 >> 25;
- h8 += carry7;
- h7 &= 0x1FFFFFF;
-
- carry0 = h0 >> 26;
- h1 += carry0;
- h0 &= 0x3FFFFFF;
- carry2 = h2 >> 26;
- h3 += carry2;
- h2 &= 0x3FFFFFF;
- carry4 = h4 >> 26;
- h5 += carry4;
- h4 &= 0x3FFFFFF;
- carry6 = h6 >> 26;
- h7 += carry6;
- h6 &= 0x3FFFFFF;
- carry8 = h8 >> 26;
- h9 += carry8;
- h8 &= 0x3FFFFFF;
-
- h[0] = h0;
- h[1] = h1;
- h[2] = h2;
- h[3] = h3;
- h[4] = h4;
- h[5] = h5;
- h[6] = h6;
- h[7] = h7;
- h[8] = h8;
- h[9] = h9;
- }
-
- #define fe51_square(x, y) fe51_nsquare_avx(x, y, 1)
-
- void fe51_invert (fe51 *r, const fe51 *x)
- {
- fe51 z2;
- fe51 z9;
- fe51 z11;
- fe51 z2_5_0;
- fe51 z2_10_0;
- fe51 z2_20_0;
- fe51 z2_50_0;
- fe51 z2_100_0;
- fe51 t;
-
- /* 2 */ fe51_square (&z2, x);
- /* 4 */ fe51_square (&t, &z2);
- /* 8 */ fe51_square (&t, &t);
- /* 9 */ fe51_mul_avx (&z9, &t, x);
- /* 11 */ fe51_mul_avx (&z11, &z9, &z2);
- /* 22 */ fe51_square (&t, &z11);
- /* 2^5 - 2^0 = 31 */ fe51_mul_avx (&z2_5_0, &t, &z9);
-
- /* 2^10 - 2^5 */ fe51_nsquare_avx (&t, &z2_5_0, 5);
- /* 2^10 - 2^0 */ fe51_mul_avx (&z2_10_0, &t, &z2_5_0);
-
- /* 2^20 - 2^10 */ fe51_nsquare_avx (&t, &z2_10_0, 10);
- /* 2^20 - 2^0 */ fe51_mul_avx (&z2_20_0, &t, &z2_10_0);
-
- /* 2^40 - 2^20 */ fe51_nsquare_avx (&t, &z2_20_0, 20);
- /* 2^40 - 2^0 */ fe51_mul_avx (&t, &t, &z2_20_0);
-
- /* 2^50 - 2^10 */ fe51_nsquare_avx (&t, &t, 10);
- /* 2^50 - 2^0 */ fe51_mul_avx (&z2_50_0, &t, &z2_10_0);
-
- /* 2^100 - 2^50 */ fe51_nsquare_avx (&t, &z2_50_0, 50);
- /* 2^100 - 2^0 */ fe51_mul_avx (&z2_100_0, &t, &z2_50_0);
-
- /* 2^200 - 2^100 */ fe51_nsquare_avx (&t, &z2_100_0, 100);
- /* 2^200 - 2^0 */ fe51_mul_avx (&t, &t, &z2_100_0);
-
- /* 2^250 - 2^50 */ fe51_nsquare_avx (&t, &t, 50);
- /* 2^250 - 2^0 */ fe51_mul_avx (&t, &t, &z2_50_0);
-
- /* 2^255 - 2^5 */ fe51_nsquare_avx (&t, &t, 5);
- /* 2^255 - 21 */ fe51_mul_avx (r, &t, &z11);
- }
-
- #define x1 var[0]
- #define x2 var[1]
- #define z2 var[2]
-
- void
- scalarmult_avx (unsigned char *q,
- const unsigned char *n,
- const unsigned char *p)
- {
- fe var[3];
- fe51 x_51;
- fe51 z_51;
- unsigned char e[32];
-
- memcpy (e, n, 32);
- e[0] &= 248;
- e[31] &= 127;
- e[31] |= 64;
-
- fe_frombytes (x1, p);
-
- ladder_avx (var, e);
-
- z_51.v[0] = (z2[1] << 26) + z2[0];
- z_51.v[1] = (z2[3] << 26) + z2[2];
- z_51.v[2] = (z2[5] << 26) + z2[4];
- z_51.v[3] = (z2[7] << 26) + z2[6];
- z_51.v[4] = (z2[9] << 26) + z2[8];
-
- x_51.v[0] = (x2[1] << 26) + x2[0];
- x_51.v[1] = (x2[3] << 26) + x2[2];
- x_51.v[2] = (x2[5] << 26) + x2[4];
- x_51.v[3] = (x2[7] << 26) + x2[6];
- x_51.v[4] = (x2[9] << 26) + x2[8];
-
- fe51_invert (&z_51, &z_51);
- fe51_mul_avx (&x_51, &x_51, &z_51);
- fe51_pack_avx (q, &x_51);
- }
-
- #undef x2
- #undef z2
- #define x2 var[0]
- #define z2 var[1]
-
- int
- scalarmult_base_avx (unsigned char *q, const unsigned char *n)
- {
- unsigned char e[32];
-
- fe var[3];
-
- fe51 x_51;
- fe51 z_51;
-
- memcpy (e, n, 32);
- e[0] &= 248;
- e[31] &= 127;
- e[31] |= 64;
-
- ladder_base_avx (var, e);
-
- z_51.v[0] = (z2[1] << 26) + z2[0];
- z_51.v[1] = (z2[3] << 26) + z2[2];
- z_51.v[2] = (z2[5] << 26) + z2[4];
- z_51.v[3] = (z2[7] << 26) + z2[6];
- z_51.v[4] = (z2[9] << 26) + z2[8];
-
- x_51.v[0] = (x2[1] << 26) + x2[0];
- x_51.v[1] = (x2[3] << 26) + x2[2];
- x_51.v[2] = (x2[5] << 26) + x2[4];
- x_51.v[3] = (x2[7] << 26) + x2[6];
- x_51.v[4] = (x2[9] << 26) + x2[8];
-
- fe51_invert (&z_51, &z_51);
- fe51_mul_avx (&x_51, &x_51, &z_51);
- fe51_pack_avx (q, &x_51);
-
- return 0;
- }
|