OpenCoverage

x86_64-gcc.c

Absolute File Name:/home/opencoverage/opencoverage/guest-scripts/openssl/src/crypto/bn/asm/x86_64-gcc.c
Source codeSwitch to Preprocessed file
LineSourceCount
1/*-
2 * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved.-
3 *-
4 * Licensed under the OpenSSL license (the "License"). You may not use-
5 * this file except in compliance with the License. You can obtain a copy-
6 * in the file LICENSE in the source distribution or at-
7 * https://www.openssl.org/source/license.html-
8 */-
9-
10#include "../bn_lcl.h"-
11#if !(defined(__GNUC__) && __GNUC__>=2)-
12# include "../bn_asm.c" /* kind of dirty hack for Sun Studio */-
13#else-
14/*--
15 * x86_64 BIGNUM accelerator version 0.1, December 2002.-
16 *-
17 * Implemented by Andy Polyakov <appro@openssl.org> for the OpenSSL-
18 * project.-
19 *-
20 * Rights for redistribution and usage in source and binary forms are-
21 * granted according to the OpenSSL license. Warranty of any kind is-
22 * disclaimed.-
23 *-
24 * Q. Version 0.1? It doesn't sound like Andy, he used to assign real-
25 * versions, like 1.0...-
26 * A. Well, that's because this code is basically a quick-n-dirty-
27 * proof-of-concept hack. As you can see it's implemented with-
28 * inline assembler, which means that you're bound to GCC and that-
29 * there might be enough room for further improvement.-
30 *-
31 * Q. Why inline assembler?-
32 * A. x86_64 features own ABI which I'm not familiar with. This is-
33 * why I decided to let the compiler take care of subroutine-
34 * prologue/epilogue as well as register allocation. For reference.-
35 * Win64 implements different ABI for AMD64, different from Linux.-
36 *-
37 * Q. How much faster does it get?-
38 * A. 'apps/openssl speed rsa dsa' output with no-asm:-
39 *-
40 * sign verify sign/s verify/s-
41 * rsa 512 bits 0.0006s 0.0001s 1683.8 18456.2-
42 * rsa 1024 bits 0.0028s 0.0002s 356.0 6407.0-
43 * rsa 2048 bits 0.0172s 0.0005s 58.0 1957.8-
44 * rsa 4096 bits 0.1155s 0.0018s 8.7 555.6-
45 * sign verify sign/s verify/s-
46 * dsa 512 bits 0.0005s 0.0006s 2100.8 1768.3-
47 * dsa 1024 bits 0.0014s 0.0018s 692.3 559.2-
48 * dsa 2048 bits 0.0049s 0.0061s 204.7 165.0-
49 *-
50 * 'apps/openssl speed rsa dsa' output with this module:-
51 *-
52 * sign verify sign/s verify/s-
53 * rsa 512 bits 0.0004s 0.0000s 2767.1 33297.9-
54 * rsa 1024 bits 0.0012s 0.0001s 867.4 14674.7-
55 * rsa 2048 bits 0.0061s 0.0002s 164.0 5270.0-
56 * rsa 4096 bits 0.0384s 0.0006s 26.1 1650.8-
57 * sign verify sign/s verify/s-
58 * dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3-
59 * dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4-
60 * dsa 2048 bits 0.0016s 0.0020s 620.4 504.6-
61 *-
62 * For the reference. IA-32 assembler implementation performs-
63 * very much like 64-bit code compiled with no-asm on the same-
64 * machine.-
65 */-
66-
67# undef mul-
68# undef mul_add-
69-
70/*--
71 * "m"(a), "+m"(r) is the way to favor DirectPath ยต-code;-
72 * "g"(0) let the compiler to decide where does it-
73 * want to keep the value of zero;-
74 */-
75# define mul_add(r,a,word,carry) do { \-
76 register BN_ULONG high,low; \-
77 asm ("mulq %3" \-
78 : "=a"(low),"=d"(high) \-
79 : "a"(word),"m"(a) \-
80 : "cc"); \-
81 asm ("addq %2,%0; adcq %3,%1" \-
82 : "+r"(carry),"+d"(high)\-
83 : "a"(low),"g"(0) \-
84 : "cc"); \-
85 asm ("addq %2,%0; adcq %3,%1" \-
86 : "+m"(r),"+d"(high) \-
87 : "r"(carry),"g"(0) \-
88 : "cc"); \-
89 carry=high; \-
90 } while (0)-
91-
92# define mul(r,a,word,carry) do { \-
93 register BN_ULONG high,low; \-
94 asm ("mulq %3" \-
95 : "=a"(low),"=d"(high) \-
96 : "a"(word),"g"(a) \-
97 : "cc"); \-
98 asm ("addq %2,%0; adcq %3,%1" \-
99 : "+r"(carry),"+d"(high)\-
100 : "a"(low),"g"(0) \-
101 : "cc"); \-
102 (r)=carry, carry=high; \-
103 } while (0)-
104# undef sqr-
105# define sqr(r0,r1,a) \-
106 asm ("mulq %2" \-
107 : "=a"(r0),"=d"(r1) \-
108 : "a"(a) \-
109 : "cc");-
110-
111BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,-
112 BN_ULONG w)-
113{-
114 BN_ULONG c1 = 0;-
115-
116 if (num <= 0)
num <= 0Description
TRUEnever evaluated
FALSEevaluated 5682694 times by 2 tests
Evaluated by:
  • libcrypto.so.1.1
  • sm2_internal_test
0-5682694
117 return c1;
never executed: return c1;
0
118-
119 while (num & ~3) {
num & ~3Description
TRUEevaluated 33447085 times by 2 tests
Evaluated by:
  • libcrypto.so.1.1
  • sm2_internal_test
FALSEevaluated 5682694 times by 2 tests
Evaluated by:
  • libcrypto.so.1.1
  • sm2_internal_test
5682694-33447085
120 mul_add(rp[0], ap[0], w, c1);-
121 mul_add(rp[1], ap[1], w, c1);-
122 mul_add(rp[2], ap[2], w, c1);-
123 mul_add(rp[3], ap[3], w, c1);-
124 ap += 4;-
125 rp += 4;-
126 num -= 4;-
127 }
executed 33447085 times by 2 tests: end of block
Executed by:
  • libcrypto.so.1.1
  • sm2_internal_test
33447085
128 if (num) {
numDescription
TRUEevaluated 3420323 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
FALSEevaluated 2262371 times by 2 tests
Evaluated by:
  • libcrypto.so.1.1
  • sm2_internal_test
2262371-3420323
129 mul_add(rp[0], ap[0], w, c1);-
130 if (--num == 0)
--num == 0Description
TRUEevaluated 1930117 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
FALSEevaluated 1490206 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
1490206-1930117
131 return c1;
executed 1930117 times by 1 test: return c1;
Executed by:
  • libcrypto.so.1.1
1930117
132 mul_add(rp[1], ap[1], w, c1);-
133 if (--num == 0)
--num == 0Description
TRUEevaluated 664534 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
FALSEevaluated 825672 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
664534-825672
134 return c1;
executed 664534 times by 1 test: return c1;
Executed by:
  • libcrypto.so.1.1
664534
135 mul_add(rp[2], ap[2], w, c1);-
136 return c1;
executed 825672 times by 1 test: return c1;
Executed by:
  • libcrypto.so.1.1
825672
137 }-
138-
139 return c1;
executed 2262371 times by 2 tests: return c1;
Executed by:
  • libcrypto.so.1.1
  • sm2_internal_test
2262371
140}-
141-
142BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)-
143{-
144 BN_ULONG c1 = 0;-
145-
146 if (num <= 0)
num <= 0Description
TRUEnever evaluated
FALSEevaluated 25981325 times by 2 tests
Evaluated by:
  • libcrypto.so.1.1
  • sm2_internal_test
0-25981325
147 return c1;
never executed: return c1;
0
148-
149 while (num & ~3) {
num & ~3Description
TRUEevaluated 85045232 times by 2 tests
Evaluated by:
  • libcrypto.so.1.1
  • sm2_internal_test
FALSEevaluated 25981325 times by 2 tests
Evaluated by:
  • libcrypto.so.1.1
  • sm2_internal_test
25981325-85045232
150 mul(rp[0], ap[0], w, c1);-
151 mul(rp[1], ap[1], w, c1);-
152 mul(rp[2], ap[2], w, c1);-
153 mul(rp[3], ap[3], w, c1);-
154 ap += 4;-
155 rp += 4;-
156 num -= 4;-
157 }
executed 85045232 times by 2 tests: end of block
Executed by:
  • libcrypto.so.1.1
  • sm2_internal_test
85045232
158 if (num) {
numDescription
TRUEevaluated 7940794 times by 2 tests
Evaluated by:
  • libcrypto.so.1.1
  • sm2_internal_test
FALSEevaluated 18040531 times by 2 tests
Evaluated by:
  • libcrypto.so.1.1
  • sm2_internal_test
7940794-18040531
159 mul(rp[0], ap[0], w, c1);-
160 if (--num == 0)
--num == 0Description
TRUEevaluated 4007626 times by 2 tests
Evaluated by:
  • libcrypto.so.1.1
  • sm2_internal_test
FALSEevaluated 3933168 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
3933168-4007626
161 return c1;
executed 4007626 times by 2 tests: return c1;
Executed by:
  • libcrypto.so.1.1
  • sm2_internal_test
4007626
162 mul(rp[1], ap[1], w, c1);-
163 if (--num == 0)
--num == 0Description
TRUEevaluated 2251147 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
FALSEevaluated 1682021 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
1682021-2251147
164 return c1;
executed 2251147 times by 1 test: return c1;
Executed by:
  • libcrypto.so.1.1
2251147
165 mul(rp[2], ap[2], w, c1);-
166 }
executed 1682021 times by 1 test: end of block
Executed by:
  • libcrypto.so.1.1
1682021
167 return c1;
executed 19722552 times by 2 tests: return c1;
Executed by:
  • libcrypto.so.1.1
  • sm2_internal_test
19722552
168}-
169-
170void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)-
171{-
172 if (n <= 0)
n <= 0Description
TRUEnever evaluated
FALSEevaluated 213797 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
0-213797
173 return;
never executed: return;
0
174-
175 while (n & ~3) {
n & ~3Description
TRUEevaluated 427862 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
FALSEevaluated 213797 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
213797-427862
176 sqr(r[0], r[1], a[0]);-
177 sqr(r[2], r[3], a[1]);-
178 sqr(r[4], r[5], a[2]);-
179 sqr(r[6], r[7], a[3]);-
180 a += 4;-
181 r += 8;-
182 n -= 4;-
183 }
executed 427862 times by 1 test: end of block
Executed by:
  • libcrypto.so.1.1
427862
184 if (n) {
nDescription
TRUEevaluated 207231 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
FALSEevaluated 6566 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
6566-207231
185 sqr(r[0], r[1], a[0]);-
186 if (--n == 0)
--n == 0Description
TRUEevaluated 168254 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
FALSEevaluated 38977 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
38977-168254
187 return;
executed 168254 times by 1 test: return;
Executed by:
  • libcrypto.so.1.1
168254
188 sqr(r[2], r[3], a[1]);-
189 if (--n == 0)
--n == 0Description
TRUEevaluated 19195 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
FALSEevaluated 19782 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
19195-19782
190 return;
executed 19195 times by 1 test: return;
Executed by:
  • libcrypto.so.1.1
19195
191 sqr(r[4], r[5], a[2]);-
192 }
executed 19782 times by 1 test: end of block
Executed by:
  • libcrypto.so.1.1
19782
193}
executed 26348 times by 1 test: end of block
Executed by:
  • libcrypto.so.1.1
26348
194-
195BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)-
196{-
197 BN_ULONG ret, waste;-
198-
199 asm("divq %4":"=a"(ret), "=d"(waste)-
200 : "a"(l), "d"(h), "r"(d)-
201 : "cc");-
202-
203 return ret;
executed 95669 times by 1 test: return ret;
Executed by:
  • libcrypto.so.1.1
95669
204}-
205-
206BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,-
207 int n)-
208{-
209 BN_ULONG ret;-
210 size_t i = 0;-
211-
212 if (n <= 0)
n <= 0Description
TRUEevaluated 81166 times by 2 tests
Evaluated by:
  • libcrypto.so.1.1
  • sm2_internal_test
FALSEevaluated 11877289 times by 2 tests
Evaluated by:
  • libcrypto.so.1.1
  • sm2_internal_test
81166-11877289
213 return 0;
executed 81166 times by 2 tests: return 0;
Executed by:
  • libcrypto.so.1.1
  • sm2_internal_test
81166
214-
215 asm volatile (" subq %0,%0 \n" /* clear carry */-
216 " jmp 1f \n"-
217 ".p2align 4 \n"-
218 "1: movq (%4,%2,8),%0 \n"-
219 " adcq (%5,%2,8),%0 \n"-
220 " movq %0,(%3,%2,8) \n"-
221 " lea 1(%2),%2 \n"-
222 " dec %1 \n"-
223 " jnz 1b \n"-
224 " sbbq %0,%0 \n"-
225 :"=&r" (ret), "+c"(n), "+r"(i)-
226 :"r"(rp), "r"(ap), "r"(bp)-
227 :"cc", "memory");-
228-
229 return ret & 1;
executed 11877289 times by 2 tests: return ret & 1;
Executed by:
  • libcrypto.so.1.1
  • sm2_internal_test
11877289
230}-
231-
232# ifndef SIMICS-
233BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,-
234 int n)-
235{-
236 BN_ULONG ret;-
237 size_t i = 0;-
238-
239 if (n <= 0)
n <= 0Description
TRUEevaluated 51011 times by 1 test
Evaluated by:
  • libcrypto.so.1.1
FALSEevaluated 42279231 times by 2 tests
Evaluated by:
  • libcrypto.so.1.1
  • sm2_internal_test
51011-42279231
240 return 0;
executed 51011 times by 1 test: return 0;
Executed by:
  • libcrypto.so.1.1
51011
241-
242 asm volatile (" subq %0,%0 \n" /* clear borrow */-
243 " jmp 1f \n"-
244 ".p2align 4 \n"-
245 "1: movq (%4,%2,8),%0 \n"-
246 " sbbq (%5,%2,8),%0 \n"-
247 " movq %0,(%3,%2,8) \n"-
248 " lea 1(%2),%2 \n"-
249 " dec %1 \n"-
250 " jnz 1b \n"-
251 " sbbq %0,%0 \n"-
252 :"=&r" (ret), "+c"(n), "+r"(i)-
253 :"r"(rp), "r"(ap), "r"(bp)-
254 :"cc", "memory");-
255-
256 return ret & 1;
executed 42279231 times by 2 tests: return ret & 1;
Executed by:
  • libcrypto.so.1.1
  • sm2_internal_test
42279231
257}-
258# else-
259/* Simics 1.4<7 has buggy sbbq:-( */-
260# define BN_MASK2 0xffffffffffffffffL-
261BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)-
262{-
263 BN_ULONG t1, t2;-
264 int c = 0;-
265-
266 if (n <= 0)-
267 return (BN_ULONG)0;-
268-
269 for (;;) {-
270 t1 = a[0];-
271 t2 = b[0];-
272 r[0] = (t1 - t2 - c) & BN_MASK2;-
273 if (t1 != t2)-
274 c = (t1 < t2);-
275 if (--n <= 0)-
276 break;-
277-
278 t1 = a[1];-
279 t2 = b[1];-
280 r[1] = (t1 - t2 - c) & BN_MASK2;-
281 if (t1 != t2)-
282 c = (t1 < t2);-
283 if (--n <= 0)-
284 break;-
285-
286 t1 = a[2];-
287 t2 = b[2];-
288 r[2] = (t1 - t2 - c) & BN_MASK2;-
289 if (t1 != t2)-
290 c = (t1 < t2);-
291 if (--n <= 0)-
292 break;-
293-
294 t1 = a[3];-
295 t2 = b[3];-
296 r[3] = (t1 - t2 - c) & BN_MASK2;-
297 if (t1 != t2)-
298 c = (t1 < t2);-
299 if (--n <= 0)-
300 break;-
301-
302 a += 4;-
303 b += 4;-
304 r += 4;-
305 }-
306 return c;-
307}-
308# endif-
309-
310/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */-
311/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */-
312/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */-
313/*-
314 * sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number-
315 * c=(c2,c1,c0)-
316 */-
317-
318/*-
319 * Keep in mind that carrying into high part of multiplication result-
320 * can not overflow, because it cannot be all-ones.-
321 */-
322# if 0-
323/* original macros are kept for reference purposes */-
324# define mul_add_c(a,b,c0,c1,c2) do { \-
325 BN_ULONG ta = (a), tb = (b); \-
326 BN_ULONG lo, hi; \-
327 BN_UMULT_LOHI(lo,hi,ta,tb); \-
328 c0 += lo; hi += (c0<lo)?1:0; \-
329 c1 += hi; c2 += (c1<hi)?1:0; \-
330 } while(0)-
331-
332# define mul_add_c2(a,b,c0,c1,c2) do { \-
333 BN_ULONG ta = (a), tb = (b); \-
334 BN_ULONG lo, hi, tt; \-
335 BN_UMULT_LOHI(lo,hi,ta,tb); \-
336 c0 += lo; tt = hi+((c0<lo)?1:0); \-
337 c1 += tt; c2 += (c1<tt)?1:0; \-
338 c0 += lo; hi += (c0<lo)?1:0; \-
339 c1 += hi; c2 += (c1<hi)?1:0; \-
340 } while(0)-
341-
342# define sqr_add_c(a,i,c0,c1,c2) do { \-
343 BN_ULONG ta = (a)[i]; \-
344 BN_ULONG lo, hi; \-
345 BN_UMULT_LOHI(lo,hi,ta,ta); \-
346 c0 += lo; hi += (c0<lo)?1:0; \-
347 c1 += hi; c2 += (c1<hi)?1:0; \-
348 } while(0)-
349# else-
350# define mul_add_c(a,b,c0,c1,c2) do { \-
351 BN_ULONG t1,t2; \-
352 asm ("mulq %3" \-
353 : "=a"(t1),"=d"(t2) \-
354 : "a"(a),"m"(b) \-
355 : "cc"); \-
356 asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \-
357 : "+r"(c0),"+r"(c1),"+r"(c2) \-
358 : "r"(t1),"r"(t2),"g"(0) \-
359 : "cc"); \-
360 } while (0)-
361-
362# define sqr_add_c(a,i,c0,c1,c2) do { \-
363 BN_ULONG t1,t2; \-
364 asm ("mulq %2" \-
365 : "=a"(t1),"=d"(t2) \-
366 : "a"(a[i]) \-
367 : "cc"); \-
368 asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \-
369 : "+r"(c0),"+r"(c1),"+r"(c2) \-
370 : "r"(t1),"r"(t2),"g"(0) \-
371 : "cc"); \-
372 } while (0)-
373-
374# define mul_add_c2(a,b,c0,c1,c2) do { \-
375 BN_ULONG t1,t2; \-
376 asm ("mulq %3" \-
377 : "=a"(t1),"=d"(t2) \-
378 : "a"(a),"m"(b) \-
379 : "cc"); \-
380 asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \-
381 : "+r"(c0),"+r"(c1),"+r"(c2) \-
382 : "r"(t1),"r"(t2),"g"(0) \-
383 : "cc"); \-
384 asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \-
385 : "+r"(c0),"+r"(c1),"+r"(c2) \-
386 : "r"(t1),"r"(t2),"g"(0) \-
387 : "cc"); \-
388 } while (0)-
389# endif-
390-
391# define sqr_add_c2(a,i,j,c0,c1,c2) \-
392 mul_add_c2((a)[i],(a)[j],c0,c1,c2)-
393-
394void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)-
395{-
396 BN_ULONG c1, c2, c3;-
397-
398 c1 = 0;-
399 c2 = 0;-
400 c3 = 0;-
401 mul_add_c(a[0], b[0], c1, c2, c3);-
402 r[0] = c1;-
403 c1 = 0;-
404 mul_add_c(a[0], b[1], c2, c3, c1);-
405 mul_add_c(a[1], b[0], c2, c3, c1);-
406 r[1] = c2;-
407 c2 = 0;-
408 mul_add_c(a[2], b[0], c3, c1, c2);-
409 mul_add_c(a[1], b[1], c3, c1, c2);-
410 mul_add_c(a[0], b[2], c3, c1, c2);-
411 r[2] = c3;-
412 c3 = 0;-
413 mul_add_c(a[0], b[3], c1, c2, c3);-
414 mul_add_c(a[1], b[2], c1, c2, c3);-
415 mul_add_c(a[2], b[1], c1, c2, c3);-
416 mul_add_c(a[3], b[0], c1, c2, c3);-
417 r[3] = c1;-
418 c1 = 0;-
419 mul_add_c(a[4], b[0], c2, c3, c1);-
420 mul_add_c(a[3], b[1], c2, c3, c1);-
421 mul_add_c(a[2], b[2], c2, c3, c1);-
422 mul_add_c(a[1], b[3], c2, c3, c1);-
423 mul_add_c(a[0], b[4], c2, c3, c1);-
424 r[4] = c2;-
425 c2 = 0;-
426 mul_add_c(a[0], b[5], c3, c1, c2);-
427 mul_add_c(a[1], b[4], c3, c1, c2);-
428 mul_add_c(a[2], b[3], c3, c1, c2);-
429 mul_add_c(a[3], b[2], c3, c1, c2);-
430 mul_add_c(a[4], b[1], c3, c1, c2);-
431 mul_add_c(a[5], b[0], c3, c1, c2);-
432 r[5] = c3;-
433 c3 = 0;-
434 mul_add_c(a[6], b[0], c1, c2, c3);-
435 mul_add_c(a[5], b[1], c1, c2, c3);-
436 mul_add_c(a[4], b[2], c1, c2, c3);-
437 mul_add_c(a[3], b[3], c1, c2, c3);-
438 mul_add_c(a[2], b[4], c1, c2, c3);-
439 mul_add_c(a[1], b[5], c1, c2, c3);-
440 mul_add_c(a[0], b[6], c1, c2, c3);-
441 r[6] = c1;-
442 c1 = 0;-
443 mul_add_c(a[0], b[7], c2, c3, c1);-
444 mul_add_c(a[1], b[6], c2, c3, c1);-
445 mul_add_c(a[2], b[5], c2, c3, c1);-
446 mul_add_c(a[3], b[4], c2, c3, c1);-
447 mul_add_c(a[4], b[3], c2, c3, c1);-
448 mul_add_c(a[5], b[2], c2, c3, c1);-
449 mul_add_c(a[6], b[1], c2, c3, c1);-
450 mul_add_c(a[7], b[0], c2, c3, c1);-
451 r[7] = c2;-
452 c2 = 0;-
453 mul_add_c(a[7], b[1], c3, c1, c2);-
454 mul_add_c(a[6], b[2], c3, c1, c2);-
455 mul_add_c(a[5], b[3], c3, c1, c2);-
456 mul_add_c(a[4], b[4], c3, c1, c2);-
457 mul_add_c(a[3], b[5], c3, c1, c2);-
458 mul_add_c(a[2], b[6], c3, c1, c2);-
459 mul_add_c(a[1], b[7], c3, c1, c2);-
460 r[8] = c3;-
461 c3 = 0;-
462 mul_add_c(a[2], b[7], c1, c2, c3);-
463 mul_add_c(a[3], b[6], c1, c2, c3);-
464 mul_add_c(a[4], b[5], c1, c2, c3);-
465 mul_add_c(a[5], b[4], c1, c2, c3);-
466 mul_add_c(a[6], b[3], c1, c2, c3);-
467 mul_add_c(a[7], b[2], c1, c2, c3);-
468 r[9] = c1;-
469 c1 = 0;-
470 mul_add_c(a[7], b[3], c2, c3, c1);-
471 mul_add_c(a[6], b[4], c2, c3, c1);-
472 mul_add_c(a[5], b[5], c2, c3, c1);-
473 mul_add_c(a[4], b[6], c2, c3, c1);-
474 mul_add_c(a[3], b[7], c2, c3, c1);-
475 r[10] = c2;-
476 c2 = 0;-
477 mul_add_c(a[4], b[7], c3, c1, c2);-
478 mul_add_c(a[5], b[6], c3, c1, c2);-
479 mul_add_c(a[6], b[5], c3, c1, c2);-
480 mul_add_c(a[7], b[4], c3, c1, c2);-
481 r[11] = c3;-
482 c3 = 0;-
483 mul_add_c(a[7], b[5], c1, c2, c3);-
484 mul_add_c(a[6], b[6], c1, c2, c3);-
485 mul_add_c(a[5], b[7], c1, c2, c3);-
486 r[12] = c1;-
487 c1 = 0;-
488 mul_add_c(a[6], b[7], c2, c3, c1);-
489 mul_add_c(a[7], b[6], c2, c3, c1);-
490 r[13] = c2;-
491 c2 = 0;-
492 mul_add_c(a[7], b[7], c3, c1, c2);-
493 r[14] = c3;-
494 r[15] = c1;-
495}
executed 858373 times by 1 test: end of block
Executed by:
  • libcrypto.so.1.1
858373
496-
497void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)-
498{-
499 BN_ULONG c1, c2, c3;-
500-
501 c1 = 0;-
502 c2 = 0;-
503 c3 = 0;-
504 mul_add_c(a[0], b[0], c1, c2, c3);-
505 r[0] = c1;-
506 c1 = 0;-
507 mul_add_c(a[0], b[1], c2, c3, c1);-
508 mul_add_c(a[1], b[0], c2, c3, c1);-
509 r[1] = c2;-
510 c2 = 0;-
511 mul_add_c(a[2], b[0], c3, c1, c2);-
512 mul_add_c(a[1], b[1], c3, c1, c2);-
513 mul_add_c(a[0], b[2], c3, c1, c2);-
514 r[2] = c3;-
515 c3 = 0;-
516 mul_add_c(a[0], b[3], c1, c2, c3);-
517 mul_add_c(a[1], b[2], c1, c2, c3);-
518 mul_add_c(a[2], b[1], c1, c2, c3);-
519 mul_add_c(a[3], b[0], c1, c2, c3);-
520 r[3] = c1;-
521 c1 = 0;-
522 mul_add_c(a[3], b[1], c2, c3, c1);-
523 mul_add_c(a[2], b[2], c2, c3, c1);-
524 mul_add_c(a[1], b[3], c2, c3, c1);-
525 r[4] = c2;-
526 c2 = 0;-
527 mul_add_c(a[2], b[3], c3, c1, c2);-
528 mul_add_c(a[3], b[2], c3, c1, c2);-
529 r[5] = c3;-
530 c3 = 0;-
531 mul_add_c(a[3], b[3], c1, c2, c3);-
532 r[6] = c1;-
533 r[7] = c2;-
534}
never executed: end of block
0
535-
536void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)-
537{-
538 BN_ULONG c1, c2, c3;-
539-
540 c1 = 0;-
541 c2 = 0;-
542 c3 = 0;-
543 sqr_add_c(a, 0, c1, c2, c3);-
544 r[0] = c1;-
545 c1 = 0;-
546 sqr_add_c2(a, 1, 0, c2, c3, c1);-
547 r[1] = c2;-
548 c2 = 0;-
549 sqr_add_c(a, 1, c3, c1, c2);-
550 sqr_add_c2(a, 2, 0, c3, c1, c2);-
551 r[2] = c3;-
552 c3 = 0;-
553 sqr_add_c2(a, 3, 0, c1, c2, c3);-
554 sqr_add_c2(a, 2, 1, c1, c2, c3);-
555 r[3] = c1;-
556 c1 = 0;-
557 sqr_add_c(a, 2, c2, c3, c1);-
558 sqr_add_c2(a, 3, 1, c2, c3, c1);-
559 sqr_add_c2(a, 4, 0, c2, c3, c1);-
560 r[4] = c2;-
561 c2 = 0;-
562 sqr_add_c2(a, 5, 0, c3, c1, c2);-
563 sqr_add_c2(a, 4, 1, c3, c1, c2);-
564 sqr_add_c2(a, 3, 2, c3, c1, c2);-
565 r[5] = c3;-
566 c3 = 0;-
567 sqr_add_c(a, 3, c1, c2, c3);-
568 sqr_add_c2(a, 4, 2, c1, c2, c3);-
569 sqr_add_c2(a, 5, 1, c1, c2, c3);-
570 sqr_add_c2(a, 6, 0, c1, c2, c3);-
571 r[6] = c1;-
572 c1 = 0;-
573 sqr_add_c2(a, 7, 0, c2, c3, c1);-
574 sqr_add_c2(a, 6, 1, c2, c3, c1);-
575 sqr_add_c2(a, 5, 2, c2, c3, c1);-
576 sqr_add_c2(a, 4, 3, c2, c3, c1);-
577 r[7] = c2;-
578 c2 = 0;-
579 sqr_add_c(a, 4, c3, c1, c2);-
580 sqr_add_c2(a, 5, 3, c3, c1, c2);-
581 sqr_add_c2(a, 6, 2, c3, c1, c2);-
582 sqr_add_c2(a, 7, 1, c3, c1, c2);-
583 r[8] = c3;-
584 c3 = 0;-
585 sqr_add_c2(a, 7, 2, c1, c2, c3);-
586 sqr_add_c2(a, 6, 3, c1, c2, c3);-
587 sqr_add_c2(a, 5, 4, c1, c2, c3);-
588 r[9] = c1;-
589 c1 = 0;-
590 sqr_add_c(a, 5, c2, c3, c1);-
591 sqr_add_c2(a, 6, 4, c2, c3, c1);-
592 sqr_add_c2(a, 7, 3, c2, c3, c1);-
593 r[10] = c2;-
594 c2 = 0;-
595 sqr_add_c2(a, 7, 4, c3, c1, c2);-
596 sqr_add_c2(a, 6, 5, c3, c1, c2);-
597 r[11] = c3;-
598 c3 = 0;-
599 sqr_add_c(a, 6, c1, c2, c3);-
600 sqr_add_c2(a, 7, 5, c1, c2, c3);-
601 r[12] = c1;-
602 c1 = 0;-
603 sqr_add_c2(a, 7, 6, c2, c3, c1);-
604 r[13] = c2;-
605 c2 = 0;-
606 sqr_add_c(a, 7, c3, c1, c2);-
607 r[14] = c3;-
608 r[15] = c1;-
609}
executed 237118 times by 1 test: end of block
Executed by:
  • libcrypto.so.1.1
237118
610-
611void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)-
612{-
613 BN_ULONG c1, c2, c3;-
614-
615 c1 = 0;-
616 c2 = 0;-
617 c3 = 0;-
618 sqr_add_c(a, 0, c1, c2, c3);-
619 r[0] = c1;-
620 c1 = 0;-
621 sqr_add_c2(a, 1, 0, c2, c3, c1);-
622 r[1] = c2;-
623 c2 = 0;-
624 sqr_add_c(a, 1, c3, c1, c2);-
625 sqr_add_c2(a, 2, 0, c3, c1, c2);-
626 r[2] = c3;-
627 c3 = 0;-
628 sqr_add_c2(a, 3, 0, c1, c2, c3);-
629 sqr_add_c2(a, 2, 1, c1, c2, c3);-
630 r[3] = c1;-
631 c1 = 0;-
632 sqr_add_c(a, 2, c2, c3, c1);-
633 sqr_add_c2(a, 3, 1, c2, c3, c1);-
634 r[4] = c2;-
635 c2 = 0;-
636 sqr_add_c2(a, 3, 2, c3, c1, c2);-
637 r[5] = c3;-
638 c3 = 0;-
639 sqr_add_c(a, 3, c1, c2, c3);-
640 r[6] = c1;-
641 r[7] = c2;-
642}
executed 3585515 times by 2 tests: end of block
Executed by:
  • libcrypto.so.1.1
  • sm2_internal_test
3585515
643#endif-
Source codeSwitch to Preprocessed file

Generated by Squish Coco 4.2.2