12 #if CRYPTOPP_MSC_VERSION
13 # pragma warning(disable: 4731)
18 #if defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
22 #if defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE)
23 # define VMAC_BOOL_WORD128 1
25 # define VMAC_BOOL_WORD128 0
29 #define const // Turbo C++ 2006 workaround
// Fixed 64-bit constants.
// p64 = 2^64 - 257.
31 static const word64 p64 = W64LIT(0xfffffffffffffeff);
// m62, m63, m64: masks with the low 62, 63 and 64 bits set, respectively.
32 static const word64 m62 = W64LIT(0x3fffffffffffffff);
33 static const word64 m63 = W64LIT(0x7fffffffffffffff);
34 static const word64 m64 = W64LIT(0xffffffffffffffff);
// mpoly: keeps the low 29 bits of each 32-bit half of a 64-bit word when ANDed
// (i.e. clears the top three bits of each half).
35 static const word64 mpoly = W64LIT(0x1fffffff1fffffff);
// m126: 128-bit mask with the low 126 bits set (m62 in the high 64 bits,
// m64 in the low 64 bits). Two alternative definitions follow: a macro form
// and a static-constant form.
42 #define m126 ((word128(m62)<<64)|m64)
44 static const word128 m126 = (word128(m62)<<64)|m64;
51 if (digestLength != 8 && digestLength != 16)
53 m_is128 = digestLength == 16;
56 if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
57 throw InvalidArgument(
"VMAC: L1KeyLength must be a positive multiple of 128");
62 cipher.
SetKey(userKey, keylength, params);
63 const unsigned int blockSize = cipher.
BlockSize();
64 const unsigned int blockSizeInWords = blockSize /
sizeof(word64);
73 ConditionalByteReverse<word64>(
BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*
sizeof(word64));
78 for (i = 0; i <= (size_t)m_is128; i++)
89 word64 *l3Key = m_l3Key();
90 assert(
IsAlignedOn(l3Key,GetAlignmentOf<word64>()));
92 for (i = 0; i <= (size_t)m_is128; i++)
99 }
while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));
103 const byte *nonce = GetIVAndThrowIfInvalid(params, nonceLength);
115 size_t length = ThrowIfInvalidIVLength(len);
117 byte *storedNonce = m_nonce();
121 memset(storedNonce, 0, s-length);
122 memcpy(storedNonce+s-length, nonce, length);
127 if (m_padCached && (storedNonce[s-1] | 1) == (nonce[length-1] | 1))
130 for (
size_t i=0; m_padCached && i<s-length; i++)
131 m_padCached = (storedNonce[i] == 0);
135 memset(storedNonce, 0, s-length);
136 memcpy(storedNonce+s-length, nonce, length-1);
137 storedNonce[s-1] = nonce[length-1] & 0xfe;
141 storedNonce[s-1] = nonce[length-1];
143 m_isFirstBlock =
true;
147 void VMAC_Base::HashEndianCorrectedBlock(
const word64 *data)
149 CRYPTOPP_UNUSED(data);
151 throw NotImplemented(
"VMAC: HashEndianCorrectedBlock is not implemented");
157 #if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_VMAC_ASM)
163 #if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || (CRYPTOPP_BOOL_X32 && !defined(CRYPTOPP_DISABLE_VMAC_ASM))))
164 #if CRYPTOPP_MSC_VERSION
165 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
169 __attribute__ ((noinline))
171 VMAC_Base::VHASH_Update_SSE2(
const word64 *data,
size_t blocksRemainingInWord64,
int tagPart)
173 assert(
IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
174 assert(
IsAlignedOn(m_nhKey(),GetAlignmentOf<word64>()));
176 const word64 *nhK = m_nhKey();
177 word64 *polyS = (word64*)(
void*)m_polyState();
178 word32 L1KeyLength = m_L1KeyLength;
181 CRYPTOPP_UNUSED(data); CRYPTOPP_UNUSED(tagPart); CRYPTOPP_UNUSED(L1KeyLength);
182 CRYPTOPP_UNUSED(blocksRemainingInWord64);
192 #
if _MSC_VER < 1300 || defined(__INTEL_COMPILER)
193 char isFirstBlock = m_isFirstBlock;
194 AS2( mov ebx, [L1KeyLength])
195 AS2( mov dl, [isFirstBlock])
198 AS2( mov ebx, [ecx+m_L1KeyLength])
199 AS2( mov dl, [ecx+m_isFirstBlock])
201 AS2( mov eax, tagPart)
209 AS2( mov ecx, blocksRemainingInWord64)
213 #if CRYPTOPP_BOOL_X32
225 AS2( lea ebp, [edi+8*ebp])
226 AS2( movq mm6, [esi])
227 AS2( paddq mm6, [edi])
228 AS2( movq mm5, [esi+8])
229 AS2( paddq mm5, [edi+8])
233 ASS( pshufw mm2, mm6, 1, 0, 3, 2)
234 AS2( pmuludq mm6, mm5)
235 ASS( pshufw mm3, mm5, 1, 0, 3, 2)
236 AS2( pmuludq mm5, mm2)
237 AS2( pmuludq mm2, mm3)
238 AS2( pmuludq mm3, mm4)
240 AS2( movd [esp], mm6)
242 #if CRYPTOPP_BOOL_X32
243 AS2( movd [esp+8], mm5)
245 AS2( movd [esp+4], mm5)
251 AS2( movq mm0, [esi])
252 AS2( paddq mm0, [edi])
253 AS2( movq mm1, [esi+8])
254 AS2( paddq mm1, [edi+8])
259 ASS( pshufw mm2, mm0, 1, 0, 3, 2)
260 AS2( pmuludq mm0, mm1)
261 #if CRYPTOPP_BOOL_X32
262 AS2( movd [esp+16], mm3)
264 AS2( movd [esp+8], mm3)
268 ASS( pshufw mm3, mm1, 1, 0, 3, 2)
269 AS2( pmuludq mm1, mm2)
270 AS2( pmuludq mm2, mm3)
271 AS2( pmuludq mm3, mm4)
272 AS2( movd mm4, [esp])
274 #if CRYPTOPP_BOOL_X32
275 AS2( movd mm4, [esp+8])
277 AS2( movd mm4, [esp+16])
279 AS2( movd mm4, [esp+4])
281 AS2( movd mm4, [esp+8])
284 AS2( movd [esp], mm0)
287 #if CRYPTOPP_BOOL_X32
288 AS2( movd [esp+8], mm1)
290 AS2( movd [esp+4], mm1)
298 #if CRYPTOPP_BOOL_X32
299 AS2( movd [esp+16], mm3)
301 AS2( movd [esp+8], mm3)
305 AS2( movd mm4, [esp])
307 #if CRYPTOPP_BOOL_X32
308 AS2( movd mm4, [esp+8])
310 AS2( movd mm4, [esp+16])
312 AS2( movd mm4, [esp+4])
314 AS2( movd mm4, [esp+8])
317 AS2( lea ebp, [8*ebx])
320 AS2( movd [esp], mm7)
323 #if CRYPTOPP_BOOL_X32
324 AS2( movd [esp+8], mm6)
326 AS2( movd [esp+4], mm6)
// Memory-operand shorthands used by the assembly instructions that follow:
// k0, k1, k2, k3 address 4-byte-spaced locations at byte offsets 24, 28, 16
// and 20 from eax, respectively.
337 #define k0 [eax+2*8+2*4]
338 #define k1 [eax+2*8+3*4]
339 #define k2 [eax+2*8+0*4]
340 #define k3 [eax+2*8+1*4]
344 AS2( movd mm0, [esp])
349 #if CRYPTOPP_BOOL_X32
350 AS2( movd mm2, [esp+8])
352 AS2( movd mm2, [esp+4])
366 AS2( pmuludq mm0, k3)
368 AS2( pmuludq mm1, k2)
371 AS2( pmuludq mm2, mm6)
377 AS2( pmuludq mm3, mm7)
378 AS2( pmuludq mm4, mm7)
379 AS2( pmuludq mm5, mm6)
384 AS2( pmuludq mm1, k2)
389 AS2( pmuludq mm2, k3)
390 AS2( pmuludq mm3, mm7)
391 #if CRYPTOPP_BOOL_X32
392 AS2( movd [esp+16], mm0)
394 AS2( movd [esp+8], mm0)
397 AS2( pmuludq mm7, mm5)
398 AS2( pmuludq mm5, k3)
401 AS2( pmuludq mm1, k2)
406 AS2( pmuludq mm2, mm6)
407 AS2( pmuludq mm6, a0)
410 AS2( movd mm3, [esp])
413 AS2( pmuludq mm3, k3)
416 AS2( pmuludq mm1, k2)
418 #if CRYPTOPP_BOOL_X32
419 AS2( movd mm2, [esp+8])
421 AS2( movd mm2, [esp+4])
429 #if CRYPTOPP_BOOL_X32
430 AS2( movd mm7, [esp+16])
432 AS2( movd mm7, [esp+8])
457 #if CRYPTOPP_BOOL_X32
468 :
"m" (L1KeyLength),
"c" (blocksRemainingInWord64),
"S" (data),
"D" (nhK+tagPart*2),
"d" (m_isFirstBlock),
"a" (polyS+tagPart*4)
475 #if VMAC_BOOL_WORD128
// Native 128-bit integer variants of the arithmetic helper macros.
// DeclareNH(a): declares a single word128 accumulator named a, initialized to 0.
476 #define DeclareNH(a) word128 a=0
// MUL64(rh,rl,i1,i2): full product of i1 and i2 computed in word128;
// rh receives the high 64 bits, rl the low 64 bits.
477 #define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
// AccumulateNH(a,b,c): a += b*c, with b widened to word128 before multiplying.
478 #define AccumulateNH(a, b, c) a += word128(b)*(c)
// Multiply128(r,i1,i2): r = 128-bit product of i1 and i2 after each operand is
// truncated to its low 64 bits.
479 #define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
481 #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && !defined(_M_ARM)
// MUL32(a, b): unsigned multiply of the low 32 bits of a and b, yielding a
// 64-bit product. The first form uses the __emulu compiler intrinsic; the
// second form is the plain cast-and-multiply equivalent.
482 #define MUL32(a, b) __emulu(word32(a), word32(b))
484 #define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
486 #if defined(CRYPTOPP_X64_ASM_AVAILABLE)
// x86-64 inline-assembly variants of the arithmetic helper macros.
// DeclareNH(a): declares the accumulator as two word64 variables, a0 (low half)
// and a1 (high half), both initialized to 0.
487 #define DeclareNH(a) word64 a##0=0, a##1=0
// MUL64(rh,rl,i1,i2): 64x64 -> 128-bit multiply via mulq; i1 is passed in rax,
// the low half of the product is returned in rl (rax) and the high half in rh (rdx).
488 #define MUL64(rh,rl,i1,i2) asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
// AccumulateNH(a,b,c): multiplies b by c with mulq, then adds the low half of
// the product to a0 and the high half plus carry to a1.
489 #define AccumulateNH(a, b, c) asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
// ADD128(rh,rl,ih,il): 128-bit add; rl += il, then rh += ih + carry.
490 #define ADD128(rh,rl,ih,il) asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
491 #elif defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
// _umul128-based variants of the arithmetic helper macros.
// DeclareNH(a): declares the accumulator as two word64 variables, a0 and a1,
// both initialized to 0.
492 #define DeclareNH(a) word64 a##0=0, a##1=0
// MUL64(rh,rl,i1,i2): 64x64 -> 128-bit multiply; _umul128 returns the low
// 64 bits (stored in rl) and writes the high 64 bits through &rh.
493 #define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
494 #define AccumulateNH(a, b, c) {\
496 pl = _umul128(b,c,&ph);\
498 a##1 += ph + (a##0 < pl);}
500 #define VMAC_BOOL_32BIT 1
501 #define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
502 #define MUL64(rh,rl,i1,i2) \
503 { word64 _i1 = (i1), _i2 = (i2); \
504 word64 m1= MUL32(_i1,_i2>>32); \
505 word64 m2= MUL32(_i1>>32,_i2); \
506 rh = MUL32(_i1>>32,_i2>>32); \
507 rl = MUL32(_i1,_i2); \
508 ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
509 ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
511 #define AccumulateNH(a, b, c) {\
512 word64 p = MUL32(b, c);\
513 a##1 += word32((p)>>32);\
515 p = MUL32((b)>>32, c);\
516 a##2 += word32((p)>>32);\
518 p = MUL32((b)>>32, (c)>>32);\
520 p = MUL32(b, (c)>>32);\
522 a##2 += word32(p>>32);}
525 #ifndef VMAC_BOOL_32BIT
526 #define VMAC_BOOL_32BIT 0
529 #define ADD128(rh,rl,ih,il) \
530 { word64 _il = (il); \
532 (rh) += (ih) + ((rl) < (_il)); \
536 #if !(defined(_MSC_VER) && _MSC_VER < 1300)
537 template <
bool T_128BitTag>
539 void VMAC_Base::VHASH_Update_Template(
const word64 *data,
size_t blocksRemainingInWord64)
541 assert(
IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
542 assert(
IsAlignedOn(m_nhKey(),GetAlignmentOf<word64>()));
544 #define INNER_LOOP_ITERATION(j) {\
545 word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
546 word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
547 AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
549 AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
552 #if (defined(_MSC_VER) && _MSC_VER < 1300)
553 bool T_128BitTag = m_is128;
555 size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
556 size_t innerLoopEnd = L1KeyLengthInWord64;
557 const word64 *nhK = m_nhKey();
558 word64 *polyS = (word64*)(
void*)m_polyState();
559 bool isFirstBlock =
true;
563 #if VMAC_BOOL_WORD128
566 word64 ah1=0, al1=0, ah2=0, al2=0;
568 word64 kh1, kl1, kh2, kl2;
569 kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
572 kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
582 if (blocksRemainingInWord64 < L1KeyLengthInWord64)
584 if (blocksRemainingInWord64 % 8)
586 innerLoopEnd = blocksRemainingInWord64 % 8;
587 for (; i<innerLoopEnd; i+=2)
588 INNER_LOOP_ITERATION(0);
590 innerLoopEnd = blocksRemainingInWord64;
592 for (; i<innerLoopEnd; i+=8)
594 INNER_LOOP_ITERATION(0);
595 INNER_LOOP_ITERATION(1);
596 INNER_LOOP_ITERATION(2);
597 INNER_LOOP_ITERATION(3);
599 blocksRemainingInWord64 -= innerLoopEnd;
600 data += innerLoopEnd;
603 word32 nh0[2], nh1[2];
606 nh0[0] = word32(nhA0);
607 nhA1 += (nhA0 >> 32);
608 nh1[0] = word32(nhA1);
609 nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;
613 nh0[1] = word32(nhB0);
614 nhB1 += (nhB0 >> 32);
615 nh1[1] = word32(nhB1);
616 nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
// Accessor macros for the state block at polyS+i*4, where i is the index
// variable of the surrounding code.
// a0..a3 view the first two word64 values (polyS+i*4) as four word32 pieces:
// a0/a1 select from word32 indices 2..3 and a2/a3 from indices 0..1. Which
// index of each pair is picked depends on NativeByteOrder::ToEnum()
// (presumably 0 or 1 -- confirm against its definition).
619 #define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
620 #define a1 (*(((word32 *)(polyS+i*4))+3-NativeByteOrder::ToEnum())) // workaround for GCC 3.2
621 #define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
622 #define a3 (*(((word32 *)(polyS+i*4))+1-NativeByteOrder::ToEnum()))
// aHi: the first word64 at polyS+i*4, accessed as a whole.
623 #define aHi ((polyS+i*4)[0])
// k0..k3 and kHi: the same access pattern applied to the two word64 values
// starting at polyS+i*4+2.
624 #define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
625 #define k1 (*(((word32 *)(polyS+i*4+2))+3-NativeByteOrder::ToEnum()))
626 #define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
627 #define k3 (*(((word32 *)(polyS+i*4+2))+1-NativeByteOrder::ToEnum()))
628 #define kHi ((polyS+i*4+2)[0])
632 isFirstBlock =
false;
635 m_isFirstBlock =
false;
636 for (i=0; i<=(size_t)T_128BitTag; i++)
638 word64 t = (word64)nh0[i] + k0;
640 t = (t >> 32) + nh1[i] + k1;
642 aHi = (t >> 32) + nh2[i] + kHi;
647 for (i=0; i<=(size_t)T_128BitTag; i++)
663 t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
667 p += MUL32(a1, 2*k3);
668 p += MUL32(a2, 2*k2);
669 p += MUL32(a3, 2*k1);
675 p += MUL32(a2, 2*k3);
676 p += MUL32(a3, 2*k2);
692 #else // #if VMAC_BOOL_32BIT
695 isFirstBlock =
false;
698 m_isFirstBlock =
false;
699 #if VMAC_BOOL_WORD128
// first_poly_step (word128 form): a = (m masked with m126) + the 128-bit value
// assembled from kh (high 64 bits) and kl (low 64 bits).
700 #define first_poly_step(a, kh, kl, m) a = (m & m126) + ((word128(kh) << 64) | kl)
702 first_poly_step(a1, kh1, kl1, nhA);
704 first_poly_step(a2, kh2, kl2, nhB);
706 #define first_poly_step(ah, al, kh, kl, mh, ml) {\
708 ADD128(mh, ml, kh, kl); \
711 first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
713 first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
719 #if VMAC_BOOL_WORD128
720 a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
722 ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
726 #if VMAC_BOOL_WORD128
727 a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
729 ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
735 #if VMAC_BOOL_WORD128
736 #define poly_step(a, kh, kl, m) \
737 { word128 t1, t2, t3, t4;\
738 Multiply128(t2, a>>64, kl);\
739 Multiply128(t3, a, kh);\
740 Multiply128(t1, a, kl);\
741 Multiply128(t4, a>>64, 2*kh);\
745 a = (word128(word64(t2)&m63) << 64) | word64(t4);\
750 poly_step(a1, kh1, kl1, nhA);
752 poly_step(a2, kh2, kl2, nhB);
754 #define poly_step(ah, al, kh, kl, mh, ml) \
755 { word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0; \
757 MUL64(t2h,t2l,ah,kl); \
758 MUL64(t3h,t3l,al,kh); \
759 MUL64(t1h,t1l,ah,2*kh); \
760 MUL64(ah,al,al,kl); \
762 ADD128(t2h,t2l,t3h,t3l); \
764 ADD128(ah,al,t1h,t1l); \
767 ADD128(t2h,ah,z,t2l); \
769 t2h += t2h + (ah >> 63); \
773 ADD128(ah,al,mh,ml); \
774 ADD128(ah,al,z,t2h); \
777 poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
779 poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
781 #endif // #if VMAC_BOOL_32BIT
782 }
while (blocksRemainingInWord64);
784 #if VMAC_BOOL_WORD128
785 (polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
788 (polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
790 #elif !VMAC_BOOL_32BIT
791 (polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
794 (polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
799 inline void VMAC_Base::VHASH_Update(
const word64 *data,
size_t blocksRemainingInWord64)
801 #if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || (CRYPTOPP_BOOL_X32 && !defined(CRYPTOPP_DISABLE_VMAC_ASM))))
804 VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
806 VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
807 m_isFirstBlock =
false;
812 #if defined(_MSC_VER) && _MSC_VER < 1300
813 VHASH_Update_Template(data, blocksRemainingInWord64);
816 VHASH_Update_Template<true>(data, blocksRemainingInWord64);
818 VHASH_Update_Template<false>(data, blocksRemainingInWord64);
823 size_t VMAC_Base::HashMultipleBlocks(
const word64 *data,
size_t length)
825 size_t remaining =
ModPowerOf2(length, m_L1KeyLength);
826 VHASH_Update(data, (length-remaining)/8);
830 static word64 L3Hash(
const word64 *input,
const word64 *l3Key,
size_t len)
832 word64 rh, rl, t, z=0;
833 word64 p1 = input[0], p2 = input[1];
834 word64 k1 = l3Key[0], k2 = l3Key[1];
839 ADD128(p1, p2, len, t);
841 t = (p1 > m63) + ((p1 == m63) & (p2 == m64));
842 ADD128(p1, p2, z, t);
848 t += (word32)t > 0xfffffffeU;
854 p1 += (0 - (p1 < k1)) & 257;
856 p2 += (0 - (p2 < k2)) & 257;
859 MUL64(rh, rl, p1, p2);
861 ADD128(t, rl, z, rh);
863 ADD128(t, rl, z, rh);
866 rl += (0 - (rl < t)) & 257;
867 rl += (0 - (rl > p64-1)) & 257;
873 assert(
IsAlignedOn(DataBuf(),GetAlignmentOf<word64>()));
874 assert(
IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
875 size_t len =
ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);
879 memset(m_data()+len, 0, (0-len)%16);
880 VHASH_Update(DataBuf(), ((len+15)/16)*2);
883 else if (m_isFirstBlock)
886 m_polyState()[0] = m_polyState()[2];
887 m_polyState()[1] = m_polyState()[3];
890 m_polyState()[4] = m_polyState()[6];
891 m_polyState()[5] = m_polyState()[7];
898 t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(
true,
BIG_ENDIAN_ORDER, m_pad());
899 t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(
true,
BIG_ENDIAN_ORDER, m_pad()+8);
909 memcpy(mac, t, size);
914 word64 t = L3Hash(m_polyState(), m_l3Key(), len);
921 memcpy(mac, &t, size);