* This file is part of the openHiTLS project.
*
* openHiTLS is licensed under the Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
*/
#include "hitls_build.h"
#if defined(HITLS_CRYPTO_CURVE_NISTP256) && defined(HITLS_CRYPTO_NIST_USE_ACCEL)
#include <stdbool.h>
#include <string.h>
#include "bsl_err_internal.h"
#include "bsl_util_internal.h"
#include "crypt_errno.h"
#include "crypt_utils.h"
#include "crypt_bn.h"
#include "crypt_ecc.h"
#include "ecc_local.h"
#include "ecc_utils.h"
#ifndef HITLS_PLATFORM_INT128
#error "This nistp256 implementation require the compiler support 128-bits integer."
#endif
#define FELEM_BITS 256
#define FELEM_BYTES 32
#define LIMB_BITS (sizeof(uint128_t) << 3)
#define LIMB_NUM 4
#define BASE_BITS 64
#define TABLE_G_SIZE 16
#define TABLE_P_SIZE 17
* Field elements, stored as arrays, all represented in little endian.
* Each element of the array is called a digit. Each digit represents an extended 2 ^ 64-bit.
* That is, Felem can be expressed as:
* f_0 + f_1 * 2^64 + f_2 * 2^128 + f_3 * 2^192
* LongFelem is used to store the result of multiplication of field elements and is twice the width of Felem.
* Point is a point represented as a Jacobian coordinate
*/
typedef struct {
uint128_t data[LIMB_NUM];
} Felem;
typedef struct {
uint128_t data[LIMB_NUM * 2];
} LongFelem;
typedef struct {
Felem x;
Felem y;
Felem z;
} Point;
static const Felem FIELD_ORDER = {
{0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001}
};
* Pre-computation table of the base point G, which contains the (X, Y, Z) coordinates of k*G
*
* PRE_MUL_G divides all bits into four equal parts.
* index corresponding bit value of k
* 0 0 0 0 0 0 + 0 + 0 + 0
* 1 0 0 0 1 0 + 0 + 0 + 1
* 2 0 0 1 0 0 + 0 + 2^64 + 0
* 3 0 0 1 1 0 + 0 + 2^64 + 1
* 4 0 1 0 0 0 + 2^128 + 0 + 0
* 5 0 1 0 1 0 + 2^128 + 0 + 1
* 6 0 1 1 0 0 + 2^128 + 2^64 + 0
* 7 0 1 1 1 0 + 2^128 + 2^64 + 1
* 8 1 0 0 0 2^192 + 0 + 0 + 0
* 9 1 0 0 1 2^192 + 0 + 0 + 1
* 10 1 0 1 0 2^192 + 0 + 2^64 + 0
* 11 1 0 1 1 2^192 + 0 + 2^64 + 1
* 12 1 1 0 0 2^192 + 2^128 + 0 + 0
* 13 1 1 0 1 2^192 + 2^128 + 0 + 1
* 14 1 1 1 0 2^192 + 2^128 + 2^64 + 0
* 15 1 1 1 1 2^192 + 2^128 + 2^64 + 1
*
*/
static const Point PRE_MUL_G[TABLE_G_SIZE] = {
{
{{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}},
{{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}},
{{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, 0x6b17d1f2e12c4247}},
{{0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, 0x4fe342e2fe1a7f9b}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, 0x0fa822bc2811aaa5}},
{{0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, 0xbff44ae8f5dba80d}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, 0x300a4bbc89d6726f}},
{{0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, 0x72aac7e0d09b4644}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, 0x447d739beedb5e67}},
{{0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, 0x2d4825ab834131ee}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, 0xef9519328a9c72ff}},
{{0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, 0x611e9fc37dbb2c9b}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, 0x550663797b51f5d8}},
{{0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, 0x157164848aecb851}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, 0xeb5d7745b21141ea}},
{{0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, 0xeafd72ebdbecc17b}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, 0xa6d39677a7849276}},
{{0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, 0x674f84749b0b8816}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, 0x4e769e7672c9ddad}},
{{0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, 0x42b99082de830663}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, 0x78878ef61c6ce04d}},
{{0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, 0xb6cb3f5d7b72c321}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, 0x0c88bc4d716b1287}},
{{0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, 0xdd5ddea3f3901dc6}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, 0x68f344af6b317466}},
{{0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, 0x31b9c405f8540a20}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, 0x4052bf4b6f461db9}},
{{0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, 0xfecf4d5190b0fc61}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, 0x1eddbae2c802e41a}},
{{0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, 0x43104d86560ebcfc}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, 0xb48e26b484f7a21c}},
{{0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, 0xfac015404d4d3dab}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}
};
* Pre-computation table of the base point G, which contains the (X, Y, Z) coordinates of k*G
*
* PRE_MUL_G2[] = PRE_MUL_G[] * 2^32
* index corresponding bit value of k
* 0 0 0 0 0 0 + 0 + 0 + 0
* 1 0 0 0 1 0 + 0 + 0 + 2^32
* 2 0 0 1 0 0 + 0 + 2^96 + 0
* 3 0 0 1 1 0 + 0 + 2^96 + 2^32
* 4 0 1 0 0 0 + 2^160 + 0 + 0
* 5 0 1 0 1 0 + 2^160 + 0 + 2^32
* 6 0 1 1 0 0 + 2^160 + 2^96 + 0
* 7 0 1 1 1 0 + 2^160 + 2^96 + 2^32
* 8 1 0 0 0 2^224 + 0 + 0 + 0
* 9 1 0 0 1 2^224 + 0 + 0 + 2^32
* 10 1 0 1 0 2^224 + 0 + 2^96 + 0
* 11 1 0 1 1 2^224 + 0 + 2^96 + 2^32
* 12 1 1 0 0 2^224 + 2^160 + 0 + 0
* 13 1 1 0 1 2^224 + 2^160 + 0 + 2^32
* 14 1 1 1 0 2^224 + 2^160 + 2^96 + 0
* 15 1 1 1 1 2^224 + 2^160 + 2^96 + 2^32
*/
static const Point PRE_MUL_G2[TABLE_G_SIZE] = {
{
{{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}},
{{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}},
{{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, 0x7fe36b40af22af89}},
{{0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, 0xe697d45825b63624}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, 0x4a5b506612a677a6}},
{{0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, 0xeb13461ceac089f1}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, 0x0781b8291c6a220a}},
{{0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, 0x690cde8df0151593}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, 0x8a535f566ec73617}},
{{0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, 0x0455c08468b08bd7}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, 0x06bada7ab77f8276}},
{{0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, 0x5b476dfd0e6cb18a}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, 0x3e29864e8a2ec908}},
{{0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, 0x239b90ea3dc31e7e}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, 0x820f4dd949f72ff7}},
{{0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, 0x140406ec783a05ec}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, 0x68f6b8542783dfee}},
{{0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, 0xcbe1feba92e40ce6}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, 0xd0b2f94d2f420109}},
{{0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, 0x971459828b0719e5}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, 0x961610004a866aba}},
{{0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, 0x7acb9fadcee75e44}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, 0x24eb9acca333bf5b}},
{{0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, 0x69f891c5acd079cc}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, 0xe51f547c5972a107}},
{{0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, 0x1c309a2b25bb1387}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, 0x20b87b8aa2c4e503}},
{{0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, 0xf5c6fa49919776be}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, 0x1ed7d1b9332010b9}},
{{0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, 0x3a2b03f03217257a}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}, {
{{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, 0x15fee545c78dd9f6}},
{{0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, 0x4ab5b6b2b8753f81}},
{{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
}
};
* Convert big-endian byte stream to Felem
*/
static inline void Bin2Felem(Felem *out, const uint8_t in[FELEM_BYTES])
{
out->data[3] = (uint128_t)Uint64FromBeBytes(in);
out->data[2] = (uint128_t)Uint64FromBeBytes(in + 8);
out->data[1] = (uint128_t)Uint64FromBeBytes(in + 16);
out->data[0] = (uint128_t)Uint64FromBeBytes(in + 24);
}
* Convert Felem to big-endian byte stream
* Input:
* in[] < 2^64
* Output:
* out length is 32
*/
static inline void Felem2Bin(uint8_t out[FELEM_BYTES], const Felem *in)
{
Uint64ToBeBytes((uint64_t)in->data[3], out);
Uint64ToBeBytes((uint64_t)in->data[2], out + 8);
Uint64ToBeBytes((uint64_t)in->data[1], out + 16);
Uint64ToBeBytes((uint64_t)in->data[0], out + 24);
}
* Convert BN to Felem
* Output:
* out[] < 2^64
*/
static int32_t BN2Felem(Felem *out, const BN_BigNum *in)
{
int32_t ret;
uint8_t bin[FELEM_BYTES];
uint32_t len = FELEM_BYTES;
GOTO_ERR_IF(BN_Bn2Bin(in, bin, &len), ret);
for (uint32_t i = 0; i < FELEM_BYTES; ++i) {
bin[FELEM_BYTES - 1 - i] = i < len ? bin[len - 1 - i] : 0;
}
Bin2Felem(out, bin);
ERR:
return ret;
}
* Convert Felem to BN
* Input:
* in[] < 2^64
*/
static int32_t Felem2BN(BN_BigNum *out, const Felem *in)
{
int32_t ret;
uint8_t bin[FELEM_BYTES];
Felem2Bin(bin, in);
GOTO_ERR_IF(BN_Bin2Bn(out, bin, FELEM_BYTES), ret);
ERR:
return ret;
}
* Assignment
*/
static inline void FelemAssign(Felem *out, const Felem *in)
{
out->data[0] = in->data[0];
out->data[1] = in->data[1];
out->data[2] = in->data[2];
out->data[3] = in->data[3];
}
* Copy each digit by mask. If the corresponding bit is 1, copy it. If the corresponding bit is 0, retain it.
*/
static inline void FelemAssignWithMask(Felem *out, const Felem *in, const uint128_t mask)
{
uint128_t rmask = ~mask;
out->data[0] = (in->data[0] & mask) | (out->data[0] & rmask);
out->data[1] = (in->data[1] & mask) | (out->data[1] & rmask);
out->data[2] = (in->data[2] & mask) | (out->data[2] & rmask);
out->data[3] = (in->data[3] & mask) | (out->data[3] & rmask);
}
* Set the lowest digit
*/
static inline void FelemSetLimb(Felem *out, const uint128_t in)
{
out->data[0] = in;
out->data[1] = 0;
out->data[2] = 0;
out->data[3] = 0;
}
* Zero judgment: input less than 2 ^ 256, only 0 and p need to be judged.
* Input:
* in[] < 2^64
*/
static inline uint128_t FelemIsZero(const Felem *in)
{
uint128_t isZero, isP;
isZero = in->data[0] | in->data[1] | in->data[2] | in->data[3];
isZero -= 1;
isP = (in->data[0] ^ FIELD_ORDER.data[0]) |
(in->data[1] ^ FIELD_ORDER.data[1]) |
(in->data[2] ^ FIELD_ORDER.data[2]) |
(in->data[3] ^ FIELD_ORDER.data[3]);
isP -= 1;
return (isZero | isP) >> (LIMB_BITS - 1);
}
* Obtain the bit string whose length is len at idx in Felem.
* Input:
* in[] < 2^64
* 0 < len <= 64
*/
static uint64_t FelemGetBits(const Felem *in, int32_t idx, uint32_t len)
{
uint128_t ret;
uint32_t lower, upper;
uint64_t mask;
lower = (uint32_t)idx;
mask = (uint64_t)0 - ((~lower & (lower - 256)) >> 31);
ret = (uint64_t)in->data[(lower / BASE_BITS) & mask] & mask;
upper = (uint32_t)idx + BASE_BITS;
mask = (uint64_t)0 - ((~upper & (upper - 256)) >> 31);
ret |= (uint128_t)((uint64_t)in->data[(upper / BASE_BITS) & mask] & mask) << BASE_BITS;
ret >>= lower & (BASE_BITS - 1);
ret &= ((uint64_t)1 << len) - 1;
return (uint64_t)ret;
}
* Field element reduction, retains the base bit (64 bits) of each digit in the 4-ary Felem, and reduce the high bit.
* Should be called before modular multiplication, modulus square, or modulus
* Input:
* in[] < 2^127
* Output:
* out[] < 2^64
*/
static void FelemReduce(Felem *out, const Felem *in)
{
uint128_t *po = out->data;
const uint128_t *pi = in->data;
const uint128_t borrow = (uint128_t)1 << 96;
const uint128_t lend = (uint128_t)1 << 32;
uint128_t carryLimb, carryLimbTotal;
po[1] = pi[1] + (pi[0] >> BASE_BITS);
po[2] = pi[2] + (po[1] >> BASE_BITS);
po[3] = pi[3] + (po[2] >> BASE_BITS);
po[0] = (uint64_t)pi[0];
po[1] = (uint64_t)po[1];
po[2] = (uint64_t)po[2];
carryLimbTotal = carryLimb = po[3] >> BASE_BITS;
po[3] = (uint64_t)po[3] + (carryLimb << 32) - carryLimb;
carryLimbTotal += (carryLimb = po[3] >> BASE_BITS);
po[3] = (uint64_t)po[3] + (carryLimb << 32) - carryLimb;
carryLimbTotal += (carryLimb = po[3] >> BASE_BITS);
po[3] = (uint64_t)po[3] + (carryLimb << 32) - carryLimb;
po[0] += carryLimbTotal;
po[1] += ((po[0] >> BASE_BITS) ^ borrow) - (carryLimbTotal << 32);
po[2] += ((po[1] >> BASE_BITS) ^ borrow) - lend;
po[3] += (po[2] >> BASE_BITS) - lend;
po[0] = (uint64_t)po[0];
po[1] = (uint64_t)po[1];
po[2] = (uint64_t)po[2];
}
* Field element reduction, converting 8-ary LongFelem to 4-ary Felem
* Input:
* in[] < 2^80
* Output:
* out[] < 2^115
* - out[0] < (2^114 + 2^82 - 2^50) + 2^(80 + 32) + 2 * 2^80 < 2^115
* - out[1] < (2^114 + 2^82 - 2^50) + 2^(80 + 33) + 2 * 2^80 < 2^115
* - out[2] < (2^114 + 2^82 - 2^50) + 2^(80 + 33) + 4 * 2^80 < 2^115
* - out[3] < (2^114 + 2^82 - 2^50) + 2^(80 + 32) + 4 * 2^80 < 2^115
* out[] > 0
* - out[0] > (2^114 - 2^82) - 2 * 2^(80 + 32) - 2 * 2^80 > 0
* - out[1] > (2^114 - 2^82) - 2^(80 + 32) - 2^80 > 0
* - out[2] > (2^114 - 2^82) - 2^(80 + 32) - 2^80 > 0
* - out[3] > (2^114 - 2^82) - 2 * 2^(80 + 32) - 2^80 > 0
*/
static void LongFelemReduce(Felem *out, const LongFelem *in)
{
static const Felem zeroBase = {
{
(((uint128_t)1 << 64) - 1) << 50,
(((uint128_t)1 << 64) + ((uint128_t)1 << 32) - 1) << 50,
(((uint128_t)1 << 64) - 1) << 50,
(((uint128_t)1 << 64) - ((uint128_t)1 << 32)) << 50,
}
};
uint128_t *po = out->data;
const uint128_t *pi = in->data;
po[0] = zeroBase.data[0] + pi[0] + pi[4] + (pi[5] << 32) - pi[6] - (pi[6] << 32) - pi[7] - (pi[7] << 32);
po[1] = zeroBase.data[1] + pi[1] - (pi[4] << 32) + pi[5] + (pi[6] << 33) - pi[7];
po[2] = zeroBase.data[2] + pi[2] - pi[5] - (pi[5] << 32) + (pi[6] << 1) + pi[7] + (pi[7] << 33);
po[3] = zeroBase.data[3] + pi[3] - pi[4] + (pi[4] << 32) - (pi[5] << 32) - (pi[6] << 32) + (pi[7] * 3);
}
* field element modulo: convert the field element to a unique value within [0, p)
* Input:
* in[] < 2^64
* Output:
* out < p, out[] < 2^64
*/
static void FelemContract(Felem *out, const Felem *in)
{
uint128_t *po = out->data;
const uint128_t *pi = in->data;
const uint128_t borrow = (uint128_t)1 << BASE_BITS;
const uint128_t lend = 1;
bool isGreaterOrEqual = true;
for (int32_t i = 3; i >= 0; i--) {
if (pi[i] > FIELD_ORDER.data[i]) {
break;
} else if (pi[i] < FIELD_ORDER.data[i]) {
isGreaterOrEqual = false;
break;
}
}
if (isGreaterOrEqual) {
po[3] = (pi[3] - lend) - FIELD_ORDER.data[3];
po[2] = (borrow ^ pi[2]) - lend;
po[1] = ((borrow ^ pi[1]) - lend) - FIELD_ORDER.data[1];
po[0] = (borrow ^ pi[0]) - FIELD_ORDER.data[0];
}
po[1] += po[0] >> BASE_BITS;
po[2] += po[1] >> BASE_BITS;
po[3] += po[2] >> BASE_BITS;
po[0] = (uint64_t)po[0];
po[1] = (uint64_t)po[1];
po[2] = (uint64_t)po[2];
}
* Field Addition
*/
static inline void FelemAdd(Felem *out, const Felem *a, const Felem *b)
{
out->data[0] = a->data[0] + b->data[0];
out->data[1] = a->data[1] + b->data[1];
out->data[2] = a->data[2] + b->data[2];
out->data[3] = a->data[3] + b->data[3];
}
* Field element negation
* Input:
* in[] <= 2^124 - 2^92
* Output:
* out[] <= 2^124 + 2^92 - 2^60 + in[]
*/
static inline void FelemNeg(Felem *out, const Felem *in)
{
static const Felem zeroBase = {{
(((uint128_t)1 << 64) - 1) << 60,
(((uint128_t)1 << 64) + ((uint128_t)1 << 32) - 1) << 60,
(((uint128_t)1 << 64) - 1) << 60,
(((uint128_t)1 << 64) - ((uint128_t)1 << 32)) << 60,
}};
out->data[0] = zeroBase.data[0] - in->data[0];
out->data[1] = zeroBase.data[1] - in->data[1];
out->data[2] = zeroBase.data[2] - in->data[2];
out->data[3] = zeroBase.data[3] - in->data[3];
}
* Field subtraction
* Input:
* a[] < 2^128 - 2^124 - 2^92 + 2^60,b[] <= 2^124 - 2^92
* Output:
* out[] <= 2^124 + 2^92 - 2^60 + a[] < 2^128
*/
static inline void FelemSub(Felem *out, const Felem *a, const Felem *b)
{
static const Felem zeroBase = {{
(((uint128_t)1 << 64) - 1) << 60,
(((uint128_t)1 << 64) + ((uint128_t)1 << 32) - 1) << 60,
(((uint128_t)1 << 64) - 1) << 60,
(((uint128_t)1 << 64) - ((uint128_t)1 << 32)) << 60,
}};
out->data[0] = zeroBase.data[0] + a->data[0] - b->data[0];
out->data[1] = zeroBase.data[1] + a->data[1] - b->data[1];
out->data[2] = zeroBase.data[2] + a->data[2] - b->data[2];
out->data[3] = zeroBase.data[3] + a->data[3] - b->data[3];
}
* Field subtraction. Input LongFelem directly.
* Input:
* a[] < 2^128 - 2^74 - 2^42 + 2^10,b[] <= 2^74 - 2^42
* Output:
* out[] <= 2^74 + 2^42 - 2^10 + a[] < 2^128
*/
static void LongFelemSub(LongFelem *out, const LongFelem *a, const LongFelem *b)
{
static const LongFelem zeroBase = {{
((uint128_t)1 << 64) << 10,
(((uint128_t)1 << 64) - 1) << 10,
(((uint128_t)1 << 64) - 1) << 10,
(((uint128_t)1 << 64) - 1) << 10,
(((uint128_t)1 << 64) - 2) << 10,
(((uint128_t)1 << 64) + ((uint128_t)1 << 32) - 1) << 10,
(((uint128_t)1 << 64) - 1) << 10,
(((uint128_t)1 << 64) - ((uint128_t)1 << 32)) << 10,
}};
out->data[0] = zeroBase.data[0] + a->data[0] - b->data[0];
out->data[1] = zeroBase.data[1] + a->data[1] - b->data[1];
out->data[2] = zeroBase.data[2] + a->data[2] - b->data[2];
out->data[3] = zeroBase.data[3] + a->data[3] - b->data[3];
out->data[4] = zeroBase.data[4] + a->data[4] - b->data[4];
out->data[5] = zeroBase.data[5] + a->data[5] - b->data[5];
out->data[6] = zeroBase.data[6] + a->data[6] - b->data[6];
out->data[7] = zeroBase.data[7] + a->data[7] - b->data[7];
}
* Scale the field element
* Use only a small magnification(scale) factor to ensure that in[] * scalar does not overflow.
*/
static inline void FelemScale(Felem *out, const Felem *in, const uint32_t scalar)
{
out->data[0] = in->data[0] * scalar;
out->data[1] = in->data[1] * scalar;
out->data[2] = in->data[2] * scalar;
out->data[3] = in->data[3] * scalar;
}
* Scale the field element. Input LongFelem directly.
* Use only a small magnification(scale) factor to ensure that in[] * scalar does not overflow.
*/
static inline void LongFelemScale(LongFelem *out, const LongFelem *in, const uint32_t scalar)
{
out->data[0] = in->data[0] * scalar;
out->data[1] = in->data[1] * scalar;
out->data[2] = in->data[2] * scalar;
out->data[3] = in->data[3] * scalar;
out->data[4] = in->data[4] * scalar;
out->data[5] = in->data[5] * scalar;
out->data[6] = in->data[6] * scalar;
out->data[7] = in->data[7] * scalar;
}
* Field Multiplication
* Input:
* a[] < 2^64, b[] < 2^64
* Output:
* out[] < 2^67
* - out[0] < 2^64
* - out[1] < 2^64 * 3
* - out[2] < 2^64 * 5
* - out[3] < 2^64 * 7
* - out[4] < 2^64 * 7
* - out[5] < 2^64 * 5
* - out[6] < 2^64 * 3
* - out[7] < 2^64
*/
static void FelemMul(LongFelem *out, const Felem *a, const Felem *b)
{
const uint64_t a64[4] = {(uint64_t)a->data[0], (uint64_t)a->data[1], (uint64_t)a->data[2], (uint64_t)a->data[3]};
const uint64_t b64[4] = {(uint64_t)b->data[0], (uint64_t)b->data[1], (uint64_t)b->data[2], (uint64_t)b->data[3]};
uint128_t limbMul;
limbMul = (uint128_t)a64[0] * b64[0];
out->data[0] = (uint64_t)limbMul;
out->data[1] = limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[0] * b64[1];
out->data[1] += (uint64_t)limbMul;
out->data[2] = limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[1] * b64[0];
out->data[1] += (uint64_t)limbMul;
out->data[2] += limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[0] * b64[2];
out->data[2] += (uint64_t)limbMul;
out->data[3] = limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[1] * b64[1];
out->data[2] += (uint64_t)limbMul;
out->data[3] += limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[2] * b64[0];
out->data[2] += (uint64_t)limbMul;
out->data[3] += limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[0] * b64[3];
out->data[3] += (uint64_t)limbMul;
out->data[4] = limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[1] * b64[2];
out->data[3] += (uint64_t)limbMul;
out->data[4] += limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[2] * b64[1];
out->data[3] += (uint64_t)limbMul;
out->data[4] += limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[3] * b64[0];
out->data[3] += (uint64_t)limbMul;
out->data[4] += limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[1] * b64[3];
out->data[4] += (uint64_t)limbMul;
out->data[5] = limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[2] * b64[2];
out->data[4] += (uint64_t)limbMul;
out->data[5] += limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[3] * b64[1];
out->data[4] += (uint64_t)limbMul;
out->data[5] += limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[2] * b64[3];
out->data[5] += (uint64_t)limbMul;
out->data[6] = limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[3] * b64[2];
out->data[5] += (uint64_t)limbMul;
out->data[6] += limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[3] * b64[3];
out->data[6] += (uint64_t)limbMul;
out->data[7] = limbMul >> BASE_BITS;
}
* Field square
* Input:
* a[] < 2^64
* Output:
* out[] < 2^67
* - out[0] < 2^64
* - out[1] < 2^64 * 2
* - out[2] < 2^64 * 4
* - out[3] < 2^64 * 5
* - out[4] < 2^64 * 6
* - out[5] < 2^64 * 4
* - out[6] < 2^64 * 3
* - out[7] < 2^64
*/
static void FelemSqr(LongFelem *out, const Felem *a)
{
const uint64_t a64[4] = {(uint64_t)a->data[0], (uint64_t)a->data[1], (uint64_t)a->data[2], (uint64_t)a->data[3]};
uint128_t limbMul;
limbMul = (uint128_t)a64[0] * a64[0];
out->data[0] = (uint64_t)limbMul;
out->data[1] = limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[0] * a64[1];
out->data[1] += (uint64_t)limbMul << 1;
out->data[2] = limbMul >> (BASE_BITS - 1);
limbMul = (uint128_t)a64[0] * a64[2];
out->data[2] += (uint64_t)limbMul << 1;
out->data[3] = limbMul >> (BASE_BITS - 1);
limbMul = (uint128_t)a64[1] * a64[1];
out->data[2] += (uint64_t)limbMul;
out->data[3] += limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[0] * a64[3];
out->data[3] += (uint64_t)limbMul << 1;
out->data[4] = limbMul >> (BASE_BITS - 1);
limbMul = (uint128_t)a64[1] * a64[2];
out->data[3] += (uint64_t)limbMul << 1;
out->data[4] += limbMul >> (BASE_BITS - 1);
limbMul = (uint128_t)a64[1] * a64[3];
out->data[4] += (uint64_t)limbMul << 1;
out->data[5] = limbMul >> (BASE_BITS - 1);
limbMul = (uint128_t)a64[2] * a64[2];
out->data[4] += (uint64_t)limbMul;
out->data[5] += limbMul >> BASE_BITS;
limbMul = (uint128_t)a64[2] * a64[3];
out->data[5] += (uint64_t)limbMul << 1;
out->data[6] = limbMul >> (BASE_BITS - 1);
limbMul = (uint128_t)a64[3] * a64[3];
out->data[6] += (uint64_t)limbMul;
out->data[7] = limbMul >> BASE_BITS;
}
static inline void FelemMulReduce(Felem *out, const Felem *a, const Felem *b)
{
LongFelem ltmp;
FelemMul(<mp, a, b);
LongFelemReduce(out, <mp);
}
static inline void FelemSqrReduce(Felem *out, const Felem *in)
{
LongFelem ltmp;
FelemSqr(<mp, in);
LongFelemReduce(out, <mp);
}
static inline void FelemMulReduceToBase(Felem *out, const Felem *a, const Felem *b)
{
LongFelem ltmp;
FelemMul(<mp, a, b);
LongFelemReduce(out, <mp);
FelemReduce(out, out);
}
static inline void FelemSqrReduceToBase(Felem *out, const Felem *in)
{
LongFelem ltmp;
FelemSqr(<mp, in);
LongFelemReduce(out, <mp);
FelemReduce(out, out);
}
* Field element inversion
* From Fermat's little theorem, in^(p - 2) = in^(-1) (mod p)
* in^(-1) = in^(2^256 - 2^224 + 2^192 + 2^96 - 1 - 2) (mod p)
* Input:
* in[] < 2^64
* Output:
* out[] < 2^64
*/
static void FelemInv(Felem *out, const Felem *in)
{
Felem inE3, inEf, inEff, inEffff, inEffffffff, inEfx16Lsh32, inEfffffffd;
FelemAssign(out, in);
FelemSqrReduceToBase(out, out);
FelemMulReduceToBase(out, out, in);
FelemAssign(&inE3, out);
FelemSqrReduceToBase(out, out);
FelemSqrReduceToBase(out, out);
FelemMulReduceToBase(&inEfffffffd, out, in);
FelemMulReduceToBase(out, out, &inE3);
FelemAssign(&inEf, out);
FelemSqrReduceToBase(out, out);
FelemSqrReduceToBase(out, out);
FelemSqrReduceToBase(out, out);
FelemSqrReduceToBase(out, out);
FelemMulReduceToBase(&inEfffffffd, out, &inEfffffffd);
FelemMulReduceToBase(out, out, &inEf);
FelemAssign(&inEff, out);
for (int32_t i = 0; i < 8; ++i) {
FelemSqrReduceToBase(out, out);
}
FelemMulReduceToBase(&inEfffffffd, out, &inEfffffffd);
FelemMulReduceToBase(out, out, &inEff);
FelemAssign(&inEffff, out);
for (int32_t i = 0; i < 16; ++i) {
FelemSqrReduceToBase(out, out);
}
FelemMulReduceToBase(&inEfffffffd, out, &inEfffffffd);
FelemMulReduceToBase(out, out, &inEffff);
FelemAssign(&inEffffffff, out);
for (int32_t i = 0; i < 32; ++i) {
FelemSqrReduceToBase(out, out);
}
FelemAssign(&inEfx16Lsh32, out);
for (int32_t i = 0; i < 32; ++i) {
FelemSqrReduceToBase(out, out);
}
FelemMulReduceToBase(&inEfx16Lsh32, out, &inEfx16Lsh32);
FelemMulReduceToBase(out, out, &inEffffffff);
FelemMulReduceToBase(out, out, in);
for (int32_t i = 0; i < 160; ++i) {
FelemSqrReduceToBase(out, out);
}
FelemMulReduceToBase(out, out, &inEfx16Lsh32);
FelemMulReduceToBase(out, out, &inEfffffffd);
}
static inline void PtAssign(Point *out, const Point *in)
{
FelemAssign(&out->x, &in->x);
FelemAssign(&out->y, &in->y);
FelemAssign(&out->z, &in->z);
}
static inline void PtAssignWithMask(Point *out, const Point *in, const uint128_t mask)
{
FelemAssignWithMask(&out->x, &in->x, mask);
FelemAssignWithMask(&out->y, &in->y, mask);
FelemAssignWithMask(&out->z, &in->z, mask);
}
* point double
* Algorithm reference: http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
* Number of field operations: 3M + 5S
* delta = Z^2
* gamma = Y^2
* beta = X * gamma
* alpha = 3 * (X - delta) * (X + delta)
* X' = alpha^2 - 8 * beta
* Z' = (Y + Z)^2 - gamma - delta
* Y' = alpha * (4 * beta - X') - 8 * gamma^2
* Input:
* in->x[], in->y[], in->z[] < 2^64
* Output:
* out->x[], out->y[], out->z[] < 2^64
*/
static void PtDouble(Point *out, const Point *in)
{
Felem delta, gamma, beta, alpha;
Felem tmp, tmp2;
LongFelem ltmp, ltmp2;
FelemSqrReduce(&delta, &in->z);
FelemSqrReduceToBase(&gamma, &in->y);
FelemMulReduce(&beta, &in->x, &gamma);
FelemAdd(&tmp, &in->x, &delta);
FelemScale(&tmp, &tmp, 3);
FelemSub(&tmp2, &in->x, &delta);
FelemReduce(&tmp, &tmp);
FelemReduce(&tmp2, &tmp2);
FelemMulReduceToBase(&alpha, &tmp, &tmp2);
FelemSqrReduce(&tmp, &alpha);
FelemScale(&tmp2, &beta, 8);
FelemSub(&out->x, &tmp, &tmp2);
FelemReduce(&out->x, &out->x);
FelemAdd(&tmp, &in->y, &in->z);
FelemReduce(&tmp, &tmp);
FelemSqrReduce(&tmp, &tmp);
FelemAdd(&tmp2, &gamma, &delta);
FelemSub(&out->z, &tmp, &tmp2);
FelemReduce(&out->z, &out->z);
FelemScale(&beta, &beta, 4);
FelemSub(&tmp, &beta, &out->x);
FelemReduce(&tmp, &tmp);
FelemMul(<mp, &alpha, &tmp);
FelemSqr(<mp2, &gamma);
LongFelemScale(<mp2, <mp2, 8);
LongFelemSub(<mp, <mp, <mp2);
LongFelemReduce(&out->y, <mp);
FelemReduce(&out->y, &out->y);
}
* point addition
* Algorithm reference: http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl
* Infinity point calculation is not supported.
* Number of field operations: 11M + 5S
* Z1Z1 = Z1^2
* Z2Z2 = Z2^2
* U1 = X1 * Z2Z2
* U2 = X2 * Z1Z1
* S1 = Y1 * Z2 * Z2Z2
* S2 = Y2 * Z1 * Z1Z1
* H = U2 - U1
* I = (2 * H)^2
* J = H * I
* r = 2 * (S2 - S1)
* V = U1 * I
* X3 = r^2 - J - 2 * V
* Y3 = r * (V - X3) - 2 * S1 * J
* Z3 = ((Z1 + Z2)^2 - Z1Z1 - Z2Z2) * H
* Input:
* a->x[], a->y[], a->z[] < 2^64
* b->x[], b->y[], b->z[] < 2^64
* Output:
* out->x[], out->y[], out->z[] < 2^64
*/
static void PtAdd(Point *out, const Point *a, const Point *b)
{
Point result;
Felem z1sqr, z2sqr, u1, u2, s1, s2, h, i, j, r, v;
Felem tmp;
LongFelem ltmp, ltmp2;
uint128_t isZ1Zero, isZ2Zero;
FelemSqrReduceToBase(&z1sqr, &a->z);
FelemSqrReduceToBase(&z2sqr, &b->z);
isZ1Zero = 0 - FelemIsZero(&z1sqr);
isZ2Zero = 0 - FelemIsZero(&z2sqr);
FelemMulReduceToBase(&u1, &a->x, &z2sqr);
FelemMulReduce(&u2, &b->x, &z1sqr);
FelemMulReduceToBase(&s1, &b->z, &z2sqr);
FelemMulReduceToBase(&s1, &a->y, &s1);
FelemMulReduceToBase(&s2, &a->z, &z1sqr);
FelemMulReduce(&s2, &b->y, &s2);
FelemSub(&h, &u2, &u1);
FelemReduce(&h, &h);
FelemSub(&r, &s2, &s1);
FelemScale(&r, &r, 2);
FelemReduce(&r, &r);
if (isZ1Zero == 0 && isZ2Zero == 0 && FelemIsZero(&h) != 0 && FelemIsZero(&r) != 0) {
PtDouble(out, b);
return;
}
FelemSqrReduce(&i, &h);
FelemScale(&i, &i, 4);
FelemReduce(&i, &i);
FelemMulReduceToBase(&j, &h, &i);
FelemMulReduce(&v, &u1, &i);
FelemSqrReduce(&result.x, &r);
FelemScale(&tmp, &v, 2);
FelemAdd(&tmp, &j, &tmp);
FelemSub(&result.x, &result.x, &tmp);
FelemReduce(&result.x, &result.x);
FelemSub(&tmp, &v, &result.x);
FelemReduce(&tmp, &tmp);
FelemMul(<mp, &r, &tmp);
FelemMul(<mp2, &s1, &j);
LongFelemScale(<mp2, <mp2, 2);
LongFelemSub(<mp, <mp, <mp2);
LongFelemReduce(&result.y, <mp);
FelemReduce(&result.y, &result.y);
FelemAdd(&result.z, &a->z, &b->z);
FelemReduce(&result.z, &result.z);
FelemSqrReduce(&result.z, &result.z);
FelemAdd(&tmp, &z1sqr, &z2sqr);
FelemSub(&result.z, &result.z, &tmp);
FelemReduce(&result.z, &result.z);
FelemMulReduceToBase(&result.z, &result.z, &h);
PtAssignWithMask(&result, a, isZ2Zero);
PtAssignWithMask(&result, b, isZ1Zero);
PtAssign(out, &result);
}
* Mixed point addition
* Algorithm reference: http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-madd-2007-bl
* Infinity point calculation is not supported.
* Number of field operations: 7M + 4S
* Z1Z1 = Z1^2
* U2 = X2 * Z1Z1
* S2 = Y2 * Z1 * Z1Z1
* H = U2 - X1
* HH = H^2
* I = 4 * HH
* J = H * I
* r = 2 * (S2 - Y1)
* V = X1 * I
* X3 = r^2 - J - 2 * V
* Y3 = r * (V - X3) - 2 * Y1 * J
* Z3 = (Z1 + H)^2 - Z1Z1 - HH
* Input:
* a->x[] < 2^64, a->y[] < 2^64, a->z[] < 2^64
* b->x < p, b->y < p, b->z = 0 或 1
* Output:
* out->x[] < 2^64
* out->y[] < 2^64
* out->z[] < 2^64
*/
static void PtAddMixed(Point *out, const Point *a, const Point *b)
{
Point result;
Felem z1sqr, u2, s2, h, hsqr, i, j, r, v;
Felem tmp;
LongFelem ltmp, ltmp2;
uint128_t isZ1Zero, isZ2Zero;
FelemSqrReduceToBase(&z1sqr, &a->z);
isZ1Zero = 0 - FelemIsZero(&z1sqr);
isZ2Zero = b->z.data[0] - 1;
FelemMulReduce(&u2, &b->x, &z1sqr);
FelemMulReduceToBase(&s2, &a->z, &z1sqr);
FelemMulReduce(&s2, &b->y, &s2);
FelemSub(&h, &u2, &a->x);
FelemReduce(&h, &h);
FelemSub(&r, &s2, &a->y);
FelemScale(&r, &r, 2);
FelemReduce(&r, &r);
if (isZ1Zero == 0 && isZ2Zero == 0 && FelemIsZero(&h) != 0 && FelemIsZero(&r) != 0) {
PtDouble(out, b);
return;
}
FelemSqrReduce(&hsqr, &h);
FelemScale(&i, &hsqr, 4);
FelemReduce(&i, &i);
FelemMulReduceToBase(&j, &h, &i);
FelemMulReduce(&v, &a->x, &i);
FelemSqrReduce(&result.x, &r);
FelemScale(&tmp, &v, 2);
FelemAdd(&tmp, &j, &tmp);
FelemSub(&result.x, &result.x, &tmp);
FelemReduce(&result.x, &result.x);
FelemSub(&tmp, &v, &result.x);
FelemReduce(&tmp, &tmp);
FelemMul(<mp, &r, &tmp);
FelemMul(<mp2, &a->y, &j);
LongFelemScale(<mp2, <mp2, 2);
LongFelemSub(<mp, <mp, <mp2);
LongFelemReduce(&result.y, <mp);
FelemReduce(&result.y, &result.y);
FelemAdd(&result.z, &a->z, &h);
FelemReduce(&result.z, &result.z);
FelemSqrReduce(&result.z, &result.z);
FelemAdd(&tmp, &z1sqr, &hsqr);
FelemSub(&result.z, &result.z, &tmp);
FelemReduce(&result.z, &result.z);
PtAssignWithMask(&result, a, isZ2Zero);
PtAssignWithMask(&result, b, isZ1Zero);
PtAssign(out, &result);
}
static inline void GetPointFromTable(Point *point, const Point table[], uint32_t pointNum, const uint32_t index)
{
uint128_t mask;
for (uint32_t i = 0; i < pointNum; i++) {
mask = (0 - (i ^ index)) >> 31;
mask--;
PtAssignWithMask(point, &table[i], mask);
}
}
* Input:
* k1 < n
* 0 <= i < 32
* Output:
* out->x < p, out->y < p, out->z = 0 或 1
*/
static inline void GetLowerPrecomputePtOfG(Point *out, const Felem *k1, int32_t curBit)
{
uint32_t bits;
uint32_t i = (uint32_t)curBit;
bits = (uint32_t)(k1->data[0] >> i) & 1;
bits |= ((uint32_t)(k1->data[1] >> i) & 1) << 1;
bits |= ((uint32_t)(k1->data[2] >> i) & 1) << 2;
bits |= ((uint32_t)(k1->data[3] >> i) & 1) << 3;
GetPointFromTable(out, PRE_MUL_G, TABLE_G_SIZE, bits);
}
* Input:
* k1 < n
* 0 <= i < 32
* Output:
* out->x < p, out->y < p, out->z = 0 或 1
*/
static inline void GetUpperPrecomputePtOfG(Point *out, const Felem *k1, int32_t curBit)
{
uint32_t bits;
uint32_t i = (uint32_t)curBit;
bits = (uint32_t)(k1->data[0] >> (i + BASE_BITS / 2)) & 1;
bits |= ((uint32_t)(k1->data[1] >> (i + BASE_BITS / 2)) & 1) << 1;
bits |= ((uint32_t)(k1->data[2] >> (i + BASE_BITS / 2)) & 1) << 2;
bits |= ((uint32_t)(k1->data[3] >> (i + BASE_BITS / 2)) & 1) << 3;
GetPointFromTable(out, PRE_MUL_G2, TABLE_G_SIZE, bits);
}
* Input:
* k2 < n
* 0 <= i <= 255
* The coordinates of each point of preMulPt are reduced.
* Output:
* out->x[] < 2^64, out->y[] < 2^64, out->z[] < 2^64
*/
static inline void GetPrecomputePtOfP(Point *out, const Felem *k2, int32_t curBit, const Point preMulPt[TABLE_P_SIZE])
{
uint32_t bits;
uint32_t sign, value;
Felem negY;
bits = (uint32_t)FelemGetBits(k2, curBit - 1, WINDOW_SIZE + 1);
DecodeScalarCode(&sign, &value, bits);
GetPointFromTable(out, preMulPt, TABLE_P_SIZE, value);
FelemNeg(&negY, &out->y);
FelemReduce(&negY, &negY);
FelemAssignWithMask(&out->y, &negY, (uint128_t)0 - sign);
}
* Calculate k1 * G + k2 * P
* Input:
* k1 < n
* k2 < n
* The coordinates of each point of preMulPt are reduced.
* Output:
* out->x < p, out->y < p, out->z < p
*/
static void PtMul(Point *out, const Felem *k1, const Felem *k2, const Point preMulPt[TABLE_P_SIZE])
{
Point ptQ = {};
Point ptPre = {};
bool isGMul = k1 != NULL;
bool isPtMul = k2 != NULL && preMulPt != NULL;
int32_t curBit;
if (isPtMul) {
curBit = 255;
GetPrecomputePtOfP(&ptQ, k2, curBit, preMulPt);
} else if (isGMul) {
curBit = 31;
GetLowerPrecomputePtOfG(&ptQ, k1, curBit);
GetUpperPrecomputePtOfG(&ptPre, k1, curBit);
PtAddMixed(&ptQ, &ptQ, &ptPre);
} else {
memset((void *)out, 0, sizeof(Point));
return;
}
while (--curBit >= 0) {
bool isStepGMul = curBit <= 31;
bool isStepPtMul = curBit % WINDOW_SIZE == 0;
PtDouble(&ptQ, &ptQ);
if (isGMul && isStepGMul) {
GetLowerPrecomputePtOfG(&ptPre, k1, curBit);
PtAddMixed(&ptQ, &ptQ, &ptPre);
GetUpperPrecomputePtOfG(&ptPre, k1, curBit);
PtAddMixed(&ptQ, &ptQ, &ptPre);
}
if (isPtMul && isStepPtMul) {
GetPrecomputePtOfP(&ptPre, k2, curBit, preMulPt);
PtAdd(&ptQ, &ptQ, &ptPre);
}
}
FelemContract(&ptQ.x, &ptQ.x);
FelemContract(&ptQ.y, &ptQ.y);
FelemContract(&ptQ.z, &ptQ.z);
PtAssign(out, &ptQ);
}
* Convert Jacobian coordinates to affine coordinates by a given module inverse
* Input:
* in->x[] < 2^64, in->y[] < 2^64
* zInv < 2^64
* Output:
* out->x < p, out->y < p, out->z = 1
*/
static void PtMakeAffineWithInv(Point *out, const Point *in, const Felem *zInv)
{
Felem tmp;
FelemSqrReduceToBase(&tmp, zInv);
FelemMulReduceToBase(&out->x, &in->x, &tmp);
FelemContract(&out->x, &out->x);
FelemMulReduceToBase(&tmp, &tmp, zInv);
FelemMulReduceToBase(&out->y, &in->y, &tmp);
FelemContract(&out->y, &out->y);
FelemSetLimb(&out->z, 1);
}
* Obtain the pre-multiplication table of the input point pt, including 0pt-16pt. All points are reduced.
*/
static int32_t GetPreMulPt(Point preMulPt[TABLE_P_SIZE], const ECC_Point *pt)
{
int32_t ret;
memset((void *)&preMulPt[0], 0, sizeof(Point));
GOTO_ERR_IF_EX(BN2Felem(&preMulPt[1].x, &pt->x), ret);
GOTO_ERR_IF_EX(BN2Felem(&preMulPt[1].y, &pt->y), ret);
GOTO_ERR_IF_EX(BN2Felem(&preMulPt[1].z, &pt->z), ret);
for (uint32_t i = 2; i < 15; i += 2) {
PtDouble(&preMulPt[i], &preMulPt[i >> 1]);
PtAdd(&preMulPt[i + 1], &preMulPt[i], &preMulPt[1]);
}
PtDouble(&preMulPt[16], &preMulPt[16 >> 1]);
ERR:
return ret;
}
int32_t ECP256_PointMulAdd(
ECC_Para *para, ECC_Point *r, const BN_BigNum *k1, const BN_BigNum *k2, const ECC_Point *pt)
{
(void)para;
int32_t ret;
Felem felemK1;
Felem felemK2;
Point preMulPt[TABLE_P_SIZE];
Point out;
GOTO_ERR_IF(CheckBnValid(k1, FELEM_BITS), ret);
GOTO_ERR_IF(CheckBnValid(k2, FELEM_BITS), ret);
GOTO_ERR_IF_EX(BN2Felem(&felemK1, k1), ret);
GOTO_ERR_IF_EX(BN2Felem(&felemK2, k2), ret);
GOTO_ERR_IF_EX(GetPreMulPt(preMulPt, pt), ret);
PtMul(&out, &felemK1, &felemK2, preMulPt);
GOTO_ERR_IF_EX(Felem2BN(&r->x, &out.x), ret);
GOTO_ERR_IF_EX(Felem2BN(&r->y, &out.y), ret);
GOTO_ERR_IF_EX(Felem2BN(&r->z, &out.z), ret);
ERR:
return ret;
}
int32_t ECP256_PointMul(ECC_Para *para, ECC_Point *r, const BN_BigNum *k, const ECC_Point *pt)
{
(void)para;
int32_t ret;
Felem felemK;
Point preMulPt[TABLE_P_SIZE];
Point out;
GOTO_ERR_IF(CheckBnValid(k, FELEM_BITS), ret);
GOTO_ERR_IF_EX(BN2Felem(&felemK, k), ret);
if (pt != NULL) {
GOTO_ERR_IF_EX(GetPreMulPt(preMulPt, pt), ret);
PtMul(&out, NULL, &felemK, preMulPt);
} else {
PtMul(&out, &felemK, NULL, NULL);
}
GOTO_ERR_IF_EX(Felem2BN(&r->x, &out.x), ret);
GOTO_ERR_IF_EX(Felem2BN(&r->y, &out.y), ret);
GOTO_ERR_IF_EX(Felem2BN(&r->z, &out.z), ret);
ERR:
return ret;
}
int32_t ECP256_Point2Affine(const ECC_Para *para, ECC_Point *r, const ECC_Point *pt)
{
(void)para;
int32_t ret;
Point out;
Felem zInv;
GOTO_ERR_IF_EX(BN2Felem(&out.x, &pt->x), ret);
GOTO_ERR_IF_EX(BN2Felem(&out.y, &pt->y), ret);
GOTO_ERR_IF_EX(BN2Felem(&out.z, &pt->z), ret);
FelemInv(&zInv, &out.z);
PtMakeAffineWithInv(&out, &out, &zInv);
GOTO_ERR_IF_EX(Felem2BN(&r->x, &out.x), ret);
GOTO_ERR_IF_EX(Felem2BN(&r->y, &out.y), ret);
GOTO_ERR_IF_EX(Felem2BN(&r->z, &out.z), ret);
ERR:
return ret;
}
int32_t ECP256_ModOrderInv(const ECC_Para *para, BN_BigNum *r, const BN_BigNum *a)
{
if (BN_IsZero(a)) {
BSL_ERR_PUSH_ERROR(CRYPT_ECC_INVERSE_INPUT_ZERO);
return CRYPT_ECC_INVERSE_INPUT_ZERO;
}
return ECP_ModOrderInv(para, r, a);
}
#endif