vendor libsecp256k1 so it doesn't depend on a shared library.

2025-11-18 01:56:30 +01:00 · 2024-12-26 17:05:17 -03:00
parent 95ddacb9f3
commit 46645ad4d6
74 changed files with 36414 additions and 4 deletions
--- a/23
+++ b/23
@@ -0,0 +1,23 @@
+# Print the recipes defined in this justfile.
+list:
+    @just --list
+
+# Re-vendor the libsecp256k1 sources from the upstream v0.6.0 tarball into ./libsecp256k1.
+vendor-libsecp256k1:
+    #!/usr/bin/env fish
+    # Start from a clean tree. -f keeps a first run (no directory yet) from erroring.
+    rm -rf libsecp256k1
+    # -p creates the intermediate directories (libsecp256k1, src, src/modules) as well.
+    mkdir -p libsecp256k1/include libsecp256k1/src/asm libsecp256k1/src/modules/extrakeys libsecp256k1/src/modules/schnorrsig
+    or exit 1
+
+    wget https://api.github.com/repos/bitcoin-core/secp256k1/tarball/v0.6.0 -O libsecp256k1.tar.gz
+    or exit 1
+    tar -xvf libsecp256k1.tar.gz
+    or exit 1
+    rm libsecp256k1.tar.gz
+    # Stop here if the extracted directory is missing or ambiguous: otherwise the
+    # `cd ..` and `rm -r` at the end would run relative to the wrong directory.
+    cd bitcoin-core-secp256k1-*
+    or exit 1
+    for f in include/secp256k1.h include/secp256k1_ecdh.h include/secp256k1_ellswift.h include/secp256k1_extrakeys.h include/secp256k1_preallocated.h include/secp256k1_recovery.h include/secp256k1_schnorrsig.h src/asm/field_10x26_arm.s src/assumptions.h src/bench.c src/bench.h src/bench_ecmult.c src/bench_internal.c src/checkmem.h src/ecdsa.h src/ecdsa_impl.h src/eckey.h src/eckey_impl.h src/ecmult.h src/ecmult_compute_table.h src/ecmult_compute_table_impl.h src/ecmult_const.h src/ecmult_const_impl.h src/ecmult_gen.h src/ecmult_gen_compute_table.h src/ecmult_gen_compute_table_impl.h src/ecmult_gen_impl.h src/ecmult_impl.h src/field.h src/field_10x26.h src/field_10x26_impl.h src/field_5x52.h src/field_5x52_impl.h src/field_5x52_int128_impl.h src/field_impl.h src/group.h src/group_impl.h src/hash.h src/hash_impl.h src/hsort.h src/hsort_impl.h src/int128.h src/int128_impl.h src/int128_native.h src/int128_native_impl.h src/int128_struct.h src/int128_struct_impl.h src/modinv32.h src/modinv32_impl.h src/modinv64.h src/modinv64_impl.h src/modules/extrakeys/main_impl.h src/modules/schnorrsig/main_impl.h src/precompute_ecmult.c src/precompute_ecmult_gen.c src/precomputed_ecmult.c src/precomputed_ecmult.h src/precomputed_ecmult_gen.c src/precomputed_ecmult_gen.h src/scalar.h src/scalar_4x64.h src/scalar_4x64_impl.h src/scalar_8x32.h src/scalar_8x32_impl.h src/scalar_impl.h src/scalar_low.h src/scalar_low_impl.h src/scratch.h src/scratch_impl.h src/secp256k1.c src/selftest.h src/util.h
+        # A missing file means the list above no longer matches the tarball; do not
+        # leave a silently incomplete vendored tree behind.
+        mv $f ../libsecp256k1/$f
+        or exit 1
+    end
+    cd ..
+    rm -r bitcoin-core-secp256k1-*
--- a/libsecp256k1/include/secp256k1.h
+++ b/libsecp256k1/include/secp256k1.h
@@ -0,0 +1,899 @@
+#ifndef SECP256K1_H
+#define SECP256K1_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>
+
+/** Unless explicitly stated all pointer arguments must not be NULL.
+ *
+ * The following rules specify the order of arguments in API calls:
+ *
+ * 1. Context pointers go first, followed by output arguments, combined
+ *    output/input arguments, and finally input-only arguments.
+ * 2. Array lengths always immediately follow the argument whose length
+ *    they describe, even if this violates rule 1.
+ * 3. Within the OUT/OUTIN/IN groups, pointers to data that is typically generated
+ *    later go first. This means: signatures, public nonces, secret nonces,
+ *    messages, public keys, secret keys, tweaks.
+ * 4. Arguments that are not data pointers go last, from more complex to less
+ *    complex: function pointers, algorithm names, messages, void pointers,
+ *    counts, flags, booleans.
+ * 5. Opaque data pointers follow the function pointer they are to be passed to.
+ */
+
+/** Opaque data structure that holds context information
+ *
+ *  The primary purpose of context objects is to store randomization data for
+ *  enhanced protection against side-channel leakage. This protection is only
+ *  effective if the context is randomized after its creation. See
+ *  secp256k1_context_create for creation of contexts and
+ *  secp256k1_context_randomize for randomization.
+ *
+ *  A secondary purpose of context objects is to store pointers to callback
+ *  functions that the library will call when certain error states arise. See
+ *  secp256k1_context_set_error_callback as well as
+ *  secp256k1_context_set_illegal_callback for details. Future library versions
+ *  may use context objects for additional purposes.
+ *
+ *  A constructed context can safely be used from multiple threads
+ *  simultaneously, but API calls that take a non-const pointer to a context
+ *  need exclusive access to it. In particular this is the case for
+ *  secp256k1_context_destroy, secp256k1_context_preallocated_destroy,
+ *  and secp256k1_context_randomize.
+ *
+ *  Regarding randomization, either do it once at creation time (in which case
+ *  you do not need any locking for the other calls), or use a read-write lock.
+ */
+typedef struct secp256k1_context_struct secp256k1_context;
+
+/** Opaque data structure that holds a parsed and valid public key.
+ *
+ *  The exact representation of data inside is implementation defined and not
+ *  guaranteed to be portable between different platforms or versions. It is
+ *  however guaranteed to be 64 bytes in size, and can be safely copied/moved.
+ *  If you need to convert to a format suitable for storage or transmission,
+ *  use secp256k1_ec_pubkey_serialize and secp256k1_ec_pubkey_parse. To
+ *  compare keys, use secp256k1_ec_pubkey_cmp.
+ */
+typedef struct secp256k1_pubkey {
+    unsigned char data[64];
+} secp256k1_pubkey;
+
+/** Opaque data structure that holds a parsed ECDSA signature.
+ *
+ *  The exact representation of data inside is implementation defined and not
+ *  guaranteed to be portable between different platforms or versions. It is
+ *  however guaranteed to be 64 bytes in size, and can be safely copied/moved.
+ *  If you need to convert to a format suitable for storage, transmission, or
+ *  comparison, use the secp256k1_ecdsa_signature_serialize_* and
+ *  secp256k1_ecdsa_signature_parse_* functions.
+ */
+typedef struct secp256k1_ecdsa_signature {
+    unsigned char data[64];
+} secp256k1_ecdsa_signature;
+
+/** A pointer to a function to deterministically generate a nonce.
+ *
+ * Returns: 1 if a nonce was successfully generated. 0 will cause signing to fail.
+ * Out:     nonce32:   pointer to a 32-byte array to be filled by the function.
+ * In:      msg32:     the 32-byte message hash being signed (will not be NULL)
+ *          key32:     pointer to a 32-byte secret key (will not be NULL)
+ *          algo16:    pointer to a 16-byte array describing the signature
+ *                     algorithm (will be NULL for ECDSA for compatibility).
+ *          data:      Arbitrary data pointer that is passed through.
+ *          attempt:   how many iterations we have tried to find a nonce.
+ *                     This will almost always be 0, but different attempt values
+ *                     are required to result in a different nonce.
+ *
+ * Except for test cases, this function should compute some cryptographic hash of
+ * the message, the algorithm, the key and the attempt.
+ */
+typedef int (*secp256k1_nonce_function)(
+    unsigned char *nonce32,
+    const unsigned char *msg32,
+    const unsigned char *key32,
+    const unsigned char *algo16,
+    void *data,
+    unsigned int attempt
+);
+
+/* SECP256K1_GNUC_PREREQ(_maj,_min) evaluates to nonzero when the compiler defines
+ * __GNUC__ and __GNUC_MINOR__ and reports a version of at least _maj._min, and to 0
+ * when those macros are not defined. A definition that already exists is kept. */
+# if !defined(SECP256K1_GNUC_PREREQ)
+#  if defined(__GNUC__)&&defined(__GNUC_MINOR__)
+#   define SECP256K1_GNUC_PREREQ(_maj,_min) \
+ ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
+#  else
+#   define SECP256K1_GNUC_PREREQ(_maj,_min) 0
+#  endif
+# endif
+
+/*  When this header is used at build-time the SECP256K1_BUILD define needs to be set
+ *  to correctly set up export attributes and nullness checks.  This is normally done
+ *  by secp256k1.c but to guard against this header being included before secp256k1.c
+ *  has had a chance to set the define (e.g. via test harnesses that just include
+ *  secp256k1.c) we set SECP256K1_NO_BUILD when this header is processed without the
+ *  BUILD define so this condition can be caught.
+ */
+#ifndef SECP256K1_BUILD
+# define SECP256K1_NO_BUILD
+#endif
+
+/* Symbol visibility. */
+#if defined(_WIN32)
+  /* GCC for Windows (e.g., MinGW) accepts the __declspec syntax
+   * for MSVC compatibility. A __declspec declaration implies (but is not
+   * exactly equivalent to) __attribute__ ((visibility("default"))), and so we
+   * actually want __declspec even on GCC, see "Microsoft Windows Function
+   * Attributes" in the GCC manual and the recommendations in
+   * https://gcc.gnu.org/wiki/Visibility. */
+# if defined(SECP256K1_BUILD)
+#  if defined(DLL_EXPORT) || defined(SECP256K1_DLL_EXPORT)
+    /* Building libsecp256k1 as a DLL.
+     * 1. If using Libtool, it defines DLL_EXPORT automatically.
+     * 2. In other cases, SECP256K1_DLL_EXPORT must be defined. */
+#   define SECP256K1_API extern __declspec (dllexport)
+#  else
+    /* Building libsecp256k1 as a static library on Windows.
+     * No declspec is needed, and so we would want the non-Windows-specific
+     * logic below to take care of this case. However, this may result in setting
+     * __attribute__ ((visibility("default"))), which is supposed to be a noop
+     * on Windows but may trigger warnings when compiling with -flto due to a
+     * bug in GCC, see
+     * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116478 . */
+#   define SECP256K1_API extern
+#  endif
+  /* The user must define SECP256K1_STATIC when consuming libsecp256k1 as a static
+   * library on Windows. */
+# elif !defined(SECP256K1_STATIC)
+   /* Consuming libsecp256k1 as a DLL. */
+#  define SECP256K1_API extern __declspec (dllimport)
+# endif
+#endif
+#ifndef SECP256K1_API
+/* All cases not captured by the Windows-specific logic. */
+# if defined(__GNUC__) && (__GNUC__ >= 4) && defined(SECP256K1_BUILD)
+   /* Building libsecp256k1 using GCC or compatible. */
+#  define SECP256K1_API extern __attribute__ ((visibility ("default")))
+# else
+   /* Fall back to standard C's extern. */
+#  define SECP256K1_API extern
+# endif
+#endif
+
+/* Warning attributes
+ * NONNULL is not used if SECP256K1_BUILD is set to avoid the compiler optimizing out
+ * some paranoid null checks. */
+# if defined(__GNUC__) && SECP256K1_GNUC_PREREQ(3, 4)
+#  define SECP256K1_WARN_UNUSED_RESULT __attribute__ ((__warn_unused_result__))
+# else
+#  define SECP256K1_WARN_UNUSED_RESULT
+# endif
+# if !defined(SECP256K1_BUILD) && defined(__GNUC__) && SECP256K1_GNUC_PREREQ(3, 4)
+#  define SECP256K1_ARG_NONNULL(_x)  __attribute__ ((__nonnull__(_x)))
+# else
+#  define SECP256K1_ARG_NONNULL(_x)
+# endif
+
+/* Attribute for marking functions, types, and variables as deprecated */
+#if !defined(SECP256K1_BUILD) && defined(__has_attribute)
+# if __has_attribute(__deprecated__)
+#  define SECP256K1_DEPRECATED(_msg) __attribute__ ((__deprecated__(_msg)))
+# else
+#  define SECP256K1_DEPRECATED(_msg)
+# endif
+#else
+# define SECP256K1_DEPRECATED(_msg)
+#endif
+
+/* All flags' lower 8 bits indicate what they're for. Do not use directly. */
+#define SECP256K1_FLAGS_TYPE_MASK ((1 << 8) - 1)
+#define SECP256K1_FLAGS_TYPE_CONTEXT (1 << 0)
+#define SECP256K1_FLAGS_TYPE_COMPRESSION (1 << 1)
+/* The higher bits contain the actual data. Do not use directly. */
+#define SECP256K1_FLAGS_BIT_CONTEXT_VERIFY (1 << 8)
+#define SECP256K1_FLAGS_BIT_CONTEXT_SIGN (1 << 9)
+#define SECP256K1_FLAGS_BIT_CONTEXT_DECLASSIFY (1 << 10)
+#define SECP256K1_FLAGS_BIT_COMPRESSION (1 << 8)
+
+/** Context flags to pass to secp256k1_context_create, secp256k1_context_preallocated_size, and
+ *  secp256k1_context_preallocated_create. */
+#define SECP256K1_CONTEXT_NONE (SECP256K1_FLAGS_TYPE_CONTEXT)
+
+/** Deprecated context flags. These flags are treated equivalent to SECP256K1_CONTEXT_NONE. */
+#define SECP256K1_CONTEXT_VERIFY (SECP256K1_FLAGS_TYPE_CONTEXT | SECP256K1_FLAGS_BIT_CONTEXT_VERIFY)
+#define SECP256K1_CONTEXT_SIGN (SECP256K1_FLAGS_TYPE_CONTEXT | SECP256K1_FLAGS_BIT_CONTEXT_SIGN)
+
+/* Testing flag. Do not use. */
+#define SECP256K1_CONTEXT_DECLASSIFY (SECP256K1_FLAGS_TYPE_CONTEXT | SECP256K1_FLAGS_BIT_CONTEXT_DECLASSIFY)
+
+/** Flag to pass to secp256k1_ec_pubkey_serialize. */
+#define SECP256K1_EC_COMPRESSED (SECP256K1_FLAGS_TYPE_COMPRESSION | SECP256K1_FLAGS_BIT_COMPRESSION)
+#define SECP256K1_EC_UNCOMPRESSED (SECP256K1_FLAGS_TYPE_COMPRESSION)
+
+/** Prefix byte used to tag various encoded curvepoints for specific purposes */
+#define SECP256K1_TAG_PUBKEY_EVEN 0x02
+#define SECP256K1_TAG_PUBKEY_ODD 0x03
+#define SECP256K1_TAG_PUBKEY_UNCOMPRESSED 0x04
+#define SECP256K1_TAG_PUBKEY_HYBRID_EVEN 0x06
+#define SECP256K1_TAG_PUBKEY_HYBRID_ODD 0x07
+
+/** A built-in constant secp256k1 context object with static storage duration, to be
+ *  used in conjunction with secp256k1_selftest.
+ *
+ *  This context object offers *only limited functionality*, i.e., it cannot be used
+ *  for API functions that perform computations involving secret keys, e.g., signing
+ *  and public key generation. If this restriction applies to a specific API function,
+ *  it is mentioned in its documentation. See secp256k1_context_create if you need a
+ *  full context object that supports all functionality offered by the library.
+ *
+ *  It is highly recommended to call secp256k1_selftest before using this context.
+ */
+SECP256K1_API const secp256k1_context *secp256k1_context_static;
+
+/** Deprecated alias for secp256k1_context_static. */
+SECP256K1_API const secp256k1_context *secp256k1_context_no_precomp
+SECP256K1_DEPRECATED("Use secp256k1_context_static instead");
+
+/** Perform basic self tests (to be used in conjunction with secp256k1_context_static)
+ *
+ *  This function performs self tests that detect some serious usage errors and
+ *  similar conditions, e.g., when the library is compiled for the wrong endianness.
+ *  This is a last resort measure to be used in production. The performed tests are
+ *  very rudimentary and are not intended as a replacement for running the test
+ *  binaries.
+ *
+ *  It is highly recommended to call this before using secp256k1_context_static.
+ *  It is not necessary to call this function before using a context created with
+ *  secp256k1_context_create (or secp256k1_context_preallocated_create), which will
+ *  take care of performing the self tests.
+ *
+ *  If the tests fail, this function will call the default error handler to abort the
+ *  program (see secp256k1_context_set_error_callback).
+ */
+SECP256K1_API void secp256k1_selftest(void);
+
+
+/** Create a secp256k1 context object (in dynamically allocated memory).
+ *
+ *  This function uses malloc to allocate memory. It is guaranteed that malloc is
+ *  called at most once for every call of this function. If you need to avoid dynamic
+ *  memory allocation entirely, see secp256k1_context_static and the functions in
+ *  secp256k1_preallocated.h.
+ *
+ *  Returns: pointer to a newly created context object.
+ *  In:      flags: Always set to SECP256K1_CONTEXT_NONE (see below).
+ *
+ *  The only valid non-deprecated flag in recent library versions is
+ *  SECP256K1_CONTEXT_NONE, which will create a context sufficient for all functionality
+ *  offered by the library. All other (deprecated) flags will be treated as equivalent
+ *  to the SECP256K1_CONTEXT_NONE flag. Though the flags parameter primarily exists for
+ *  historical reasons, future versions of the library may introduce new flags.
+ *
+ *  If the context is intended to be used for API functions that perform computations
+ *  involving secret keys, e.g., signing and public key generation, then it is highly
+ *  recommended to call secp256k1_context_randomize on the context before calling
+ *  those API functions. This will provide enhanced protection against side-channel
+ *  leakage, see secp256k1_context_randomize for details.
+ *
+ *  Do not create a new context object for each operation, as construction and
+ *  randomization can take non-negligible time.
+ */
+SECP256K1_API secp256k1_context *secp256k1_context_create(
+    unsigned int flags
+) SECP256K1_WARN_UNUSED_RESULT;
+
+/** Copy a secp256k1 context object (into dynamically allocated memory).
+ *
+ *  This function uses malloc to allocate memory. It is guaranteed that malloc is
+ *  called at most once for every call of this function. If you need to avoid dynamic
+ *  memory allocation entirely, see the functions in secp256k1_preallocated.h.
+ *
+ *  Cloning secp256k1_context_static is not possible, and should not be emulated by
+ *  the caller (e.g., using memcpy). Create a new context instead.
+ *
+ *  Returns: pointer to a newly created context object.
+ *  Args:    ctx: pointer to a context to copy (not secp256k1_context_static).
+ */
+SECP256K1_API secp256k1_context *secp256k1_context_clone(
+    const secp256k1_context *ctx
+) SECP256K1_ARG_NONNULL(1) SECP256K1_WARN_UNUSED_RESULT;
+
+/** Destroy a secp256k1 context object (created in dynamically allocated memory).
+ *
+ *  The context pointer may not be used afterwards.
+ *
+ *  The context to destroy must have been created using secp256k1_context_create
+ *  or secp256k1_context_clone. If the context has instead been created using
+ *  secp256k1_context_preallocated_create or secp256k1_context_preallocated_clone, the
+ *  behaviour is undefined. In that case, secp256k1_context_preallocated_destroy must
+ *  be used instead.
+ *
+ *  Args:   ctx: pointer to a context to destroy, constructed using
+ *               secp256k1_context_create or secp256k1_context_clone
+ *               (i.e., not secp256k1_context_static).
+ */
+SECP256K1_API void secp256k1_context_destroy(
+    secp256k1_context *ctx
+) SECP256K1_ARG_NONNULL(1);
+
+/** Set a callback function to be called when an illegal argument is passed to
+ *  an API call. It will only trigger for violations that are mentioned
+ *  explicitly in the header.
+ *
+ *  The philosophy is that these shouldn't be dealt with through a
+ *  specific return value, as calling code should not have branches to deal with
+ *  the case that this code itself is broken.
+ *
+ *  On the other hand, during debug stage, one would want to be informed about
+ *  such mistakes, and the default (crashing) may be inadvisable.
+ *  When this callback is triggered, the API function called is guaranteed not
+ *  to cause a crash, though its return value and output arguments are
+ *  undefined.
+ *
+ *  When this function has not been called (or called with fn==NULL), then the
+ *  default handler will be used. The library provides a default handler which
+ *  writes the message to stderr and calls abort. This default handler can be
+ *  replaced at link time if the preprocessor macro
+ *  USE_EXTERNAL_DEFAULT_CALLBACKS is defined, which is the case if the build
+ *  has been configured with --enable-external-default-callbacks. Then the
+ *  following two symbols must be provided to link against:
+ *   - void secp256k1_default_illegal_callback_fn(const char *message, void *data);
+ *   - void secp256k1_default_error_callback_fn(const char *message, void *data);
+ *  The library can call these default handlers even before a proper callback data
+ *  pointer could have been set using secp256k1_context_set_illegal_callback or
+ *  secp256k1_context_set_error_callback, e.g., when the creation of a context
+ *  fails. In this case, the corresponding default handler will be called with
+ *  the data pointer argument set to NULL.
+ *
+ *  Args: ctx:  pointer to a context object.
+ *  In:   fun:  pointer to a function to call when an illegal argument is
+ *              passed to the API, taking a message and an opaque pointer.
+ *              (NULL restores the default handler.)
+ *        data: the opaque pointer to pass to fun above, must be NULL for the default handler.
+ *
+ *  See also secp256k1_context_set_error_callback.
+ */
+SECP256K1_API void secp256k1_context_set_illegal_callback(
+    secp256k1_context *ctx,
+    void (*fun)(const char *message, void *data),
+    const void *data
+) SECP256K1_ARG_NONNULL(1);
+
+/** Set a callback function to be called when an internal consistency check
+ *  fails.
+ *
+ *  The default callback writes an error message to stderr and calls abort
+ *  to abort the program.
+ *
+ *  This can only trigger in case of a hardware failure, miscompilation,
+ *  memory corruption, serious bug in the library, or other error that would
+ *  otherwise result in undefined behaviour. It will not trigger due to mere
+ *  incorrect usage of the API (see secp256k1_context_set_illegal_callback
+ *  for that). After this callback returns, anything may happen, including
+ *  crashing.
+ *
+ *  Args: ctx:  pointer to a context object.
+ *  In:   fun:  pointer to a function to call when an internal error occurs,
+ *              taking a message and an opaque pointer (NULL restores the
+ *              default handler, see secp256k1_context_set_illegal_callback
+ *              for details).
+ *        data: the opaque pointer to pass to fun above, must be NULL for the default handler.
+ *
+ *  See also secp256k1_context_set_illegal_callback.
+ */
+SECP256K1_API void secp256k1_context_set_error_callback(
+    secp256k1_context *ctx,
+    void (*fun)(const char *message, void *data),
+    const void *data
+) SECP256K1_ARG_NONNULL(1);
+
+/** Parse a variable-length public key into the pubkey object.
+ *
+ *  Returns: 1 if the public key was fully valid.
+ *           0 if the public key could not be parsed or is invalid.
+ *  Args: ctx:      pointer to a context object.
+ *  Out:  pubkey:   pointer to a pubkey object. If 1 is returned, it is set to a
+ *                  parsed version of input. If not, its value is undefined.
+ *  In:   input:    pointer to a serialized public key
+ *        inputlen: length of the array pointed to by input
+ *
+ *  This function supports parsing compressed (33 bytes, header byte 0x02 or
+ *  0x03), uncompressed (65 bytes, header byte 0x04), or hybrid (65 bytes, header
+ *  byte 0x06 or 0x07) format public keys.
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_parse(
+    const secp256k1_context *ctx,
+    secp256k1_pubkey *pubkey,
+    const unsigned char *input,
+    size_t inputlen
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Serialize a pubkey object into a serialized byte sequence.
+ *
+ *  Returns: 1 always.
+ *  Args:   ctx:        pointer to a context object.
+ *  Out:    output:     pointer to a 65-byte (if flags is SECP256K1_EC_UNCOMPRESSED)
+ *                      or 33-byte (if flags is SECP256K1_EC_COMPRESSED) byte array
+ *                      to place the serialized key in.
+ *  In/Out: outputlen:  pointer to an integer which is initially set to the
+ *                      size of output, and is overwritten with the written
+ *                      size.
+ *  In:     pubkey:     pointer to a secp256k1_pubkey containing an
+ *                      initialized public key.
+ *          flags:      SECP256K1_EC_COMPRESSED if serialization should be in
+ *                      compressed format, otherwise SECP256K1_EC_UNCOMPRESSED.
+ */
+SECP256K1_API int secp256k1_ec_pubkey_serialize(
+    const secp256k1_context *ctx,
+    unsigned char *output,
+    size_t *outputlen,
+    const secp256k1_pubkey *pubkey,
+    unsigned int flags
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4);
+
+/** Compare two public keys using lexicographic (of compressed serialization) order
+ *
+ *  Returns: <0 if the first public key is less than the second
+ *           >0 if the first public key is greater than the second
+ *           0 if the two public keys are equal
+ *  Args: ctx:      pointer to a context object
+ *  In:   pubkey1:  first public key to compare
+ *        pubkey2:  second public key to compare
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_cmp(
+    const secp256k1_context *ctx,
+    const secp256k1_pubkey *pubkey1,
+    const secp256k1_pubkey *pubkey2
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Sort public keys using lexicographic (of compressed serialization) order
+ *
+ *  Returns: 0 if the arguments are invalid. 1 otherwise.
+ *
+ *  Args:     ctx: pointer to a context object
+ *  In:   pubkeys: array of pointers to pubkeys to sort
+ *      n_pubkeys: number of elements in the pubkeys array
+ */
+SECP256K1_API int secp256k1_ec_pubkey_sort(
+    const secp256k1_context *ctx,
+    const secp256k1_pubkey **pubkeys,
+    size_t n_pubkeys
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2);
+
+/** Parse an ECDSA signature in compact (64 bytes) format.
+ *
+ *  Returns: 1 when the signature could be parsed, 0 otherwise.
+ *  Args: ctx:      pointer to a context object
+ *  Out:  sig:      pointer to a signature object
+ *  In:   input64:  pointer to the 64-byte array to parse
+ *
+ *  The signature must consist of a 32-byte big endian R value, followed by a
+ *  32-byte big endian S value. If R or S fall outside of [0..order-1], the
+ *  encoding is invalid. R and S with value 0 are allowed in the encoding.
+ *
+ *  After the call, sig will always be initialized. If parsing failed or R or
+ *  S are zero, the resulting sig value is guaranteed to fail verification for
+ *  any message and public key.
+ */
+SECP256K1_API int secp256k1_ecdsa_signature_parse_compact(
+    const secp256k1_context *ctx,
+    secp256k1_ecdsa_signature *sig,
+    const unsigned char *input64
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Parse a DER ECDSA signature.
+ *
+ *  Returns: 1 when the signature could be parsed, 0 otherwise.
+ *  Args: ctx:      pointer to a context object
+ *  Out:  sig:      pointer to a signature object
+ *  In:   input:    pointer to the signature to be parsed
+ *        inputlen: the length of the array pointed to by input
+ *
+ *  This function will accept any valid DER encoded signature, even if the
+ *  encoded numbers are out of range.
+ *
+ *  After the call, sig will always be initialized. If parsing failed or the
+ *  encoded numbers are out of range, signature verification with it is
+ *  guaranteed to fail for every message and public key.
+ */
+SECP256K1_API int secp256k1_ecdsa_signature_parse_der(
+    const secp256k1_context *ctx,
+    secp256k1_ecdsa_signature *sig,
+    const unsigned char *input,
+    size_t inputlen
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Serialize an ECDSA signature in DER format.
+ *
+ *  Returns: 1 if enough space was available to serialize, 0 otherwise
+ *  Args:   ctx:       pointer to a context object
+ *  Out:    output:    pointer to an array to store the DER serialization
+ *  In/Out: outputlen: pointer to a length integer. Initially, this integer
+ *                     should be set to the length of output. After the call
+ *                     it will be set to the length of the serialization (even
+ *                     if 0 was returned).
+ *  In:     sig:       pointer to an initialized signature object
+ */
+SECP256K1_API int secp256k1_ecdsa_signature_serialize_der(
+    const secp256k1_context *ctx,
+    unsigned char *output,
+    size_t *outputlen,
+    const secp256k1_ecdsa_signature *sig
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4);
+
+/** Serialize an ECDSA signature in compact (64 byte) format.
+ *
+ *  Returns: 1
+ *  Args:   ctx:       pointer to a context object
+ *  Out:    output64:  pointer to a 64-byte array to store the compact serialization
+ *  In:     sig:       pointer to an initialized signature object
+ *
+ *  See secp256k1_ecdsa_signature_parse_compact for details about the encoding.
+ */
+SECP256K1_API int secp256k1_ecdsa_signature_serialize_compact(
+    const secp256k1_context *ctx,
+    unsigned char *output64,
+    const secp256k1_ecdsa_signature *sig
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Verify an ECDSA signature.
+ *
+ *  Returns: 1: correct signature
+ *           0: incorrect or unparseable signature
+ *  Args:    ctx:       pointer to a context object
+ *  In:      sig:       the signature being verified.
+ *           msghash32: the 32-byte message hash being verified.
+ *                      The verifier must make sure to apply a cryptographic
+ *                      hash function to the message by itself and not accept an
+ *                      msghash32 value directly. Otherwise, it would be easy to
+ *                      create a "valid" signature without knowledge of the
+ *                      secret key. See also
+ *                      https://bitcoin.stackexchange.com/a/81116/35586 for more
+ *                      background on this topic.
+ *           pubkey:    pointer to an initialized public key to verify with.
+ *
+ * To avoid accepting malleable signatures, only ECDSA signatures in lower-S
+ * form are accepted.
+ *
+ * If you need to accept ECDSA signatures from sources that do not obey this
+ * rule, apply secp256k1_ecdsa_signature_normalize to the signature prior to
+ * verification, but be aware that doing so results in malleable signatures.
+ *
+ * For details, see the comments for that function.
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_verify(
+    const secp256k1_context *ctx,
+    const secp256k1_ecdsa_signature *sig,
+    const unsigned char *msghash32,
+    const secp256k1_pubkey *pubkey
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4);
+
+/** Convert a signature to a normalized lower-S form.
+ *
+ *  Returns: 1 if sigin was not normalized, 0 if it already was.
+ *  Args: ctx:    pointer to a context object
+ *  Out:  sigout: pointer to a signature to fill with the normalized form,
+ *                or copy if the input was already normalized. (can be NULL if
+ *                you're only interested in whether the input was already
+ *                normalized).
+ *  In:   sigin:  pointer to a signature to check/normalize (can be identical to sigout)
+ *
+ *  With ECDSA a third-party can forge a second distinct signature of the same
+ *  message, given a single initial signature, but without knowing the key. This
+ *  is done by negating the S value modulo the order of the curve, 'flipping'
+ *  the sign of the random point R which is not included in the signature.
+ *
+ *  Forgery of the same message isn't universally problematic, but in systems
+ *  where message malleability or uniqueness of signatures is important this can
+ *  cause issues. This forgery can be blocked by all verifiers forcing signers
+ *  to use a normalized form.
+ *
+ *  The lower-S form reduces the size of signatures slightly on average when
+ *  variable length encodings (such as DER) are used and is cheap to verify,
+ *  making it a good choice. Security of always using lower-S is assured because
+ *  anyone can trivially modify a signature after the fact to enforce this
+ *  property anyway.
+ *
+ *  The lower S value is always between 0x1 and
+ *  0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF5D576E7357A4501DDFE92F46681B20A0,
+ *  inclusive.
+ *
+ *  No other forms of ECDSA malleability are known and none seem likely, but
+ *  there is no formal proof that ECDSA, even with this additional restriction,
+ *  is free of other malleability. Commonly used serialization schemes will also
+ *  accept various non-unique encodings, so care should be taken when this
+ *  property is required for an application.
+ *
+ *  The secp256k1_ecdsa_sign function will by default create signatures in the
+ *  lower-S form, and secp256k1_ecdsa_verify will not accept others. In case
+ *  signatures come from a system that cannot enforce this property,
+ *  secp256k1_ecdsa_signature_normalize must be called before verification.
+ */
+SECP256K1_API int secp256k1_ecdsa_signature_normalize(
+    const secp256k1_context *ctx,
+    secp256k1_ecdsa_signature *sigout,
+    const secp256k1_ecdsa_signature *sigin
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(3);
+
+/** An implementation of RFC6979 (using HMAC-SHA256) as nonce generation function.
+ * If a data pointer is passed, it is assumed to be a pointer to 32 bytes of
+ * extra entropy.
+ */
+SECP256K1_API const secp256k1_nonce_function secp256k1_nonce_function_rfc6979;
+
+/** A default safe nonce generation function (currently equal to secp256k1_nonce_function_rfc6979). */
+SECP256K1_API const secp256k1_nonce_function secp256k1_nonce_function_default;
+
+/** Create an ECDSA signature.
+ *
+ *  Returns: 1: signature created
+ *           0: the nonce generation function failed, or the secret key was invalid.
+ *  Args:    ctx:       pointer to a context object (not secp256k1_context_static).
+ *  Out:     sig:       pointer to an array where the signature will be placed.
+ *  In:      msghash32: the 32-byte message hash being signed.
+ *           seckey:    pointer to a 32-byte secret key.
+ *           noncefp:   pointer to a nonce generation function. If NULL,
+ *                      secp256k1_nonce_function_default is used.
+ *           ndata:     pointer to arbitrary data used by the nonce generation function
+ *                      (can be NULL). If it is non-NULL and
+ *                      secp256k1_nonce_function_default is used, then ndata must be a
+ *                      pointer to 32 bytes of additional data.
+ *
+ * The created signature is always in lower-S form. See
+ * secp256k1_ecdsa_signature_normalize for more details.
+ */
+SECP256K1_API int secp256k1_ecdsa_sign(
+    const secp256k1_context *ctx,
+    secp256k1_ecdsa_signature *sig,
+    const unsigned char *msghash32,
+    const unsigned char *seckey,
+    secp256k1_nonce_function noncefp,
+    const void *ndata
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4);
+
+/** Verify an elliptic curve secret key.
+ *
+ *  A secret key is valid if it is not 0 and less than the secp256k1 curve order
+ *  when interpreted as an integer (most significant byte first). The
+ *  probability of choosing a 32-byte string uniformly at random which is an
+ *  invalid secret key is negligible. However, if it does happen it should
+ *  be assumed that the randomness source is severely broken and there should
+ *  be no retry.
+ *
+ *  Returns: 1: secret key is valid
+ *           0: secret key is invalid
+ *  Args:    ctx: pointer to a context object.
+ *  In:      seckey: pointer to a 32-byte secret key.
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_seckey_verify(
+    const secp256k1_context *ctx,
+    const unsigned char *seckey
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2);
+
+/** Compute the public key for a secret key.
+ *
+ *  Returns: 1: secret was valid, public key was stored.
+ *           0: secret was invalid, try again.
+ *  Args:    ctx:    pointer to a context object (not secp256k1_context_static).
+ *  Out:     pubkey: pointer to the created public key.
+ *  In:      seckey: pointer to a 32-byte secret key.
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_create(
+    const secp256k1_context *ctx,
+    secp256k1_pubkey *pubkey,
+    const unsigned char *seckey
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Negates a secret key in place.
+ *
+ *  Returns: 0 if the given secret key is invalid according to
+ *           secp256k1_ec_seckey_verify. 1 otherwise
+ *  Args:   ctx:    pointer to a context object
+ *  In/Out: seckey: pointer to the 32-byte secret key to be negated. If the
+ *                  secret key is invalid according to
+ *                  secp256k1_ec_seckey_verify, this function returns 0 and
+ *                  seckey will be set to some unspecified value.
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_seckey_negate(
+    const secp256k1_context *ctx,
+    unsigned char *seckey
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2);
+
+/** Same as secp256k1_ec_seckey_negate, but DEPRECATED. Will be removed in
+ *  future versions. */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_privkey_negate(
+    const secp256k1_context *ctx,
+    unsigned char *seckey
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2)
+  SECP256K1_DEPRECATED("Use secp256k1_ec_seckey_negate instead");
+
+/** Negates a public key in place.
+ *
+ *  Returns: 1 always
+ *  Args:   ctx:        pointer to a context object
+ *  In/Out: pubkey:     pointer to the public key to be negated.
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_negate(
+    const secp256k1_context *ctx,
+    secp256k1_pubkey *pubkey
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2);
+
+/** Tweak a secret key by adding tweak to it.
+ *
+ *  Returns: 0 if the arguments are invalid or the resulting secret key would be
+ *           invalid (only when the tweak is the negation of the secret key). 1
+ *           otherwise.
+ *  Args:    ctx:   pointer to a context object.
+ *  In/Out: seckey: pointer to a 32-byte secret key. If the secret key is
+ *                  invalid according to secp256k1_ec_seckey_verify, this
+ *                  function returns 0. seckey will be set to some unspecified
+ *                  value if this function returns 0.
+ *  In:    tweak32: pointer to a 32-byte tweak, which must be valid according to
+ *                  secp256k1_ec_seckey_verify or 32 zero bytes. For uniformly
+ *                  random 32-byte tweaks, the chance of being invalid is
+ *                  negligible (around 1 in 2^128).
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_seckey_tweak_add(
+    const secp256k1_context *ctx,
+    unsigned char *seckey,
+    const unsigned char *tweak32
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Same as secp256k1_ec_seckey_tweak_add, but DEPRECATED. Will be removed in
+ *  future versions. */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_privkey_tweak_add(
+    const secp256k1_context *ctx,
+    unsigned char *seckey,
+    const unsigned char *tweak32
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3)
+  SECP256K1_DEPRECATED("Use secp256k1_ec_seckey_tweak_add instead");
+
+/** Tweak a public key by adding tweak times the generator to it.
+ *
+ *  Returns: 0 if the arguments are invalid or the resulting public key would be
+ *           invalid (only when the tweak is the negation of the corresponding
+ *           secret key). 1 otherwise.
+ *  Args:    ctx:   pointer to a context object.
+ *  In/Out: pubkey: pointer to a public key object. pubkey will be set to an
+ *                  invalid value if this function returns 0.
+ *  In:    tweak32: pointer to a 32-byte tweak, which must be valid according to
+ *                  secp256k1_ec_seckey_verify or 32 zero bytes. For uniformly
+ *                  random 32-byte tweaks, the chance of being invalid is
+ *                  negligible (around 1 in 2^128).
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_tweak_add(
+    const secp256k1_context *ctx,
+    secp256k1_pubkey *pubkey,
+    const unsigned char *tweak32
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Tweak a secret key by multiplying it by a tweak.
+ *
+ *  Returns: 0 if the arguments are invalid. 1 otherwise.
+ *  Args:   ctx:    pointer to a context object.
+ *  In/Out: seckey: pointer to a 32-byte secret key. If the secret key is
+ *                  invalid according to secp256k1_ec_seckey_verify, this
+ *                  function returns 0. seckey will be set to some unspecified
+ *                  value if this function returns 0.
+ *  In:    tweak32: pointer to a 32-byte tweak. If the tweak is invalid according to
+ *                  secp256k1_ec_seckey_verify, this function returns 0. For
+ *                  uniformly random 32-byte arrays the chance of being invalid
+ *                  is negligible (around 1 in 2^128).
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_seckey_tweak_mul(
+    const secp256k1_context *ctx,
+    unsigned char *seckey,
+    const unsigned char *tweak32
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Same as secp256k1_ec_seckey_tweak_mul, but DEPRECATED. Will be removed in
+ *  future versions. */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_privkey_tweak_mul(
+    const secp256k1_context *ctx,
+    unsigned char *seckey,
+    const unsigned char *tweak32
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3)
+  SECP256K1_DEPRECATED("Use secp256k1_ec_seckey_tweak_mul instead");
+
+/** Tweak a public key by multiplying it by a tweak value.
+ *
+ *  Returns: 0 if the arguments are invalid. 1 otherwise.
+ *  Args:    ctx:   pointer to a context object.
+ *  In/Out: pubkey: pointer to a public key object. pubkey will be set to an
+ *                  invalid value if this function returns 0.
+ *  In:    tweak32: pointer to a 32-byte tweak. If the tweak is invalid according to
+ *                  secp256k1_ec_seckey_verify, this function returns 0. For
+ *                  uniformly random 32-byte arrays the chance of being invalid
+ *                  is negligible (around 1 in 2^128).
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_tweak_mul(
+    const secp256k1_context *ctx,
+    secp256k1_pubkey *pubkey,
+    const unsigned char *tweak32
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Randomizes the context to provide enhanced protection against side-channel leakage.
+ *
+ *  Returns: 1: randomization successful
+ *           0: error
+ *  Args:    ctx:       pointer to a context object (not secp256k1_context_static).
+ *  In:      seed32:    pointer to a 32-byte random seed (NULL resets to initial state).
+ *
+ * While secp256k1 code is written and tested to be constant-time no matter what
+ * secret values are, it is possible that a compiler may output code which is not,
+ * and also that the CPU may not emit the same radio frequencies or draw the same
+ * amount of power for all values. Randomization of the context shields against
+ * side-channel observations which aim to exploit secret-dependent behaviour in
+ * certain computations which involve secret keys.
+ *
+ * It is highly recommended to call this function on contexts returned from
+ * secp256k1_context_create or secp256k1_context_clone (or from the corresponding
+ * functions in secp256k1_preallocated.h) before using these contexts to call API
+ * functions that perform computations involving secret keys, e.g., signing and
+ * public key generation. It is possible to call this function more than once on
+ * the same context, and doing so before every few computations involving secret
+ * keys is recommended as a defense-in-depth measure. Randomization of the static
+ * context secp256k1_context_static is not supported.
+ *
+ * Currently, the random seed is mainly used for blinding multiplications of a
+ * secret scalar with the elliptic curve base point. Multiplications of this
+ * kind are performed by exactly those API functions which are documented to
+ * require a context that is not secp256k1_context_static. As a rule of thumb,
+ * these are all functions which take a secret key (or a keypair) as an input.
+ * A notable exception to that rule is the ECDH module, which relies on a different
+ * kind of elliptic curve point multiplication and thus does not benefit from
+ * enhanced protection against side-channel leakage currently.
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_context_randomize(
+    secp256k1_context *ctx,
+    const unsigned char *seed32
+) SECP256K1_ARG_NONNULL(1);
+
+/** Add a number of public keys together.
+ *
+ *  Returns: 1: the sum of the public keys is valid.
+ *           0: the sum of the public keys is not valid.
+ *  Args:   ctx:        pointer to a context object.
+ *  Out:    out:        pointer to a public key object for placing the resulting public key.
+ *  In:     ins:        pointer to array of pointers to public keys.
+ *          n:          the number of public keys to add together (must be at least 1).
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_combine(
+    const secp256k1_context *ctx,
+    secp256k1_pubkey *out,
+    const secp256k1_pubkey * const *ins,
+    size_t n
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Compute a tagged hash as defined in BIP-340.
+ *
+ *  This is useful for creating a message hash and achieving domain separation
+ *  through an application-specific tag. This function returns
+ *  SHA256(SHA256(tag)||SHA256(tag)||msg). Therefore, tagged hash
+ *  implementations optimized for a specific tag can precompute the SHA256 state
+ *  after hashing the tag hashes.
+ *
+ *  Returns: 1 always.
+ *  Args:    ctx: pointer to a context object
+ *  Out:  hash32: pointer to a 32-byte array to store the resulting hash
+ *  In:      tag: pointer to an array containing the tag
+ *        taglen: length of the tag array
+ *           msg: pointer to an array containing the message
+ *        msglen: length of the message array
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_tagged_sha256(
+    const secp256k1_context *ctx,
+    unsigned char *hash32,
+    const unsigned char *tag,
+    size_t taglen,
+    const unsigned char *msg,
+    size_t msglen
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(5);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SECP256K1_H */
--- a/libsecp256k1/include/secp256k1_ecdh.h
+++ b/libsecp256k1/include/secp256k1_ecdh.h
@@ -0,0 +1,63 @@
+#ifndef SECP256K1_ECDH_H
+#define SECP256K1_ECDH_H
+
+#include "secp256k1.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** A pointer to a function that hashes an EC point to obtain an ECDH secret
+ *
+ *  Returns: 1 if the point was successfully hashed.
+ *           0 will cause secp256k1_ecdh to fail and return 0.
+ *           Other return values are not allowed, and the behaviour of
+ *           secp256k1_ecdh is undefined for other return values.
+ *  Out:     output:     pointer to an array to be filled by the function
+ *  In:      x32:        pointer to a 32-byte x coordinate
+ *           y32:        pointer to a 32-byte y coordinate
+ *           data:       arbitrary data pointer that is passed through
+ */
+typedef int (*secp256k1_ecdh_hash_function)(
+  unsigned char *output,
+  const unsigned char *x32,
+  const unsigned char *y32,
+  void *data
+);
+
+/** An implementation of SHA256 hash function that applies to compressed public key.
+ * Populates the output parameter with 32 bytes. */
+SECP256K1_API const secp256k1_ecdh_hash_function secp256k1_ecdh_hash_function_sha256;
+
+/** A default ECDH hash function (currently equal to secp256k1_ecdh_hash_function_sha256).
+ * Populates the output parameter with 32 bytes. */
+SECP256K1_API const secp256k1_ecdh_hash_function secp256k1_ecdh_hash_function_default;
+
+/** Compute an EC Diffie-Hellman secret in constant time
+ *
+ *  Returns: 1: exponentiation was successful
+ *           0: scalar was invalid (zero or overflow) or hashfp returned 0
+ *  Args:    ctx:        pointer to a context object.
+ *  Out:     output:     pointer to an array to be filled by hashfp.
+ *  In:      pubkey:     pointer to a secp256k1_pubkey containing an initialized public key.
+ *           seckey:     a 32-byte scalar with which to multiply the point.
+ *           hashfp:     pointer to a hash function. If NULL,
+ *                       secp256k1_ecdh_hash_function_sha256 is used
+ *                       (in which case, 32 bytes will be written to output).
+ *           data:       arbitrary data pointer that is passed through to hashfp
+ *                       (can be NULL for secp256k1_ecdh_hash_function_sha256).
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdh(
+  const secp256k1_context *ctx,
+  unsigned char *output,
+  const secp256k1_pubkey *pubkey,
+  const unsigned char *seckey,
+  secp256k1_ecdh_hash_function hashfp,
+  void *data
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SECP256K1_ECDH_H */
--- a/libsecp256k1/include/secp256k1_ellswift.h
+++ b/libsecp256k1/include/secp256k1_ellswift.h
@@ -0,0 +1,200 @@
+#ifndef SECP256K1_ELLSWIFT_H
+#define SECP256K1_ELLSWIFT_H
+
+#include "secp256k1.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* This module provides an implementation of ElligatorSwift as well as a
+ * version of x-only ECDH using it (including compatibility with BIP324).
+ *
+ * ElligatorSwift is described in https://eprint.iacr.org/2022/759 by
+ * Chavez-Saab, Rodriguez-Henriquez, and Tibouchi. It permits encoding
+ * uniformly chosen public keys as 64-byte arrays which are indistinguishable
+ * from uniformly random arrays.
+ *
+ * Let f be the function from pairs of field elements to point X coordinates,
+ * defined as follows (all operations modulo p = 2^256 - 2^32 - 977)
+ * f(u,t):
+ * - Let C = 0xa2d2ba93507f1df233770c2a797962cc61f6d15da14ecd47d8d27ae1cd5f852,
+ *   a square root of -3.
+ * - If u=0, set u=1 instead.
+ * - If t=0, set t=1 instead.
+ * - If u^3 + t^2 + 7 = 0, multiply t by 2.
+ * - Let X = (u^3 + 7 - t^2) / (2 * t)
+ * - Let Y = (X + t) / (C * u)
+ * - Return the first in [u + 4 * Y^2, (-X/Y - u) / 2, (X/Y - u) / 2] that is an
+ *   X coordinate on the curve (at least one of them is, for any u and t).
+ *
+ * Then an ElligatorSwift encoding of x consists of the 32-byte big-endian
+ * encodings of field elements u and t concatenated, where f(u,t) = x.
+ * The encoding algorithm is described in the paper, and effectively picks a
+ * uniformly random pair (u,t) among those which encode x.
+ *
+ * If the Y coordinate is relevant, it is given the same parity as t.
+ *
+ * Changes w.r.t. the paper:
+ * - The u=0, t=0, and u^3+t^2+7=0 conditions result in decoding to the point
+ *   at infinity in the paper. Here they are remapped to finite points.
+ * - The paper uses an additional encoding bit for the parity of y. Here the
+ *   parity of t is used (negating t does not affect the decoded x coordinate,
+ *   so this is possible).
+ *
+ * For mathematical background about the scheme, see the doc/ellswift.md file.
+ */
+
+/** A pointer to a function used by secp256k1_ellswift_xdh to hash the shared X
+ *  coordinate along with the encoded public keys to a uniform shared secret.
+ *
+ *  Returns: 1 if a shared secret was successfully computed.
+ *           0 will cause secp256k1_ellswift_xdh to fail and return 0.
+ *           Other return values are not allowed, and the behaviour of
+ *           secp256k1_ellswift_xdh is undefined for other return values.
+ *  Out:     output:     pointer to an array to be filled by the function
+ *  In:      x32:        pointer to the 32-byte serialized X coordinate
+ *                       of the resulting shared point (will not be NULL)
+ *           ell_a64:    pointer to the 64-byte encoded public key of party A
+ *                       (will not be NULL)
+ *           ell_b64:    pointer to the 64-byte encoded public key of party B
+ *                       (will not be NULL)
+ *           data:       arbitrary data pointer that is passed through
+ */
+typedef int (*secp256k1_ellswift_xdh_hash_function)(
+    unsigned char *output,
+    const unsigned char *x32,
+    const unsigned char *ell_a64,
+    const unsigned char *ell_b64,
+    void *data
+);
+
+/** An implementation of an secp256k1_ellswift_xdh_hash_function which uses
+ *  SHA256(prefix64 || ell_a64 || ell_b64 || x32), where prefix64 is the 64-byte
+ *  array pointed to by data. */
+SECP256K1_API const secp256k1_ellswift_xdh_hash_function secp256k1_ellswift_xdh_hash_function_prefix;
+
+/** An implementation of an secp256k1_ellswift_xdh_hash_function compatible with
+ *  BIP324. It returns H_tag(ell_a64 || ell_b64 || x32), where H_tag is the
+ *  BIP340 tagged hash function with tag "bip324_ellswift_xonly_ecdh". Equivalent
+ *  to secp256k1_ellswift_xdh_hash_function_prefix with prefix64 set to
+ *  SHA256("bip324_ellswift_xonly_ecdh")||SHA256("bip324_ellswift_xonly_ecdh").
+ *  The data argument is ignored. */
+SECP256K1_API const secp256k1_ellswift_xdh_hash_function secp256k1_ellswift_xdh_hash_function_bip324;
+
+/** Construct a 64-byte ElligatorSwift encoding of a given pubkey.
+ *
+ *  Returns: 1 always.
+ *  Args:    ctx:        pointer to a context object
+ *  Out:     ell64:      pointer to a 64-byte array to be filled
+ *  In:      pubkey:     pointer to a secp256k1_pubkey containing an
+ *                       initialized public key
+ *           rnd32:      pointer to 32 bytes of randomness
+ *
+ * It is recommended that rnd32 consists of 32 uniformly random bytes, not
+ * known to any adversary trying to detect whether public keys are being
+ * encoded, though 16 bytes of randomness (padded to an array of 32 bytes,
+ * e.g., with zeros) suffice to make the result indistinguishable from
+ * uniform. The randomness in rnd32 must not be a deterministic function of
+ * the pubkey (it can be derived from the private key, though).
+ *
+ * It is not guaranteed that the computed encoding is stable across versions
+ * of the library, even if all arguments to this function (including rnd32)
+ * are the same.
+ *
+ * This function runs in variable time.
+ */
+SECP256K1_API int secp256k1_ellswift_encode(
+    const secp256k1_context *ctx,
+    unsigned char *ell64,
+    const secp256k1_pubkey *pubkey,
+    const unsigned char *rnd32
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4);
+
+/** Decode a 64-byte ElligatorSwift-encoded public key.
+ *
+ *  Returns: always 1
+ *  Args:    ctx:        pointer to a context object
+ *  Out:     pubkey:     pointer to a secp256k1_pubkey that will be filled
+ *  In:      ell64:      pointer to a 64-byte array to decode
+ *
+ * This function runs in variable time.
+ */
+SECP256K1_API int secp256k1_ellswift_decode(
+    const secp256k1_context *ctx,
+    secp256k1_pubkey *pubkey,
+    const unsigned char *ell64
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Compute an ElligatorSwift public key for a secret key.
+ *
+ *  Returns: 1: secret was valid, public key was stored.
+ *           0: secret was invalid, try again.
+ *  Args:    ctx:        pointer to a context object
+ *  Out:     ell64:      pointer to a 64-byte array to receive the ElligatorSwift
+ *                       public key
+ *  In:      seckey32:   pointer to a 32-byte secret key
+ *           auxrnd32:   (optional) pointer to 32 bytes of randomness
+ *
+ * Constant time in seckey32 and auxrnd32, but not in the resulting public key.
+ *
+ * It is recommended that auxrnd32 contains 32 uniformly random bytes, though
+ * it is optional (and does result in encodings that are indistinguishable from
+ * uniform even without any auxrnd32). It differs from the (mandatory) rnd32
+ * argument to secp256k1_ellswift_encode in this regard.
+ *
+ * This function can be used instead of calling secp256k1_ec_pubkey_create
+ * followed by secp256k1_ellswift_encode. It is safer, as it uses the secret
+ * key as entropy for the encoding (supplemented with auxrnd32, if provided).
+ *
+ * Like secp256k1_ellswift_encode, this function does not guarantee that the
+ * computed encoding is stable across versions of the library, even if all
+ * arguments (including auxrnd32) are the same.
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ellswift_create(
+    const secp256k1_context *ctx,
+    unsigned char *ell64,
+    const unsigned char *seckey32,
+    const unsigned char *auxrnd32
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Given a private key, and ElligatorSwift public keys sent in both directions,
+ *  compute a shared secret using x-only Elliptic Curve Diffie-Hellman (ECDH).
+ *
+ *  Returns: 1: shared secret was successfully computed
+ *           0: secret was invalid or hashfp returned 0
+ *  Args:    ctx:       pointer to a context object.
+ *  Out:     output:    pointer to an array to be filled by hashfp.
+ *  In:      ell_a64:   pointer to the 64-byte encoded public key of party A
+ *                      (must not be NULL)
+ *           ell_b64:   pointer to the 64-byte encoded public key of party B
+ *                      (must not be NULL)
+ *           seckey32:  pointer to our 32-byte secret key
+ *           party:     boolean indicating which party we are: zero if we are
+ *                      party A, non-zero if we are party B. seckey32 must be
+ *                      the private key corresponding to that party's ell_?64.
+ *                      This correspondence is not checked.
+ *           hashfp:    pointer to a hash function.
+ *           data:      arbitrary data pointer passed through to hashfp.
+ *
+ * Constant time in seckey32.
+ *
+ * This function is more efficient than decoding the public keys, and performing
+ * ECDH on them.
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ellswift_xdh(
+  const secp256k1_context *ctx,
+  unsigned char *output,
+  const unsigned char *ell_a64,
+  const unsigned char *ell_b64,
+  const unsigned char *seckey32,
+  int party,
+  secp256k1_ellswift_xdh_hash_function hashfp,
+  void *data
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5) SECP256K1_ARG_NONNULL(7);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SECP256K1_ELLSWIFT_H */
--- a/libsecp256k1/include/secp256k1_extrakeys.h
+++ b/libsecp256k1/include/secp256k1_extrakeys.h
@@ -0,0 +1,250 @@
+#ifndef SECP256K1_EXTRAKEYS_H
+#define SECP256K1_EXTRAKEYS_H
+
+#include "secp256k1.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Opaque data structure that holds a parsed and valid "x-only" public key.
+ *  An x-only pubkey encodes a point whose Y coordinate is even. It is
+ *  serialized using only its X coordinate (32 bytes). See BIP-340 for more
+ *  information about x-only pubkeys.
+ *
+ *  The exact representation of data inside is implementation defined and not
+ *  guaranteed to be portable between different platforms or versions. It is
+ *  however guaranteed to be 64 bytes in size, and can be safely copied/moved.
+ *  If you need to convert to a format suitable for storage or transmission,
+ *  use secp256k1_xonly_pubkey_serialize and secp256k1_xonly_pubkey_parse. To
+ *  compare keys, use secp256k1_xonly_pubkey_cmp.
+ */
+typedef struct secp256k1_xonly_pubkey {
+    unsigned char data[64];
+} secp256k1_xonly_pubkey;
+
+/** Opaque data structure that holds a keypair consisting of a secret and a
+ *  public key.
+ *
+ *  The exact representation of data inside is implementation defined and not
+ *  guaranteed to be portable between different platforms or versions. It is
+ *  however guaranteed to be 96 bytes in size, and can be safely copied/moved.
+ */
+typedef struct secp256k1_keypair {
+    unsigned char data[96];
+} secp256k1_keypair;
+
+/** Parse a 32-byte sequence into a xonly_pubkey object.
+ *
+ *  Returns: 1 if the public key was fully valid.
+ *           0 if the public key could not be parsed or is invalid.
+ *
+ *  Args:   ctx: pointer to a context object.
+ *  Out: pubkey: pointer to a pubkey object. If 1 is returned, it is set to a
+ *               parsed version of input. If not, it's set to an invalid value.
+ *  In: input32: pointer to a serialized xonly_pubkey.
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_xonly_pubkey_parse(
+    const secp256k1_context *ctx,
+    secp256k1_xonly_pubkey *pubkey,
+    const unsigned char *input32
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Serialize an xonly_pubkey object into a 32-byte sequence.
+ *
+ *  Returns: 1 always.
+ *
+ *  Args:     ctx: pointer to a context object.
+ *  Out: output32: pointer to a 32-byte array to place the serialized key in.
+ *  In:    pubkey: pointer to a secp256k1_xonly_pubkey containing an initialized public key.
+ */
+SECP256K1_API int secp256k1_xonly_pubkey_serialize(
+    const secp256k1_context *ctx,
+    unsigned char *output32,
+    const secp256k1_xonly_pubkey *pubkey
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Compare two x-only public keys using lexicographic order
+ *
+ *  Returns: <0 if the first public key is less than the second
+ *           >0 if the first public key is greater than the second
+ *           0 if the two public keys are equal
+ *  Args: ctx:      pointer to a context object.
+ *  In:   pk1:      first public key to compare
+ *        pk2:      second public key to compare
+ */
+SECP256K1_API int secp256k1_xonly_pubkey_cmp(
+    const secp256k1_context *ctx,
+    const secp256k1_xonly_pubkey *pk1,
+    const secp256k1_xonly_pubkey *pk2
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Converts a secp256k1_pubkey into a secp256k1_xonly_pubkey.
+ *
+ *  Returns: 1 always.
+ *
+ *  Args:         ctx: pointer to a context object.
+ *  Out: xonly_pubkey: pointer to an x-only public key object for placing the converted public key.
+ *          pk_parity: Ignored if NULL. Otherwise, pointer to an integer that
+ *                     will be set to 1 if the point encoded by xonly_pubkey is
+ *                     the negation of the pubkey and set to 0 otherwise.
+ *  In:        pubkey: pointer to a public key that is converted.
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_xonly_pubkey_from_pubkey(
+    const secp256k1_context *ctx,
+    secp256k1_xonly_pubkey *xonly_pubkey,
+    int *pk_parity,
+    const secp256k1_pubkey *pubkey
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(4);
+
+/** Tweak an x-only public key by adding the generator multiplied with tweak32
+ *  to it.
+ *
+ *  Note that the resulting point can not in general be represented by an x-only
+ *  pubkey because it may have an odd Y coordinate. Instead, the output_pubkey
+ *  is a normal secp256k1_pubkey.
+ *
+ *  Returns: 0 if the arguments are invalid or the resulting public key would be
+ *           invalid (only when the tweak is the negation of the corresponding
+ *           secret key). 1 otherwise.
+ *
+ *  Args:           ctx: pointer to a context object.
+ *  Out:  output_pubkey: pointer to a public key to store the result. Will be set
+ *                       to an invalid value if this function returns 0.
+ *  In: internal_pubkey: pointer to an x-only pubkey to apply the tweak to.
+ *              tweak32: pointer to a 32-byte tweak, which must be valid
+ *                       according to secp256k1_ec_seckey_verify or 32 zero
+ *                       bytes. For uniformly random 32-byte tweaks, the chance of
+ *                       being invalid is negligible (around 1 in 2^128).
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_xonly_pubkey_tweak_add(
+    const secp256k1_context *ctx,
+    secp256k1_pubkey *output_pubkey,
+    const secp256k1_xonly_pubkey *internal_pubkey,
+    const unsigned char *tweak32
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4);
+
+/** Checks that a tweaked pubkey is the result of calling
+ *  secp256k1_xonly_pubkey_tweak_add with internal_pubkey and tweak32.
+ *
+ *  The tweaked pubkey is represented by its 32-byte x-only serialization and
+ *  its pk_parity, which can both be obtained by converting the result of
+ *  tweak_add to a secp256k1_xonly_pubkey.
+ *
+ *  Note that this alone does _not_ verify that the tweaked pubkey is a
+ *  commitment. If the tweak is not chosen in a specific way, the tweaked pubkey
+ *  can easily be the result of a different internal_pubkey and tweak.
+ *
+ *  Returns: 0 if the arguments are invalid or the tweaked pubkey is not the
+ *           result of tweaking the internal_pubkey with tweak32. 1 otherwise.
+ *  Args:            ctx: pointer to a context object.
+ *  In: tweaked_pubkey32: pointer to a serialized xonly_pubkey.
+ *     tweaked_pk_parity: the parity of the tweaked pubkey (whose serialization
+ *                        is passed in as tweaked_pubkey32). This must match the
+ *                        pk_parity value that is returned when calling
+ *                        secp256k1_xonly_pubkey_from_pubkey with the tweaked pubkey, or
+ *                        this function will fail.
+ *       internal_pubkey: pointer to an x-only public key object to apply the tweak to.
+ *               tweak32: pointer to a 32-byte tweak.
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_xonly_pubkey_tweak_add_check(
+    const secp256k1_context *ctx,
+    const unsigned char *tweaked_pubkey32,
+    int tweaked_pk_parity,
+    const secp256k1_xonly_pubkey *internal_pubkey,
+    const unsigned char *tweak32
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5);
+
+/** Compute the keypair for a valid secret key.
+ *
+ *  See the documentation of `secp256k1_ec_seckey_verify` for more information
+ *  about the validity of secret keys.
+ *
+ *  Returns: 1: secret key is valid
+ *           0: secret key is invalid
+ *  Args:    ctx: pointer to a context object (not secp256k1_context_static).
+ *  Out: keypair: pointer to the created keypair.
+ *  In:   seckey: pointer to a 32-byte secret key.
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_keypair_create(
+    const secp256k1_context *ctx,
+    secp256k1_keypair *keypair,
+    const unsigned char *seckey
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Get the secret key from a keypair.
+ *
+ *  Returns: 1 always.
+ *  Args:   ctx: pointer to a context object.
+ *  Out: seckey: pointer to a 32-byte buffer for the secret key.
+ *  In: keypair: pointer to a keypair.
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_keypair_sec(
+    const secp256k1_context *ctx,
+    unsigned char *seckey,
+    const secp256k1_keypair *keypair
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Get the public key from a keypair.
+ *
+ *  Returns: 1 always.
+ *  Args:   ctx: pointer to a context object.
+ *  Out: pubkey: pointer to a pubkey object, set to the keypair public key.
+ *  In: keypair: pointer to a keypair.
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_keypair_pub(
+    const secp256k1_context *ctx,
+    secp256k1_pubkey *pubkey,
+    const secp256k1_keypair *keypair
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Get the x-only public key from a keypair.
+ *
+ *  This is the same as calling secp256k1_keypair_pub and then
+ *  secp256k1_xonly_pubkey_from_pubkey.
+ *
+ *  Returns: 1 always.
+ *  Args:   ctx: pointer to a context object.
+ *  Out: pubkey: pointer to an xonly_pubkey object, set to the keypair
+ *               public key after converting it to an xonly_pubkey.
+ *    pk_parity: Ignored if NULL. Otherwise, pointer to an integer that will be set to the
+ *               pk_parity argument of secp256k1_xonly_pubkey_from_pubkey.
+ *  In: keypair: pointer to a keypair.
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_keypair_xonly_pub(
+    const secp256k1_context *ctx,
+    secp256k1_xonly_pubkey *pubkey,
+    int *pk_parity,
+    const secp256k1_keypair *keypair
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(4);
+
+/** Tweak a keypair by adding tweak32 to the secret key and updating the public
+ *  key accordingly.
+ *
+ *  Calling this function and then secp256k1_keypair_pub results in the same
+ *  public key as calling secp256k1_keypair_xonly_pub and then
+ *  secp256k1_xonly_pubkey_tweak_add.
+ *
+ *  Returns: 0 if the arguments are invalid or the resulting keypair would be
+ *           invalid (only when the tweak is the negation of the keypair's
+ *           secret key). 1 otherwise.
+ *
+ *  Args:       ctx: pointer to a context object.
+ *  In/Out: keypair: pointer to a keypair to apply the tweak to. Will be set to
+ *                   an invalid value if this function returns 0.
+ *  In:     tweak32: pointer to a 32-byte tweak, which must be valid according to
+ *                   secp256k1_ec_seckey_verify or 32 zero bytes. For uniformly
+ *                   random 32-byte tweaks, the chance of being invalid is
+ *                   negligible (around 1 in 2^128).
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_keypair_xonly_tweak_add(
+    const secp256k1_context *ctx,
+    secp256k1_keypair *keypair,
+    const unsigned char *tweak32
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SECP256K1_EXTRAKEYS_H */
--- a/libsecp256k1/include/secp256k1_preallocated.h
+++ b/libsecp256k1/include/secp256k1_preallocated.h
@@ -0,0 +1,134 @@
+#ifndef SECP256K1_PREALLOCATED_H
+#define SECP256K1_PREALLOCATED_H
+
+#include "secp256k1.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The module provided by this header file is intended for settings in which it
+ * is not possible or desirable to rely on dynamic memory allocation. It provides
+ * functions for creating, cloning, and destroying secp256k1 context objects in a
+ * contiguous fixed-size block of memory provided by the caller.
+ *
+ * Context objects created by functions in this module can be used like context
+ * objects created by functions in secp256k1.h, i.e., they can be passed to any
+ * API function that expects a context object (see secp256k1.h for details). The
+ * only exception is that context objects created by functions in this module
+ * must be destroyed using secp256k1_context_preallocated_destroy (in this
+ * module) instead of secp256k1_context_destroy (in secp256k1.h).
+ *
+ * It is guaranteed that functions in this module will not call malloc or its
+ * friends realloc, calloc, and free.
+ */
+
+/** Determine the memory size of a secp256k1 context object to be created in
+ *  caller-provided memory.
+ *
+ *  The purpose of this function is to determine how much memory must be provided
+ *  to secp256k1_context_preallocated_create.
+ *
+ *  Returns: the required size of the caller-provided memory block
+ *  In:      flags:    which parts of the context to initialize.
+ */
+SECP256K1_API size_t secp256k1_context_preallocated_size(
+    unsigned int flags
+) SECP256K1_WARN_UNUSED_RESULT;
+
+/** Create a secp256k1 context object in caller-provided memory.
+ *
+ *  The caller must provide a pointer to a rewritable contiguous block of memory
+ *  of size at least secp256k1_context_preallocated_size(flags) bytes, suitably
+ *  aligned to hold an object of any type.
+ *
+ *  The block of memory is exclusively owned by the created context object during
+ *  the lifetime of this context object, which begins with the call to this
+ *  function and ends when a call to secp256k1_context_preallocated_destroy
+ *  (which destroys the context object again) returns. During the lifetime of the
+ *  context object, the caller is obligated not to access this block of memory,
+ *  i.e., the caller may not read or write the memory, e.g., by copying the memory
+ *  contents to a different location or trying to create a second context object
+ *  in the memory. In simpler words, the prealloc pointer (or any pointer derived
+ *  from it) should not be used during the lifetime of the context object.
+ *
+ *  Returns: pointer to newly created context object.
+ *  In:      prealloc: pointer to a rewritable contiguous block of memory of
+ *                     size at least secp256k1_context_preallocated_size(flags)
+ *                     bytes, as detailed above.
+ *           flags:    which parts of the context to initialize.
+ *
+ *  See secp256k1_context_create (in secp256k1.h) for further details.
+ *
+ *  See also secp256k1_context_randomize (in secp256k1.h)
+ *  and secp256k1_context_preallocated_destroy.
+ */
+SECP256K1_API secp256k1_context *secp256k1_context_preallocated_create(
+    void *prealloc,
+    unsigned int flags
+) SECP256K1_ARG_NONNULL(1) SECP256K1_WARN_UNUSED_RESULT;
+
+/** Determine the memory size of a secp256k1 context object to be copied into
+ *  caller-provided memory.
+ *
+ *  Returns: the required size of the caller-provided memory block.
+ *  In:      ctx: pointer to a context to copy.
+ */
+SECP256K1_API size_t secp256k1_context_preallocated_clone_size(
+    const secp256k1_context *ctx
+) SECP256K1_ARG_NONNULL(1) SECP256K1_WARN_UNUSED_RESULT;
+
+/** Copy a secp256k1 context object into caller-provided memory.
+ *
+ *  The caller must provide a pointer to a rewritable contiguous block of memory
+ *  of size at least secp256k1_context_preallocated_clone_size(ctx) bytes, suitably
+ *  aligned to hold an object of any type.
+ *
+ *  The block of memory is exclusively owned by the created context object during
+ *  the lifetime of this context object, see the description of
+ *  secp256k1_context_preallocated_create for details.
+ *
+ *  Cloning secp256k1_context_static is not possible, and should not be emulated by
+ *  the caller (e.g., using memcpy). Create a new context instead.
+ *
+ *  Returns: pointer to a newly created context object.
+ *  Args:    ctx:      pointer to a context to copy (not secp256k1_context_static).
+ *  In:      prealloc: pointer to a rewritable contiguous block of memory of
+ *                     size at least secp256k1_context_preallocated_clone_size(ctx)
+ *                     bytes, as detailed above.
+ */
+SECP256K1_API secp256k1_context *secp256k1_context_preallocated_clone(
+    const secp256k1_context *ctx,
+    void *prealloc
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_WARN_UNUSED_RESULT;
+
+/** Destroy a secp256k1 context object that has been created in
+ *  caller-provided memory.
+ *
+ *  The context pointer may not be used afterwards.
+ *
+ *  The context to destroy must have been created using
+ *  secp256k1_context_preallocated_create or secp256k1_context_preallocated_clone.
+ *  If the context has instead been created using secp256k1_context_create or
+ *  secp256k1_context_clone, the behaviour is undefined. In that case,
+ *  secp256k1_context_destroy must be used instead.
+ *
+ *  If required, it is the responsibility of the caller to deallocate the block
+ *  of memory properly after this function returns, e.g., by calling free on the
+ *  preallocated pointer given to secp256k1_context_preallocated_create or
+ *  secp256k1_context_preallocated_clone.
+ *
+ *  Args:   ctx: pointer to a context to destroy, constructed using
+ *               secp256k1_context_preallocated_create or
+ *               secp256k1_context_preallocated_clone
+ *               (i.e., not secp256k1_context_static).
+ */
+SECP256K1_API void secp256k1_context_preallocated_destroy(
+    secp256k1_context *ctx
+) SECP256K1_ARG_NONNULL(1);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SECP256K1_PREALLOCATED_H */
--- a/libsecp256k1/include/secp256k1_recovery.h
+++ b/libsecp256k1/include/secp256k1_recovery.h
@@ -0,0 +1,113 @@
+#ifndef SECP256K1_RECOVERY_H
+#define SECP256K1_RECOVERY_H
+
+#include "secp256k1.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Opaque data structure that holds a parsed ECDSA signature,
+ *  supporting pubkey recovery.
+ *
+ *  The exact representation of data inside is implementation defined and not
+ *  guaranteed to be portable between different platforms or versions. It is
+ *  however guaranteed to be 65 bytes in size, and can be safely copied/moved.
+ *  If you need to convert to a format suitable for storage or transmission, use
+ *  the secp256k1_ecdsa_recoverable_signature_serialize_* and
+ *  secp256k1_ecdsa_recoverable_signature_parse_* functions.
+ *
+ *  Furthermore, it is guaranteed that identical signatures (including their
+ *  recoverability) will have identical representation, so they can be
+ *  memcmp'ed.
+ */
+typedef struct secp256k1_ecdsa_recoverable_signature {
+    unsigned char data[65];
+} secp256k1_ecdsa_recoverable_signature;
+
+/** Parse a compact ECDSA signature (64 bytes + recovery id).
+ *
+ *  Returns: 1 when the signature could be parsed, 0 otherwise
+ *  Args: ctx:     pointer to a context object
+ *  Out:  sig:     pointer to a signature object
+ *  In:   input64: pointer to a 64-byte compact signature
+ *        recid:   the recovery id (0, 1, 2 or 3)
+ */
+SECP256K1_API int secp256k1_ecdsa_recoverable_signature_parse_compact(
+    const secp256k1_context *ctx,
+    secp256k1_ecdsa_recoverable_signature *sig,
+    const unsigned char *input64,
+    int recid
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Convert a recoverable signature into a normal signature.
+ *
+ *  Returns: 1
+ *  Args: ctx:    pointer to a context object.
+ *  Out:  sig:    pointer to a normal signature.
+ *  In:   sigin:  pointer to a recoverable signature.
+ */
+SECP256K1_API int secp256k1_ecdsa_recoverable_signature_convert(
+    const secp256k1_context *ctx,
+    secp256k1_ecdsa_signature *sig,
+    const secp256k1_ecdsa_recoverable_signature *sigin
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Serialize an ECDSA signature in compact format (64 bytes + recovery id).
+ *
+ *  Returns: 1
+ *  Args: ctx:      pointer to a context object.
+ *  Out:  output64: pointer to a 64-byte array of the compact signature.
+ *        recid:    pointer to an integer to hold the recovery id.
+ *  In:   sig:      pointer to an initialized signature object.
+ */
+SECP256K1_API int secp256k1_ecdsa_recoverable_signature_serialize_compact(
+    const secp256k1_context *ctx,
+    unsigned char *output64,
+    int *recid,
+    const secp256k1_ecdsa_recoverable_signature *sig
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4);
+
+/** Create a recoverable ECDSA signature.
+ *
+ *  Returns: 1: signature created
+ *           0: the nonce generation function failed, or the secret key was invalid.
+ *  Args:    ctx:       pointer to a context object (not secp256k1_context_static).
+ *  Out:     sig:       pointer to an array where the signature will be placed.
+ *  In:      msghash32: the 32-byte message hash being signed.
+ *           seckey:    pointer to a 32-byte secret key.
+ *           noncefp:   pointer to a nonce generation function. If NULL,
+ *                      secp256k1_nonce_function_default is used.
+ *           ndata:     pointer to arbitrary data used by the nonce generation function
+ *                      (can be NULL for secp256k1_nonce_function_default).
+ */
+SECP256K1_API int secp256k1_ecdsa_sign_recoverable(
+    const secp256k1_context *ctx,
+    secp256k1_ecdsa_recoverable_signature *sig,
+    const unsigned char *msghash32,
+    const unsigned char *seckey,
+    secp256k1_nonce_function noncefp,
+    const void *ndata
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4);
+
+/** Recover an ECDSA public key from a signature.
+ *
+ *  Returns: 1: public key successfully recovered (which guarantees a correct signature).
+ *           0: otherwise.
+ *  Args:    ctx:       pointer to a context object.
+ *  Out:     pubkey:    pointer to the recovered public key.
+ *  In:      sig:       pointer to initialized signature that supports pubkey recovery.
+ *           msghash32: the 32-byte message hash assumed to be signed.
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_recover(
+    const secp256k1_context *ctx,
+    secp256k1_pubkey *pubkey,
+    const secp256k1_ecdsa_recoverable_signature *sig,
+    const unsigned char *msghash32
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SECP256K1_RECOVERY_H */
--- a/libsecp256k1/include/secp256k1_schnorrsig.h
+++ b/libsecp256k1/include/secp256k1_schnorrsig.h
@@ -0,0 +1,190 @@
+#ifndef SECP256K1_SCHNORRSIG_H
+#define SECP256K1_SCHNORRSIG_H
+
+#include "secp256k1.h"
+#include "secp256k1_extrakeys.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** This module implements a variant of Schnorr signatures compliant with
+ *  Bitcoin Improvement Proposal 340 "Schnorr Signatures for secp256k1"
+ *  (https://github.com/bitcoin/bips/blob/master/bip-0340.mediawiki).
+ */
+
+/** A pointer to a function to deterministically generate a nonce.
+ *
+ *  Same as secp256k1_nonce_function with the exception of accepting an
+ *  additional pubkey argument and not requiring an attempt argument. The pubkey
+ *  argument can protect signature schemes with key-prefixed challenge hash
+ *  inputs against reusing the nonce when signing with the wrong precomputed
+ *  pubkey.
+ *
+ *  Returns: 1 if a nonce was successfully generated. 0 will cause signing to
+ *           return an error.
+ *  Out:  nonce32: pointer to a 32-byte array to be filled by the function
+ *  In:       msg: the message being signed. Is NULL if and only if msglen
+ *                 is 0.
+ *         msglen: the length of the message
+ *          key32: pointer to a 32-byte secret key (will not be NULL)
+ *     xonly_pk32: the 32-byte serialized xonly pubkey corresponding to key32
+ *                 (will not be NULL)
+ *           algo: pointer to an array describing the signature
+ *                 algorithm (will not be NULL)
+ *        algolen: the length of the algo array
+ *           data: arbitrary data pointer that is passed through
+ *
+ *  Except for test cases, this function should compute some cryptographic hash of
+ *  the message, the key, the pubkey, the algorithm description, and data.
+ */
+typedef int (*secp256k1_nonce_function_hardened)(
+    unsigned char *nonce32,
+    const unsigned char *msg,
+    size_t msglen,
+    const unsigned char *key32,
+    const unsigned char *xonly_pk32,
+    const unsigned char *algo,
+    size_t algolen,
+    void *data
+);
+
+/** An implementation of the nonce generation function as defined in Bitcoin
+ *  Improvement Proposal 340 "Schnorr Signatures for secp256k1"
+ *  (https://github.com/bitcoin/bips/blob/master/bip-0340.mediawiki).
+ *
+ *  If a data pointer is passed, it is assumed to be a pointer to 32 bytes of
+ *  auxiliary random data as defined in BIP-340. If the data pointer is NULL,
+ *  the nonce derivation procedure follows BIP-340 by setting the auxiliary
+ *  random data to zero. The algo argument must be non-NULL, otherwise the
+ *  function will fail and return 0. The hash will be tagged with algo.
+ *  Therefore, to create BIP-340 compliant signatures, algo must be set to
+ *  "BIP0340/nonce" and algolen to 13.
+ */
+SECP256K1_API const secp256k1_nonce_function_hardened secp256k1_nonce_function_bip340;
+
+/** Data structure that contains additional arguments for schnorrsig_sign_custom.
+ *
+ *  A schnorrsig_extraparams structure object can be initialized correctly by
+ *  setting it to SECP256K1_SCHNORRSIG_EXTRAPARAMS_INIT.
+ *
+ *  Members:
+ *      magic: set to SECP256K1_SCHNORRSIG_EXTRAPARAMS_MAGIC at initialization
+ *             and has no other function than making sure the object is
+ *             initialized.
+ *    noncefp: pointer to a nonce generation function. If NULL,
+ *             secp256k1_nonce_function_bip340 is used
+ *      ndata: pointer to arbitrary data used by the nonce generation function
+ *             (can be NULL). If it is non-NULL and
+ *             secp256k1_nonce_function_bip340 is used, then ndata must be a
+ *             pointer to 32-byte auxiliary randomness as per BIP-340.
+ */
+typedef struct secp256k1_schnorrsig_extraparams {
+    unsigned char magic[4];
+    secp256k1_nonce_function_hardened noncefp;
+    void *ndata;
+} secp256k1_schnorrsig_extraparams;
+
+#define SECP256K1_SCHNORRSIG_EXTRAPARAMS_MAGIC { 0xda, 0x6f, 0xb3, 0x8c }
+#define SECP256K1_SCHNORRSIG_EXTRAPARAMS_INIT {\
+    SECP256K1_SCHNORRSIG_EXTRAPARAMS_MAGIC,\
+    NULL,\
+    NULL\
+}
+
+/** Create a Schnorr signature.
+ *
+ *  Does _not_ strictly follow BIP-340 because it does not verify the resulting
+ *  signature. Instead, you can manually use secp256k1_schnorrsig_verify and
+ *  abort if it fails.
+ *
+ *  This function only signs 32-byte messages. If you have messages of a
+ *  different size (or the same size but without a context-specific tag
+ *  prefix), it is recommended to create a 32-byte message hash with
+ *  secp256k1_tagged_sha256 and then sign the hash. Tagged hashing allows
+ *  providing a context-specific tag for domain separation. This prevents
+ *  signatures from being valid in multiple contexts by accident.
+ *
+ *  Returns 1 on success, 0 on failure.
+ *  Args:    ctx: pointer to a context object (not secp256k1_context_static).
+ *  Out:   sig64: pointer to a 64-byte array to store the serialized signature.
+ *  In:    msg32: the 32-byte message being signed.
+ *       keypair: pointer to an initialized keypair.
+ *    aux_rand32: 32 bytes of fresh randomness. While recommended to provide
+ *                this, it is only supplemental to security and can be NULL. A
+ *                NULL argument is treated the same as an all-zero one. See
+ *                BIP-340 "Default Signing" for a full explanation of this
+ *                argument and for guidance if randomness is expensive.
+ */
+SECP256K1_API int secp256k1_schnorrsig_sign32(
+    const secp256k1_context *ctx,
+    unsigned char *sig64,
+    const unsigned char *msg32,
+    const secp256k1_keypair *keypair,
+    const unsigned char *aux_rand32
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4);
+
+/** Same as secp256k1_schnorrsig_sign32, but DEPRECATED. Will be removed in
+ *  future versions. */
+SECP256K1_API int secp256k1_schnorrsig_sign(
+    const secp256k1_context *ctx,
+    unsigned char *sig64,
+    const unsigned char *msg32,
+    const secp256k1_keypair *keypair,
+    const unsigned char *aux_rand32
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4)
+  SECP256K1_DEPRECATED("Use secp256k1_schnorrsig_sign32 instead");
+
+/** Create a Schnorr signature with a more flexible API.
+ *
+ *  Same arguments as secp256k1_schnorrsig_sign32 except that it allows signing
+ *  variable length messages and accepts a pointer to an extraparams object that
+ *  allows customizing signing by passing additional arguments.
+ *
+ *  Equivalent to secp256k1_schnorrsig_sign32(..., aux_rand32) if msglen is 32
+ *  and extraparams is initialized as follows:
+ *  ```
+ *  secp256k1_schnorrsig_extraparams extraparams = SECP256K1_SCHNORRSIG_EXTRAPARAMS_INIT;
+ *  extraparams.ndata = (unsigned char*)aux_rand32;
+ *  ```
+ *
+ *  Returns 1 on success, 0 on failure.
+ *  Args:   ctx: pointer to a context object (not secp256k1_context_static).
+ *  Out:  sig64: pointer to a 64-byte array to store the serialized signature.
+ *  In:     msg: the message being signed. Can only be NULL if msglen is 0.
+ *       msglen: length of the message.
+ *      keypair: pointer to an initialized keypair.
+ *  extraparams: pointer to an extraparams object (can be NULL).
+ */
+SECP256K1_API int secp256k1_schnorrsig_sign_custom(
+    const secp256k1_context *ctx,
+    unsigned char *sig64,
+    const unsigned char *msg,
+    size_t msglen,
+    const secp256k1_keypair *keypair,
+    secp256k1_schnorrsig_extraparams *extraparams
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(5);
+
+/** Verify a Schnorr signature.
+ *
+ *  Returns: 1: correct signature
+ *           0: incorrect signature
+ *  Args:    ctx: pointer to a context object.
+ *  In:    sig64: pointer to the 64-byte signature to verify.
+ *           msg: the message being verified. Can only be NULL if msglen is 0.
+ *        msglen: length of the message
+ *        pubkey: pointer to an x-only public key to verify with
+ */
+SECP256K1_API SECP256K1_WARN_UNUSED_RESULT int secp256k1_schnorrsig_verify(
+    const secp256k1_context *ctx,
+    const unsigned char *sig64,
+    const unsigned char *msg,
+    size_t msglen,
+    const secp256k1_xonly_pubkey *pubkey
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(5);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SECP256K1_SCHNORRSIG_H */
--- a/libsecp256k1/src/asm/field_10x26_arm.s
+++ b/libsecp256k1/src/asm/field_10x26_arm.s
@@ -0,0 +1,916 @@
+@ vim: set tabstop=8 softtabstop=8 shiftwidth=8 noexpandtab syntax=armasm:
+/***********************************************************************
+ * Copyright (c) 2014 Wladimir J. van der Laan                         *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+/*
+ARM implementation of field_10x26 inner loops.
+
+Note:
+
+- To avoid unnecessary loads and make use of available registers, two
+  'passes' are always interleaved, with the odd passes accumulating c' and d'
+  which will be added to c and d respectively in the even passes
+
+*/
+
+	.syntax unified
+	@ eabi attributes - see readelf -A
+	.eabi_attribute 24, 1 @ Tag_ABI_align_needed = 8-byte
+	.eabi_attribute 25, 1 @ Tag_ABI_align_preserved = 8-byte, except leaf SP
+	.text
+
+	@ Field constants
+	.set field_R0, 0x3d10
+	.set field_R1, 0x400
+	.set field_not_M, 0xfc000000	@ ~M = ~0x3ffffff
+
+	.align	2
+	.global secp256k1_fe_mul_inner
+	.type	secp256k1_fe_mul_inner, %function
+	.hidden secp256k1_fe_mul_inner
+	@ Arguments:
+	@  r0  r      Restrict: can overlap with a, not with b
+	@  r1  a
+	@  r2  b
+	@ Stack (total 4+10*4 = 44)
+	@  sp + #0        saved 'r' pointer
+	@  sp + #4 + 4*X  t0,t1,t2,t3,t4,t5,t6,t7,u8,t9
+secp256k1_fe_mul_inner:
+	stmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r14}
+	sub	sp, sp, #48			@ frame=44 + alignment
+	str     r0, [sp, #0]			@ save result address, we need it only at the end
+
+	/******************************************
+	 * Main computation code.
+	 ******************************************
+
+	Allocation:
+	    r0,r14,r7,r8   scratch
+	    r1       a (pointer)
+	    r2       b (pointer)
+	    r3:r4    c
+	    r5:r6    d
+	    r11:r12  c'
+	    r9:r10   d'
+
+	Note: do not write to r[] here, it may overlap with a[]
+	*/
+
+	/* A - interleaved with B */
+	ldr	r7, [r1, #0*4]			@ a[0]
+	ldr	r8, [r2, #9*4]			@ b[9]
+	ldr	r0, [r1, #1*4]			@ a[1]
+	umull	r5, r6, r7, r8			@ d = a[0] * b[9]
+	ldr	r14, [r2, #8*4]			@ b[8]
+	umull	r9, r10, r0, r8			@ d' = a[1] * b[9]
+	ldr	r7, [r1, #2*4]			@ a[2]
+	umlal	r5, r6, r0, r14			@ d += a[1] * b[8]
+	ldr	r8, [r2, #7*4] 			@ b[7]
+	umlal	r9, r10, r7, r14		@ d' += a[2] * b[8]
+	ldr	r0, [r1, #3*4]   		@ a[3]
+	umlal	r5, r6, r7, r8   		@ d += a[2] * b[7]
+	ldr	r14, [r2, #6*4]   		@ b[6]
+	umlal	r9, r10, r0, r8  		@ d' += a[3] * b[7]
+	ldr	r7, [r1, #4*4]   		@ a[4]
+	umlal	r5, r6, r0, r14   		@ d += a[3] * b[6]
+	ldr	r8, [r2, #5*4]   		@ b[5]
+	umlal	r9, r10, r7, r14  		@ d' += a[4] * b[6]
+	ldr	r0, [r1, #5*4]   		@ a[5]
+	umlal	r5, r6, r7, r8   		@ d += a[4] * b[5]
+	ldr	r14, [r2, #4*4]   		@ b[4]
+	umlal	r9, r10, r0, r8  		@ d' += a[5] * b[5]
+	ldr	r7, [r1, #6*4]   		@ a[6]
+	umlal	r5, r6, r0, r14   		@ d += a[5] * b[4]
+	ldr	r8, [r2, #3*4]   		@ b[3]
+	umlal	r9, r10, r7, r14  		@ d' += a[6] * b[4]
+	ldr	r0, [r1, #7*4]   		@ a[7]
+	umlal	r5, r6, r7, r8   		@ d += a[6] * b[3]
+	ldr	r14, [r2, #2*4]   		@ b[2]
+	umlal	r9, r10, r0, r8  		@ d' += a[7] * b[3]
+	ldr	r7, [r1, #8*4]   		@ a[8]
+	umlal	r5, r6, r0, r14   		@ d += a[7] * b[2]
+	ldr	r8, [r2, #1*4]   		@ b[1]
+	umlal	r9, r10, r7, r14  		@ d' += a[8] * b[2]
+	ldr	r0, [r1, #9*4]   		@ a[9]
+	umlal	r5, r6, r7, r8   		@ d += a[8] * b[1]
+	ldr	r14, [r2, #0*4]   		@ b[0]
+	umlal	r9, r10, r0, r8  		@ d' += a[9] * b[1]
+	ldr	r7, [r1, #0*4]   		@ a[0]
+	umlal	r5, r6, r0, r14   		@ d += a[9] * b[0]
+	@ r7,r14 used in B
+
+	bic	r0, r5, field_not_M 		@ t9 = d & M
+	str     r0, [sp, #4 + 4*9]
+	mov	r5, r5, lsr #26     		@ d >>= 26 
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+
+	/* B */
+	umull	r3, r4, r7, r14   		@ c = a[0] * b[0]
+	adds	r5, r5, r9       		@ d += d'
+	adc	r6, r6, r10
+
+	bic	r0, r5, field_not_M 		@ u0 = d & M
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u0 * R0
+	umlal   r3, r4, r0, r14
+
+	bic	r14, r3, field_not_M 		@ t0 = c & M
+	str	r14, [sp, #4 + 0*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u0 * R1
+	umlal   r3, r4, r0, r14
+
+	/* C - interleaved with D */
+	ldr	r7, [r1, #0*4]   		@ a[0]
+	ldr	r8, [r2, #2*4]   		@ b[2]
+	ldr	r14, [r2, #1*4]   		@ b[1]
+	umull	r11, r12, r7, r8   		@ c' = a[0] * b[2]
+	ldr	r0, [r1, #1*4]   		@ a[1]
+	umlal   r3, r4, r7, r14   		@ c += a[0] * b[1]
+	ldr	r8, [r2, #0*4]   		@ b[0]
+	umlal   r11, r12, r0, r14   		@ c' += a[1] * b[1]
+	ldr	r7, [r1, #2*4]   		@ a[2]
+	umlal   r3, r4, r0, r8   		@ c += a[1] * b[0]
+	ldr	r14, [r2, #9*4]   		@ b[9]
+	umlal   r11, r12, r7, r8   		@ c' += a[2] * b[0]
+	ldr	r0, [r1, #3*4]   		@ a[3]
+	umlal	r5, r6, r7, r14   		@ d += a[2] * b[9]
+	ldr	r8, [r2, #8*4]   		@ b[8]
+	umull	r9, r10, r0, r14   		@ d' = a[3] * b[9]
+	ldr	r7, [r1, #4*4]   		@ a[4]
+	umlal	r5, r6, r0, r8   		@ d += a[3] * b[8]
+	ldr	r14, [r2, #7*4]   		@ b[7]
+	umlal	r9, r10, r7, r8   		@ d' += a[4] * b[8]
+	ldr	r0, [r1, #5*4]   		@ a[5]
+	umlal	r5, r6, r7, r14   		@ d += a[4] * b[7]
+	ldr	r8, [r2, #6*4]   		@ b[6]
+	umlal	r9, r10, r0, r14   		@ d' += a[5] * b[7]
+	ldr	r7, [r1, #6*4]   		@ a[6]
+	umlal	r5, r6, r0, r8   		@ d += a[5] * b[6]
+	ldr	r14, [r2, #5*4]   		@ b[5]
+	umlal	r9, r10, r7, r8   		@ d' += a[6] * b[6]
+	ldr	r0, [r1, #7*4]   		@ a[7]
+	umlal	r5, r6, r7, r14   		@ d += a[6] * b[5]
+	ldr	r8, [r2, #4*4]   		@ b[4]
+	umlal	r9, r10, r0, r14   		@ d' += a[7] * b[5]
+	ldr	r7, [r1, #8*4]   		@ a[8]
+	umlal	r5, r6, r0, r8   		@ d += a[7] * b[4]
+	ldr	r14, [r2, #3*4]   		@ b[3]
+	umlal	r9, r10, r7, r8   		@ d' += a[8] * b[4]
+	ldr	r0, [r1, #9*4]   		@ a[9]
+	umlal	r5, r6, r7, r14   		@ d += a[8] * b[3]
+	ldr	r8, [r2, #2*4]   		@ b[2]
+	umlal	r9, r10, r0, r14   		@ d' += a[9] * b[3]
+	umlal	r5, r6, r0, r8   		@ d += a[9] * b[2]
+
+	bic	r0, r5, field_not_M 		@ u1 = d & M
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u1 * R0
+	umlal   r3, r4, r0, r14
+
+	bic	r14, r3, field_not_M 		@ t1 = c & M
+	str	r14, [sp, #4 + 1*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u1 * R1
+	umlal   r3, r4, r0, r14
+
+	/* D */
+	adds	r3, r3, r11			@ c += c'
+	adc	r4, r4, r12
+	adds	r5, r5, r9			@ d += d'
+	adc	r6, r6, r10
+
+	bic	r0, r5, field_not_M 		@ u2 = d & M
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u2 * R0
+	umlal   r3, r4, r0, r14
+
+	bic	r14, r3, field_not_M 		@ t2 = c & M
+	str	r14, [sp, #4 + 2*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u2 * R1
+	umlal   r3, r4, r0, r14
+
+	/* E - interleaved with F */
+	ldr	r7, [r1, #0*4]   		@ a[0]
+	ldr	r8, [r2, #4*4]   		@ b[4]
+	umull	r11, r12, r7, r8   		@ c' = a[0] * b[4]
+	ldr	r8, [r2, #3*4]   		@ b[3]
+	umlal   r3, r4, r7, r8   		@ c += a[0] * b[3]
+	ldr	r7, [r1, #1*4]   		@ a[1]
+	umlal   r11, r12, r7, r8   		@ c' += a[1] * b[3]
+	ldr	r8, [r2, #2*4]   		@ b[2]
+	umlal   r3, r4, r7, r8   		@ c += a[1] * b[2]
+	ldr	r7, [r1, #2*4]   		@ a[2]
+	umlal   r11, r12, r7, r8   		@ c' += a[2] * b[2]
+	ldr	r8, [r2, #1*4]   		@ b[1]
+	umlal   r3, r4, r7, r8   		@ c += a[2] * b[1]
+	ldr	r7, [r1, #3*4]   		@ a[3]
+	umlal   r11, r12, r7, r8   		@ c' += a[3] * b[1]
+	ldr	r8, [r2, #0*4]   		@ b[0]
+	umlal   r3, r4, r7, r8   		@ c += a[3] * b[0]
+	ldr	r7, [r1, #4*4]   		@ a[4]
+	umlal   r11, r12, r7, r8   		@ c' += a[4] * b[0]
+	ldr	r8, [r2, #9*4]   		@ b[9]
+	umlal	r5, r6, r7, r8   		@ d += a[4] * b[9]
+	ldr	r7, [r1, #5*4]   		@ a[5]
+	umull	r9, r10, r7, r8   		@ d' = a[5] * b[9]
+	ldr	r8, [r2, #8*4]   		@ b[8]
+	umlal	r5, r6, r7, r8   		@ d += a[5] * b[8]
+	ldr	r7, [r1, #6*4]   		@ a[6]
+	umlal	r9, r10, r7, r8   		@ d' += a[6] * b[8]
+	ldr	r8, [r2, #7*4]   		@ b[7]
+	umlal	r5, r6, r7, r8   		@ d += a[6] * b[7]
+	ldr	r7, [r1, #7*4]   		@ a[7]
+	umlal	r9, r10, r7, r8   		@ d' += a[7] * b[7]
+	ldr	r8, [r2, #6*4]   		@ b[6]
+	umlal	r5, r6, r7, r8   		@ d += a[7] * b[6]
+	ldr	r7, [r1, #8*4]   		@ a[8]
+	umlal	r9, r10, r7, r8   		@ d' += a[8] * b[6]
+	ldr	r8, [r2, #5*4]   		@ b[5]
+	umlal	r5, r6, r7, r8   		@ d += a[8] * b[5]
+	ldr	r7, [r1, #9*4]   		@ a[9]
+	umlal	r9, r10, r7, r8   		@ d' += a[9] * b[5]
+	ldr	r8, [r2, #4*4]   		@ b[4]
+	umlal	r5, r6, r7, r8   		@ d += a[9] * b[4]
+
+	bic	r0, r5, field_not_M 		@ u3 = d & M
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u3 * R0
+	umlal   r3, r4, r0, r14
+
+	bic	r14, r3, field_not_M 		@ t3 = c & M
+	str	r14, [sp, #4 + 3*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u3 * R1
+	umlal   r3, r4, r0, r14
+
+	/* F */
+	adds	r3, r3, r11			@ c += c'
+	adc	r4, r4, r12
+	adds	r5, r5, r9			@ d += d'
+	adc	r6, r6, r10
+
+	bic	r0, r5, field_not_M 		@ u4 = d & M
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u4 * R0
+	umlal   r3, r4, r0, r14
+
+	bic	r14, r3, field_not_M 		@ t4 = c & M
+	str	r14, [sp, #4 + 4*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u4 * R1
+	umlal   r3, r4, r0, r14
+
+	/* G - interleaved with H */
+	ldr	r7, [r1, #0*4]   		@ a[0]
+	ldr	r8, [r2, #6*4]   		@ b[6]
+	ldr	r14, [r2, #5*4]   		@ b[5]
+	umull	r11, r12, r7, r8   		@ c' = a[0] * b[6]
+	ldr	r0, [r1, #1*4]   		@ a[1]
+	umlal   r3, r4, r7, r14   		@ c += a[0] * b[5]
+	ldr	r8, [r2, #4*4]   		@ b[4]
+	umlal   r11, r12, r0, r14   		@ c' += a[1] * b[5]
+	ldr	r7, [r1, #2*4]   		@ a[2]
+	umlal   r3, r4, r0, r8   		@ c += a[1] * b[4]
+	ldr	r14, [r2, #3*4]   		@ b[3]
+	umlal   r11, r12, r7, r8   		@ c' += a[2] * b[4]
+	ldr	r0, [r1, #3*4]   		@ a[3]
+	umlal   r3, r4, r7, r14   		@ c += a[2] * b[3]
+	ldr	r8, [r2, #2*4]   		@ b[2]
+	umlal   r11, r12, r0, r14   		@ c' += a[3] * b[3]
+	ldr	r7, [r1, #4*4]   		@ a[4]
+	umlal   r3, r4, r0, r8   		@ c += a[3] * b[2]
+	ldr	r14, [r2, #1*4]   		@ b[1]
+	umlal   r11, r12, r7, r8   		@ c' += a[4] * b[2]
+	ldr	r0, [r1, #5*4]   		@ a[5]
+	umlal   r3, r4, r7, r14   		@ c += a[4] * b[1]
+	ldr	r8, [r2, #0*4]   		@ b[0]
+	umlal   r11, r12, r0, r14   		@ c' += a[5] * b[1]
+	ldr	r7, [r1, #6*4]   		@ a[6]
+	umlal   r3, r4, r0, r8   		@ c += a[5] * b[0]
+	ldr	r14, [r2, #9*4]   		@ b[9]
+	umlal   r11, r12, r7, r8   		@ c' += a[6] * b[0]
+	ldr	r0, [r1, #7*4]   		@ a[7]
+	umlal	r5, r6, r7, r14   		@ d += a[6] * b[9]
+	ldr	r8, [r2, #8*4]   		@ b[8]
+	umull	r9, r10, r0, r14   		@ d' = a[7] * b[9]
+	ldr	r7, [r1, #8*4]   		@ a[8]
+	umlal	r5, r6, r0, r8   		@ d += a[7] * b[8]
+	ldr	r14, [r2, #7*4]   		@ b[7]
+	umlal	r9, r10, r7, r8   		@ d' += a[8] * b[8]
+	ldr	r0, [r1, #9*4]   		@ a[9]
+	umlal	r5, r6, r7, r14   		@ d += a[8] * b[7]
+	ldr	r8, [r2, #6*4]   		@ b[6]
+	umlal	r9, r10, r0, r14   		@ d' += a[9] * b[7]
+	umlal	r5, r6, r0, r8   		@ d += a[9] * b[6]
+
+	bic	r0, r5, field_not_M 		@ u5 = d & M
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u5 * R0
+	umlal   r3, r4, r0, r14
+
+	bic	r14, r3, field_not_M 		@ t5 = c & M
+	str	r14, [sp, #4 + 5*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u5 * R1
+	umlal   r3, r4, r0, r14
+
+	/* H */
+	adds	r3, r3, r11			@ c += c'
+	adc	r4, r4, r12
+	adds	r5, r5, r9			@ d += d'
+	adc	r6, r6, r10
+
+	bic	r0, r5, field_not_M 		@ u6 = d & M
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u6 * R0
+	umlal   r3, r4, r0, r14
+
+	bic	r14, r3, field_not_M 		@ t6 = c & M
+	str	r14, [sp, #4 + 6*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u6 * R1
+	umlal   r3, r4, r0, r14
+
+	/* I - interleaved with J */
+	ldr	r8, [r2, #8*4]   		@ b[8]
+	ldr	r7, [r1, #0*4]   		@ a[0]
+	ldr	r14, [r2, #7*4]   		@ b[7]
+	umull   r11, r12, r7, r8   		@ c' = a[0] * b[8]
+	ldr	r0, [r1, #1*4]   		@ a[1]
+	umlal   r3, r4, r7, r14   		@ c += a[0] * b[7]
+	ldr	r8, [r2, #6*4]   		@ b[6]
+	umlal   r11, r12, r0, r14   		@ c' += a[1] * b[7]
+	ldr	r7, [r1, #2*4]   		@ a[2]
+	umlal   r3, r4, r0, r8   		@ c += a[1] * b[6]
+	ldr	r14, [r2, #5*4]   		@ b[5]
+	umlal   r11, r12, r7, r8   		@ c' += a[2] * b[6]
+	ldr	r0, [r1, #3*4]   		@ a[3]
+	umlal   r3, r4, r7, r14   		@ c += a[2] * b[5]
+	ldr	r8, [r2, #4*4]   		@ b[4]
+	umlal   r11, r12, r0, r14   		@ c' += a[3] * b[5]
+	ldr	r7, [r1, #4*4]   		@ a[4]
+	umlal   r3, r4, r0, r8   		@ c += a[3] * b[4]
+	ldr	r14, [r2, #3*4]   		@ b[3]
+	umlal   r11, r12, r7, r8   		@ c' += a[4] * b[4]
+	ldr	r0, [r1, #5*4]   		@ a[5]
+	umlal   r3, r4, r7, r14   		@ c += a[4] * b[3]
+	ldr	r8, [r2, #2*4]   		@ b[2]
+	umlal   r11, r12, r0, r14   		@ c' += a[5] * b[3]
+	ldr	r7, [r1, #6*4]   		@ a[6]
+	umlal   r3, r4, r0, r8   		@ c += a[5] * b[2]
+	ldr	r14, [r2, #1*4]   		@ b[1]
+	umlal   r11, r12, r7, r8   		@ c' += a[6] * b[2]
+	ldr	r0, [r1, #7*4]   		@ a[7]
+	umlal   r3, r4, r7, r14   		@ c += a[6] * b[1]
+	ldr	r8, [r2, #0*4]   		@ b[0]
+	umlal   r11, r12, r0, r14   		@ c' += a[7] * b[1]
+	ldr	r7, [r1, #8*4]   		@ a[8]
+	umlal   r3, r4, r0, r8   		@ c += a[7] * b[0]
+	ldr	r14, [r2, #9*4]   		@ b[9]
+	umlal   r11, r12, r7, r8   		@ c' += a[8] * b[0]
+	ldr	r0, [r1, #9*4]   		@ a[9]
+	umlal	r5, r6, r7, r14   		@ d += a[8] * b[9]
+	ldr	r8, [r2, #8*4]   		@ b[8]
+	umull	r9, r10, r0, r14  		@ d' = a[9] * b[9]
+	umlal	r5, r6, r0, r8   		@ d += a[9] * b[8]
+
+	bic	r0, r5, field_not_M 		@ u7 = d & M
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u7 * R0
+	umlal   r3, r4, r0, r14
+
+	bic	r14, r3, field_not_M 		@ t7 = c & M
+	str	r14, [sp, #4 + 7*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u7 * R1
+	umlal   r3, r4, r0, r14
+
+	/* J */
+	adds	r3, r3, r11			@ c += c'
+	adc	r4, r4, r12
+	adds	r5, r5, r9			@ d += d'
+	adc	r6, r6, r10
+
+	bic	r0, r5, field_not_M 		@ u8 = d & M
+	str	r0, [sp, #4 + 8*4]
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u8 * R0
+	umlal   r3, r4, r0, r14
+
+	/******************************************
+	 * compute and write back result
+	 ******************************************
+	Allocation:
+	    r0    r
+	    r3:r4 c
+	    r5:r6 d
+	    r7    t0
+	    r8    t1
+	    r9    t2
+	    r11   u8
+	    r12   t9
+	    r1,r2,r10,r14 scratch
+
+	Note: do not read from a[] after here, it may overlap with r[]
+	*/
+	ldr	r0, [sp, #0]
+	add	r1, sp, #4 + 3*4		@ r[3..7] = t3..7, r11=u8, r12=t9
+	ldmia	r1, {r2,r7,r8,r9,r10,r11,r12}
+	add	r1, r0, #3*4
+	stmia	r1, {r2,r7,r8,r9,r10}
+
+	bic	r2, r3, field_not_M 		@ r[8] = c & M
+	str	r2, [r0, #8*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u8 * R1
+	umlal   r3, r4, r11, r14
+	movw    r14, field_R0			@ c += d * R0
+	umlal   r3, r4, r5, r14
+	adds	r3, r3, r12			@ c += t9
+	adc	r4, r4, #0
+
+	add	r1, sp, #4 + 0*4		@ r7,r8,r9 = t0,t1,t2
+	ldmia	r1, {r7,r8,r9}
+
+	ubfx	r2, r3, #0, #22     		@ r[9] = c & (M >> 4)
+	str	r2, [r0, #9*4]
+	mov	r3, r3, lsr #22     		@ c >>= 22
+	orr	r3, r3, r4, asl #10
+	mov     r4, r4, lsr #22
+	movw    r14, field_R1 << 4   		@ c += d * (R1 << 4)
+	umlal   r3, r4, r5, r14
+
+	movw    r14, field_R0 >> 4   		@ d = c * (R0 >> 4) + t0 (64x64 multiply+add)
+	umull	r5, r6, r3, r14			@ d = c.lo * (R0 >> 4)
+	adds	r5, r5, r7	    		@ d.lo += t0
+	mla	r6, r14, r4, r6			@ d.hi += c.hi * (R0 >> 4)
+	adc	r6, r6, 0	     		@ d.hi += carry
+
+	bic	r2, r5, field_not_M 		@ r[0] = d & M
+	str	r2, [r0, #0*4]
+
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	
+	movw    r14, field_R1 >> 4   		@ d += c * (R1 >> 4) + t1 (64x64 multiply+add)
+	umull	r1, r2, r3, r14       		@ tmp = c.lo * (R1 >> 4)
+	adds	r5, r5, r8	    		@ d.lo += t1
+	adc	r6, r6, #0	    		@ d.hi += carry
+	adds	r5, r5, r1	    		@ d.lo += tmp.lo
+	mla	r2, r14, r4, r2      		@ tmp.hi += c.hi * (R1 >> 4)
+	adc	r6, r6, r2	   		@ d.hi += carry + tmp.hi
+
+	bic	r2, r5, field_not_M 		@ r[1] = d & M
+	str	r2, [r0, #1*4]
+	mov	r5, r5, lsr #26     		@ d >>= 26 (ignore hi)
+	orr	r5, r5, r6, asl #6
+
+	add	r5, r5, r9	  		@ d += t2
+	str	r5, [r0, #2*4]      		@ r[2] = d
+
+	add	sp, sp, #48
+	ldmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+	.size	secp256k1_fe_mul_inner, .-secp256k1_fe_mul_inner
+
+	.align	2
+	.global secp256k1_fe_sqr_inner
+	.type	secp256k1_fe_sqr_inner, %function
+	.hidden secp256k1_fe_sqr_inner
+	@ Arguments:
+	@  r0  r	 Can overlap with a
+	@  r1  a
+	@ Stack (total 4+10*4 = 44)
+	@  sp + #0        saved 'r' pointer
+	@  sp + #4 + 4*X  t0,t1,t2,t3,t4,t5,t6,t7,u8,t9
+secp256k1_fe_sqr_inner:
+	stmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r14}
+	sub	sp, sp, #48			@ frame=44 + alignment
+	str     r0, [sp, #0]			@ save result address, we need it only at the end
+	/******************************************
+	 * Main computation code.
+	 ******************************************
+
+	Allocation:
+	    r0,r14,r2,r7,r8   scratch
+	    r1       a (pointer)
+	    r3:r4    c
+	    r5:r6    d
+	    r11:r12  c'
+	    r9:r10   d'
+
+	Note: do not write to r[] here, it may overlap with a[]
+
+	Reading guide:
+	  - Steps come in pairs (A/B, C/D, E/F, G/H, I/J). While the first step
+	    of a pair accumulates into c and d, the products that belong to the
+	    second step are collected in c' and/or d' and added in when that
+	    second step starts (step J adds its single d term directly).
+	  - A limb annotated a[i]*2 is doubled with asl 1 after the load, so each
+	    cross product a[i]*a[j] with i != j needs only one multiply.
+	  - lsr 26 / orr asl 6 / lsr 26 on a lo:hi register pair is a 64-bit
+	    right shift by 26.
+	  - From step B on, each step masks a value u off d and adds u * R0 to
+	    c. Once the low bits of c have been taken as t, u * R1 is added as
+	    well. For u8 that second addition happens in the write-back section.
+	  - field_not_M, field_R0 and field_R1 are symbols set elsewhere in this
+	    file. M is presumably the low 26 bits, matching the shifts by 26
+	    (TODO confirm against the definitions).
+	  - t0..t7, u8 and t9 are kept in the stack frame at sp + 4 + 4*X until
+	    write-back.
+	*/
+	/* A interleaved with B */
+	ldr	r0, [r1, #1*4]			@ a[1]*2
+	ldr	r7, [r1, #0*4]			@ a[0]
+	mov	r0, r0, asl #1
+	ldr	r14, [r1, #9*4]			@ a[9]
+	umull	r3, r4, r7, r7			@ c = a[0] * a[0]
+	ldr	r8, [r1, #8*4]			@ a[8]
+	mov	r7, r7, asl #1
+	umull	r5, r6, r7, r14			@ d = a[0]*2 * a[9]
+	ldr	r7, [r1, #2*4]			@ a[2]*2
+	umull	r9, r10, r0, r14		@ d' = a[1]*2 * a[9]
+	ldr	r14, [r1, #7*4]			@ a[7]
+	umlal	r5, r6, r0, r8			@ d += a[1]*2 * a[8]
+	mov	r7, r7, asl #1
+	ldr	r0, [r1, #3*4]			@ a[3]*2
+	umlal	r9, r10, r7, r8			@ d' += a[2]*2 * a[8]
+	ldr	r8, [r1, #6*4]			@ a[6]
+	umlal	r5, r6, r7, r14			@ d += a[2]*2 * a[7]
+	mov	r0, r0, asl #1
+	ldr	r7, [r1, #4*4]			@ a[4]*2
+	umlal	r9, r10, r0, r14		@ d' += a[3]*2 * a[7]
+	ldr	r14, [r1, #5*4]			@ a[5]
+	mov	r7, r7, asl #1
+	umlal	r5, r6, r0, r8			@ d += a[3]*2 * a[6]
+	umlal	r9, r10, r7, r8			@ d' += a[4]*2 * a[6]
+	umlal	r5, r6, r7, r14			@ d += a[4]*2 * a[5]
+	umlal	r9, r10, r14, r14		@ d' += a[5] * a[5]
+
+	bic	r0, r5, field_not_M 		@ t9 = d & M
+	str     r0, [sp, #4 + 9*4]
+	mov	r5, r5, lsr #26     		@ d >>= 26 
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+
+	/* B - add d' collected during A, yields u0 and t0 */
+	adds	r5, r5, r9			@ d += d'
+	adc	r6, r6, r10
+
+	bic	r0, r5, field_not_M 		@ u0 = d & M
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u0 * R0
+	umlal   r3, r4, r0, r14
+	bic	r14, r3, field_not_M 		@ t0 = c & M
+	str	r14, [sp, #4 + 0*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u0 * R1
+	umlal   r3, r4, r0, r14
+
+	/* C interleaved with D */
+	ldr	r0, [r1, #0*4]			@ a[0]*2
+	ldr	r14, [r1, #1*4]			@ a[1]
+	mov	r0, r0, asl #1
+	ldr	r8, [r1, #2*4]			@ a[2]
+	umlal	r3, r4, r0, r14			@ c += a[0]*2 * a[1]
+	mov	r7, r8, asl #1                  @ a[2]*2
+	umull	r11, r12, r14, r14		@ c' = a[1] * a[1]
+	ldr	r14, [r1, #9*4]			@ a[9]
+	umlal	r11, r12, r0, r8		@ c' += a[0]*2 * a[2]
+	ldr	r0, [r1, #3*4]			@ a[3]*2
+	ldr	r8, [r1, #8*4]			@ a[8]
+	umlal	r5, r6, r7, r14			@ d += a[2]*2 * a[9]
+	mov	r0, r0, asl #1
+	ldr	r7, [r1, #4*4]			@ a[4]*2
+	umull	r9, r10, r0, r14		@ d' = a[3]*2 * a[9]
+	ldr	r14, [r1, #7*4]			@ a[7]
+	umlal	r5, r6, r0, r8			@ d += a[3]*2 * a[8]
+	mov	r7, r7, asl #1
+	ldr	r0, [r1, #5*4]			@ a[5]*2
+	umlal	r9, r10, r7, r8			@ d' += a[4]*2 * a[8]
+	ldr	r8, [r1, #6*4]			@ a[6]
+	mov	r0, r0, asl #1
+	umlal	r5, r6, r7, r14			@ d += a[4]*2 * a[7]
+	umlal	r9, r10, r0, r14		@ d' += a[5]*2 * a[7]
+	umlal	r5, r6, r0, r8			@ d += a[5]*2 * a[6]
+	umlal	r9, r10, r8, r8			@ d' += a[6] * a[6]
+
+	bic	r0, r5, field_not_M 		@ u1 = d & M
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u1 * R0
+	umlal   r3, r4, r0, r14
+	bic	r14, r3, field_not_M 		@ t1 = c & M
+	str	r14, [sp, #4 + 1*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u1 * R1
+	umlal   r3, r4, r0, r14
+
+	/* D - add c' and d' collected during C, yields u2 and t2 */
+	adds	r3, r3, r11			@ c += c'
+	adc	r4, r4, r12
+	adds	r5, r5, r9			@ d += d'
+	adc	r6, r6, r10
+
+	bic	r0, r5, field_not_M 		@ u2 = d & M
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u2 * R0
+	umlal   r3, r4, r0, r14
+	bic	r14, r3, field_not_M 		@ t2 = c & M
+	str	r14, [sp, #4 + 2*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u2 * R1
+	umlal   r3, r4, r0, r14
+
+	/* E interleaved with F */
+	ldr	r7, [r1, #0*4]			@ a[0]*2
+	ldr	r0, [r1, #1*4]			@ a[1]*2
+	ldr	r14, [r1, #2*4]			@ a[2]
+	mov	r7, r7, asl #1
+	ldr	r8, [r1, #3*4]			@ a[3]
+	ldr	r2, [r1, #4*4]
+	umlal	r3, r4, r7, r8			@ c += a[0]*2 * a[3]
+	mov	r0, r0, asl #1
+	umull	r11, r12, r7, r2		@ c' = a[0]*2 * a[4]
+	mov	r2, r2, asl #1			@ a[4]*2
+	umlal	r11, r12, r0, r8		@ c' += a[1]*2 * a[3]
+	ldr	r8, [r1, #9*4]			@ a[9]
+	umlal	r3, r4, r0, r14			@ c += a[1]*2 * a[2]
+	ldr	r0, [r1, #5*4]			@ a[5]*2
+	umlal	r11, r12, r14, r14		@ c' += a[2] * a[2]
+	ldr	r14, [r1, #8*4]			@ a[8]
+	mov	r0, r0, asl #1
+	umlal	r5, r6, r2, r8			@ d += a[4]*2 * a[9]
+	ldr	r7, [r1, #6*4]			@ a[6]*2
+	umull	r9, r10, r0, r8			@ d' = a[5]*2 * a[9]
+	mov	r7, r7, asl #1
+	ldr	r8, [r1, #7*4]			@ a[7]
+	umlal	r5, r6, r0, r14			@ d += a[5]*2 * a[8]
+	umlal	r9, r10, r7, r14		@ d' += a[6]*2 * a[8]
+	umlal	r5, r6, r7, r8			@ d += a[6]*2 * a[7]
+	umlal	r9, r10, r8, r8			@ d' += a[7] * a[7]
+
+	bic	r0, r5, field_not_M 		@ u3 = d & M
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u3 * R0
+	umlal   r3, r4, r0, r14
+	bic	r14, r3, field_not_M 		@ t3 = c & M
+	str	r14, [sp, #4 + 3*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u3 * R1
+	umlal   r3, r4, r0, r14
+
+	/* F - add c' and d' collected during E, yields u4 and t4 */
+	adds	r3, r3, r11			@ c += c'
+	adc	r4, r4, r12
+	adds	r5, r5, r9			@ d += d'
+	adc	r6, r6, r10
+
+	bic	r0, r5, field_not_M 		@ u4 = d & M
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u4 * R0
+	umlal   r3, r4, r0, r14
+	bic	r14, r3, field_not_M 		@ t4 = c & M
+	str	r14, [sp, #4 + 4*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u4 * R1
+	umlal   r3, r4, r0, r14
+
+	/* G interleaved with H */
+	ldr	r7, [r1, #0*4]			@ a[0]*2
+	ldr	r0, [r1, #1*4]			@ a[1]*2
+	mov	r7, r7, asl #1
+	ldr	r8, [r1, #5*4]			@ a[5]
+	ldr	r2, [r1, #6*4]			@ a[6]
+	umlal	r3, r4, r7, r8			@ c += a[0]*2 * a[5]
+	ldr	r14, [r1, #4*4]			@ a[4]
+	mov	r0, r0, asl #1
+	umull	r11, r12, r7, r2		@ c' = a[0]*2 * a[6]
+	ldr	r7, [r1, #2*4]			@ a[2]*2
+	umlal	r11, r12, r0, r8		@ c' += a[1]*2 * a[5]
+	mov	r7, r7, asl #1
+	ldr	r8, [r1, #3*4]			@ a[3]
+	umlal	r3, r4, r0, r14			@ c += a[1]*2 * a[4]
+	mov	r0, r2, asl #1			@ a[6]*2
+	umlal	r11, r12, r7, r14		@ c' += a[2]*2 * a[4]
+	ldr	r14, [r1, #9*4]			@ a[9]
+	umlal	r3, r4, r7, r8			@ c += a[2]*2 * a[3]
+	ldr	r7, [r1, #7*4]			@ a[7]*2
+	umlal	r11, r12, r8, r8		@ c' += a[3] * a[3]
+	mov	r7, r7, asl #1
+	ldr	r8, [r1, #8*4]			@ a[8]
+	umlal	r5, r6, r0, r14			@ d += a[6]*2 * a[9]
+	umull	r9, r10, r7, r14		@ d' = a[7]*2 * a[9]
+	umlal	r5, r6, r7, r8			@ d += a[7]*2 * a[8]
+	umlal	r9, r10, r8, r8			@ d' += a[8] * a[8]
+
+	bic	r0, r5, field_not_M 		@ u5 = d & M
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u5 * R0
+	umlal   r3, r4, r0, r14
+	bic	r14, r3, field_not_M 		@ t5 = c & M
+	str	r14, [sp, #4 + 5*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u5 * R1
+	umlal   r3, r4, r0, r14
+
+	/* H - add c' and d' collected during G, yields u6 and t6 */
+	adds	r3, r3, r11			@ c += c'
+	adc	r4, r4, r12
+	adds	r5, r5, r9			@ d += d'
+	adc	r6, r6, r10
+
+	bic	r0, r5, field_not_M 		@ u6 = d & M
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u6 * R0
+	umlal   r3, r4, r0, r14
+	bic	r14, r3, field_not_M 		@ t6 = c & M
+	str	r14, [sp, #4 + 6*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u6 * R1
+	umlal   r3, r4, r0, r14
+
+	/* I interleaved with J */
+	ldr	r7, [r1, #0*4]			@ a[0]*2
+	ldr	r0, [r1, #1*4]			@ a[1]*2
+	mov	r7, r7, asl #1
+	ldr	r8, [r1, #7*4]			@ a[7]
+	ldr	r2, [r1, #8*4]			@ a[8]
+	umlal	r3, r4, r7, r8			@ c += a[0]*2 * a[7]
+	ldr	r14, [r1, #6*4]			@ a[6]
+	mov	r0, r0, asl #1
+	umull	r11, r12, r7, r2		@ c' = a[0]*2 * a[8]
+	ldr	r7, [r1, #2*4]			@ a[2]*2
+	umlal	r11, r12, r0, r8		@ c' += a[1]*2 * a[7]
+	ldr	r8, [r1, #5*4]			@ a[5]
+	umlal	r3, r4, r0, r14			@ c += a[1]*2 * a[6]
+	ldr	r0, [r1, #3*4]			@ a[3]*2
+	mov	r7, r7, asl #1
+	umlal	r11, r12, r7, r14		@ c' += a[2]*2 * a[6]
+	ldr	r14, [r1, #4*4]			@ a[4]
+	mov	r0, r0, asl #1
+	umlal	r3, r4, r7, r8			@ c += a[2]*2 * a[5]
+	mov	r2, r2, asl #1			@ a[8]*2
+	umlal	r11, r12, r0, r8		@ c' += a[3]*2 * a[5]
+	umlal	r3, r4, r0, r14			@ c += a[3]*2 * a[4]
+	umlal	r11, r12, r14, r14		@ c' += a[4] * a[4]
+	ldr	r8, [r1, #9*4]			@ a[9]
+	umlal	r5, r6, r2, r8			@ d += a[8]*2 * a[9]
+	@ r8 will be used in J
+
+	bic	r0, r5, field_not_M 		@ u7 = d & M
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u7 * R0
+	umlal   r3, r4, r0, r14
+	bic	r14, r3, field_not_M 		@ t7 = c & M
+	str	r14, [sp, #4 + 7*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u7 * R1
+	umlal   r3, r4, r0, r14
+
+	/* J - add c' collected during I plus the last a[9] * a[9] term, yields u8 */
+	adds	r3, r3, r11			@ c += c'
+	adc	r4, r4, r12
+	umlal	r5, r6, r8, r8			@ d += a[9] * a[9]
+
+	bic	r0, r5, field_not_M 		@ u8 = d & M
+	str	r0, [sp, #4 + 8*4]
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	movw    r14, field_R0			@ c += u8 * R0
+	umlal   r3, r4, r0, r14
+
+	/******************************************
+	 * compute and write back result
+	 ******************************************
+	Allocation:
+	    r0    r
+	    r3:r4 c
+	    r5:r6 d
+	    r7    t0
+	    r8    t1
+	    r9    t2
+	    r11   u8
+	    r12   t9
+	    r1,r2,r10,r14 scratch
+
+	Note: do not read from a[] after here, it may overlap with r[]
+
+	r1 is overwritten with scratch addresses from here on, so the a pointer
+	is no longer available. r[3..7] are copied out of the stack frame
+	first, then r[8], r[9], r[0], r[1] and r[2] are produced in that order
+	from the remaining c and d values. Only 22 bits are kept for r[9]
+	(ubfx with width 22). The frame is released with add sp, 48 and the
+	function returns by popping the saved r14 into pc.
+	*/
+	ldr	r0, [sp, #0]
+	add	r1, sp, #4 + 3*4		@ r[3..7] = t3..7, r11=u8, r12=t9
+	ldmia	r1, {r2,r7,r8,r9,r10,r11,r12}
+	add	r1, r0, #3*4
+	stmia	r1, {r2,r7,r8,r9,r10}
+
+	bic	r2, r3, field_not_M 		@ r[8] = c & M
+	str	r2, [r0, #8*4]
+	mov	r3, r3, lsr #26     		@ c >>= 26
+	orr	r3, r3, r4, asl #6
+	mov     r4, r4, lsr #26
+	mov     r14, field_R1			@ c += u8 * R1
+	umlal   r3, r4, r11, r14
+	movw    r14, field_R0			@ c += d * R0
+	umlal   r3, r4, r5, r14
+	adds	r3, r3, r12			@ c += t9
+	adc	r4, r4, #0
+
+	add	r1, sp, #4 + 0*4		@ r7,r8,r9 = t0,t1,t2
+	ldmia	r1, {r7,r8,r9}
+
+	ubfx	r2, r3, #0, #22     		@ r[9] = c & (M >> 4)
+	str	r2, [r0, #9*4]
+	mov	r3, r3, lsr #22     		@ c >>= 22
+	orr	r3, r3, r4, asl #10
+	mov     r4, r4, lsr #22
+	movw    r14, field_R1 << 4   		@ c += d * (R1 << 4)
+	umlal   r3, r4, r5, r14
+
+	movw    r14, field_R0 >> 4   		@ d = c * (R0 >> 4) + t0 (64x64 multiply+add)
+	umull	r5, r6, r3, r14			@ d = c.lo * (R0 >> 4)
+	adds	r5, r5, r7	    		@ d.lo += t0
+	mla	r6, r14, r4, r6			@ d.hi += c.hi * (R0 >> 4)
+	adc	r6, r6, 0	     		@ d.hi += carry
+
+	bic	r2, r5, field_not_M 		@ r[0] = d & M
+	str	r2, [r0, #0*4]
+
+	mov	r5, r5, lsr #26     		@ d >>= 26
+	orr	r5, r5, r6, asl #6
+	mov     r6, r6, lsr #26
+	
+	movw    r14, field_R1 >> 4   		@ d += c * (R1 >> 4) + t1 (64x64 multiply+add)
+	umull	r1, r2, r3, r14       		@ tmp = c.lo * (R1 >> 4)
+	adds	r5, r5, r8	    		@ d.lo += t1
+	adc	r6, r6, #0	    		@ d.hi += carry
+	adds	r5, r5, r1	    		@ d.lo += tmp.lo
+	mla	r2, r14, r4, r2      		@ tmp.hi += c.hi * (R1 >> 4)
+	adc	r6, r6, r2	   		@ d.hi += carry + tmp.hi
+
+	bic	r2, r5, field_not_M 		@ r[1] = d & M
+	str	r2, [r0, #1*4]
+	mov	r5, r5, lsr #26     		@ d >>= 26 (ignore hi)
+	orr	r5, r5, r6, asl #6
+
+	add	r5, r5, r9	  		@ d += t2
+	str	r5, [r0, #2*4]      		@ r[2] = d
+
+	add	sp, sp, #48
+	ldmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+	.size	secp256k1_fe_sqr_inner, .-secp256k1_fe_sqr_inner
+
+	.section .note.GNU-stack,"",%progbits
--- a/libsecp256k1/src/assumptions.h
+++ b/libsecp256k1/src/assumptions.h
@@ -0,0 +1,87 @@
+/***********************************************************************
+ * Copyright (c) 2020 Pieter Wuille                                    *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_ASSUMPTIONS_H
+#define SECP256K1_ASSUMPTIONS_H
+
+#include <limits.h>
+
+#include "util.h"
+#if defined(SECP256K1_INT128_NATIVE)
+#include "int128_native.h"
+#endif
+
+/* This library, like most software, relies on a number of compiler implementation defined (but not undefined)
+   behaviours. Although the behaviours we require are essentially universal we test them specifically here to
+   reduce the odds of experiencing an unwelcome surprise.
+*/
+
+#if defined(__has_attribute)
+# if __has_attribute(__unavailable__)
+__attribute__((__unavailable__("Don't call this function. It only exists because STATIC_ASSERT cannot be used outside a function.")))
+# endif
+#endif
+static void secp256k1_assumption_checker(void) {
+    /* Every statement in this body except the final VERIFY_CHECK is a
+       STATIC_ASSERT; the function exists only to give those assertions a
+       place to live and takes no input.
+
+       Bytes are 8 bits. */
+    STATIC_ASSERT(CHAR_BIT == 8);
+
+    /* No integer promotion for uint32_t. This ensures that we can multiply uintXX_t values where XX >= 32
+       without signed overflow, which would be undefined behaviour. */
+    STATIC_ASSERT(UINT_MAX <= UINT32_MAX);
+
+    /* Conversions from unsigned to signed outside of the bounds of the signed type are
+       implementation-defined. Verify that they function as reinterpreting the lower
+       bits of the input in two's complement notation. Do this for conversions:
+       - from uint(N)_t to int(N)_t with negative result
+       - from uint(2N)_t to int(N)_t with negative result
+       - from int(2N)_t to int(N)_t with negative result
+       - from int(2N)_t to int(N)_t with positive result
+       Each group below lists its assertions in that same order. */
+
+    /* To int8_t. */
+    STATIC_ASSERT(((int8_t)(uint8_t)0xAB == (int8_t)-(int8_t)0x55));
+    STATIC_ASSERT((int8_t)(uint16_t)0xABCD == (int8_t)-(int8_t)0x33);
+    STATIC_ASSERT((int8_t)(int16_t)(uint16_t)0xCDEF == (int8_t)(uint8_t)0xEF);
+    STATIC_ASSERT((int8_t)(int16_t)(uint16_t)0x9234 == (int8_t)(uint8_t)0x34);
+
+    /* To int16_t. */
+    STATIC_ASSERT((int16_t)(uint16_t)0xBCDE == (int16_t)-(int16_t)0x4322);
+    STATIC_ASSERT((int16_t)(uint32_t)0xA1B2C3D4 == (int16_t)-(int16_t)0x3C2C);
+    STATIC_ASSERT((int16_t)(int32_t)(uint32_t)0xC1D2E3F4 == (int16_t)(uint16_t)0xE3F4);
+    STATIC_ASSERT((int16_t)(int32_t)(uint32_t)0x92345678 == (int16_t)(uint16_t)0x5678);
+
+    /* To int32_t. */
+    STATIC_ASSERT((int32_t)(uint32_t)0xB2C3D4E5 == (int32_t)-(int32_t)0x4D3C2B1B);
+    STATIC_ASSERT((int32_t)(uint64_t)0xA123B456C789D012ULL == (int32_t)-(int32_t)0x38762FEE);
+    STATIC_ASSERT((int32_t)(int64_t)(uint64_t)0xC1D2E3F4A5B6C7D8ULL == (int32_t)(uint32_t)0xA5B6C7D8);
+    STATIC_ASSERT((int32_t)(int64_t)(uint64_t)0xABCDEF0123456789ULL == (int32_t)(uint32_t)0x23456789);
+
+    /* To int64_t. The conversions from 128-bit types are only checked when
+       SECP256K1_INT128_NATIVE is defined. */
+    STATIC_ASSERT((int64_t)(uint64_t)0xB123C456D789E012ULL == (int64_t)-(int64_t)0x4EDC3BA928761FEEULL);
+#if defined(SECP256K1_INT128_NATIVE)
+    STATIC_ASSERT((int64_t)(((uint128_t)0xA1234567B8901234ULL << 64) + 0xC5678901D2345678ULL) == (int64_t)-(int64_t)0x3A9876FE2DCBA988ULL);
+    STATIC_ASSERT(((int64_t)(int128_t)(((uint128_t)0xB1C2D3E4F5A6B7C8ULL << 64) + 0xD9E0F1A2B3C4D5E6ULL)) == (int64_t)(uint64_t)0xD9E0F1A2B3C4D5E6ULL);
+    STATIC_ASSERT(((int64_t)(int128_t)(((uint128_t)0xABCDEF0123456789ULL << 64) + 0x0123456789ABCDEFULL)) == (int64_t)(uint64_t)0x0123456789ABCDEFULL);
+
+    /* To int128_t. */
+    STATIC_ASSERT((int128_t)(((uint128_t)0xB1234567C8901234ULL << 64) + 0xD5678901E2345678ULL) == (int128_t)(-(int128_t)0x8E1648B3F50E80DCULL * 0x8E1648B3F50E80DDULL + 0x5EA688D5482F9464ULL));
+#endif
+
+    /* Right shift on negative signed values is implementation defined. Verify that it
+       acts as a right shift in two's complement with sign extension (i.e. duplicating
+       the top bit into newly added bits). */
+    STATIC_ASSERT((((int8_t)0xE8) >> 2) == (int8_t)(uint8_t)0xFA);
+    STATIC_ASSERT((((int16_t)0xE9AC) >> 4) == (int16_t)(uint16_t)0xFE9A);
+    STATIC_ASSERT((((int32_t)0x937C918A) >> 9) == (int32_t)(uint32_t)0xFFC9BE48);
+    STATIC_ASSERT((((int64_t)0xA8B72231DF9CF4B9ULL) >> 19) == (int64_t)(uint64_t)0xFFFFF516E4463BF3ULL);
+#if defined(SECP256K1_INT128_NATIVE)
+    STATIC_ASSERT((((int128_t)(((uint128_t)0xCD833A65684A0DBCULL << 64) + 0xB349312F71EA7637ULL)) >> 39) == (int128_t)(((uint128_t)0xFFFFFFFFFF9B0674ULL << 64) + 0xCAD0941B79669262ULL));
+#endif
+
+    /* This function is not supposed to be called. VERIFY_CHECK is not defined
+       in this header (util.h is included above); presumably it makes an
+       accidental call fail in builds where it is active - TODO confirm. */
+    VERIFY_CHECK(0);
+}
+
+#endif /* SECP256K1_ASSUMPTIONS_H */
--- a/libsecp256k1/src/bench.c
+++ b/libsecp256k1/src/bench.c
@@ -0,0 +1,279 @@
+/***********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                    *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#include <stdio.h>
+#include <string.h>
+
+#include "../include/secp256k1.h"
+#include "util.h"
+#include "bench.h"
+
+/* Print the usage text for the benchmark binary to stdout.
+ *
+ * default_iters is only echoed in the text as the default iteration count.
+ * Lines describing optional modules are emitted only when the matching
+ * ENABLE_MODULE_* macro is defined at compile time. */
+static void help(int default_iters) {
+    printf("Benchmarks the following algorithms:\n");
+    printf("    - ECDSA signing/verification\n");
+
+#ifdef ENABLE_MODULE_ECDH
+    printf("    - ECDH key exchange (optional module)\n");
+#endif
+
+#ifdef ENABLE_MODULE_RECOVERY
+    printf("    - Public key recovery (optional module)\n");
+#endif
+
+#ifdef ENABLE_MODULE_SCHNORRSIG
+    printf("    - Schnorr signatures (optional module)\n");
+#endif
+
+    printf("\n");
+    printf("The default number of iterations for each benchmark is %d. This can be\n", default_iters);
+    printf("customized using the SECP256K1_BENCH_ITERS environment variable.\n");
+    printf("\n");
+    printf("Usage: ./bench [args]\n");
+    printf("By default, all benchmarks will be run.\n");
+    printf("args:\n");
+    printf("    help              : display this help and exit\n");
+    printf("    ecdsa             : all ECDSA algorithms--sign, verify, recovery (if enabled)\n");
+    /* Spelling fixed: the text used to read "siging". */
+    printf("    ecdsa_sign        : ECDSA signing algorithm\n");
+    printf("    ecdsa_verify      : ECDSA verification algorithm\n");
+    printf("    ec                : all EC public key algorithms (keygen)\n");
+    printf("    ec_keygen         : EC public key generation\n");
+
+#ifdef ENABLE_MODULE_RECOVERY
+    printf("    ecdsa_recover     : ECDSA public key recovery algorithm\n");
+#endif
+
+#ifdef ENABLE_MODULE_ECDH
+    printf("    ecdh              : ECDH key exchange algorithm\n");
+#endif
+
+#ifdef ENABLE_MODULE_SCHNORRSIG
+    printf("    schnorrsig        : all Schnorr signature algorithms (sign, verify)\n");
+    /* Spelling fixed: the text used to read "sigining". */
+    printf("    schnorrsig_sign   : Schnorr signing algorithm\n");
+    printf("    schnorrsig_verify : Schnorr verification algorithm\n");
+#endif
+
+#ifdef ENABLE_MODULE_ELLSWIFT
+    printf("    ellswift          : all ElligatorSwift benchmarks (encode, decode, keygen, ecdh)\n");
+    printf("    ellswift_encode   : ElligatorSwift encoding\n");
+    printf("    ellswift_decode   : ElligatorSwift decoding\n");
+    printf("    ellswift_keygen   : ElligatorSwift key generation\n");
+    printf("    ellswift_ecdh     : ECDH on ElligatorSwift keys\n");
+#endif
+
+    printf("\n");
+}
+
+typedef struct { /* state that the bench_* callbacks below receive through their void* argument */
+    secp256k1_context *ctx;      /* context passed to every secp256k1_* call in the callbacks */
+    unsigned char msg[32];       /* 32-byte message input to ECDSA sign and verify */
+    unsigned char key[32];       /* 32-byte secret key input to signing and key generation */
+    unsigned char sig[72];       /* DER-encoded signature that bench_verify parses */
+    size_t siglen;               /* number of bytes of sig in use */
+    unsigned char pubkey[33];    /* serialized public key that bench_verify parses */
+    size_t pubkeylen;            /* number of bytes of pubkey in use */
+} bench_data;
+
+/* Benchmark body for ECDSA verification, run iters times.
+ *
+ * Each pass XORs the low three bytes of the pass counter into the last three
+ * bytes of the stored signature, parses the public key and signature again,
+ * and checks that verification succeeds only on pass 0 (where the XOR masks
+ * are all zero). The same XOR is then applied a second time so the stored
+ * signature is back to its initial bytes before the next pass. */
+static void bench_verify(void* arg, int iters) {
+    bench_data* data = (bench_data*)arg;
+    /* One past the last signature byte in use; the tweaked bytes sit right before it. */
+    unsigned char* sig_end = data->sig + data->siglen;
+    int i;
+
+    for (i = 0; i < iters; i++) {
+        const unsigned char mask_lo = (unsigned char)(i & 0xFF);
+        const unsigned char mask_mid = (unsigned char)((i >> 8) & 0xFF);
+        const unsigned char mask_hi = (unsigned char)((i >> 16) & 0xFF);
+        secp256k1_pubkey pubkey;
+        secp256k1_ecdsa_signature sig;
+
+        sig_end[-1] ^= mask_lo;
+        sig_end[-2] ^= mask_mid;
+        sig_end[-3] ^= mask_hi;
+
+        CHECK(secp256k1_ec_pubkey_parse(data->ctx, &pubkey, data->pubkey, data->pubkeylen) == 1);
+        CHECK(secp256k1_ecdsa_signature_parse_der(data->ctx, &sig, data->sig, data->siglen) == 1);
+        CHECK(secp256k1_ecdsa_verify(data->ctx, &sig, data->msg, &pubkey) == (i == 0));
+
+        /* XOR is its own inverse: applying the masks again restores the bytes. */
+        sig_end[-1] ^= mask_lo;
+        sig_end[-2] ^= mask_mid;
+        sig_end[-3] ^= mask_hi;
+    }
+}
+
+/* Prepare the signing benchmark inputs: msg is filled with the byte values
+ * 1..32 and key with the byte values 65..96. */
+static void bench_sign_setup(void* arg) {
+    bench_data *data = (bench_data*)arg;
+    int pos;
+
+    /* Both buffers hold 32 bytes, so a single pass fills them side by side. */
+    for (pos = 0; pos < 32; pos++) {
+        data->msg[pos] = pos + 1;
+        data->key[pos] = pos + 65;
+    }
+}
+
+/* Benchmark body for ECDSA signing, run iters times.
+ *
+ * Each pass signs data->msg with data->key and DER-serializes the result into
+ * a local 74-byte buffer. The first 32 bytes of that buffer then become the
+ * next message and the following 32 bytes the next key, so every pass works
+ * on inputs derived from the previous signature. */
+static void bench_sign_run(void* arg, int iters) {
+    bench_data *data = (bench_data*)arg;
+    unsigned char sig[74];
+    int i;
+
+    for (i = 0; i < iters; i++) {
+        secp256k1_ecdsa_signature signature;
+        size_t siglen = sizeof(sig);
+
+        CHECK(secp256k1_ecdsa_sign(data->ctx, &signature, data->msg, data->key, NULL, NULL));
+        CHECK(secp256k1_ecdsa_signature_serialize_der(data->ctx, sig, &siglen, &signature));
+
+        /* Feed the serialized bytes back in as the next round's inputs. */
+        memcpy(data->msg, sig, 32);
+        memcpy(data->key, sig + 32, 32);
+    }
+}
+
+/* Prepare the key generation benchmark: key is filled with the byte values 65..96. */
+static void bench_keygen_setup(void* arg) {
+    bench_data *data = (bench_data*)arg;
+    unsigned char *out = data->key;
+    unsigned char *const stop = data->key + 32;
+    unsigned char value = 65;
+
+    while (out != stop) {
+        *out++ = value++;
+    }
+}
+
+/* Benchmark body for public key generation, run iters times.
+ *
+ * Each pass creates a public key from data->key, serializes it in compressed
+ * form (33 bytes) and copies the 32 bytes after the first serialized byte
+ * back into data->key as the secret key for the next pass. */
+static void bench_keygen_run(void *arg, int iters) {
+    bench_data *data = (bench_data*)arg;
+    int remaining;
+
+    for (remaining = iters; remaining > 0; remaining--) {
+        secp256k1_pubkey pubkey;
+        unsigned char pub33[33];
+        size_t len = sizeof(pub33);
+
+        CHECK(secp256k1_ec_pubkey_create(data->ctx, &pubkey, data->key));
+        CHECK(secp256k1_ec_pubkey_serialize(data->ctx, pub33, &len, &pubkey, SECP256K1_EC_COMPRESSED));
+        memcpy(data->key, pub33 + 1, 32);
+    }
+}
+
+
+#ifdef ENABLE_MODULE_ECDH
+# include "modules/ecdh/bench_impl.h"
+#endif
+
+#ifdef ENABLE_MODULE_RECOVERY
+# include "modules/recovery/bench_impl.h"
+#endif
+
+#ifdef ENABLE_MODULE_SCHNORRSIG
+# include "modules/schnorrsig/bench_impl.h"
+#endif
+
+#ifdef ENABLE_MODULE_ELLSWIFT
+# include "modules/ellswift/bench_impl.h"
+#endif
+
+/* Entry point of the public API benchmark. Validates the command line, refuses
+ * requests for modules that were not compiled in, runs the selected ECDSA and
+ * key generation benchmarks, and then hands over to the benchmarks of every
+ * enabled optional module. Returns 0 on success and 1 on a usage error. */
+int main(int argc, char** argv) {
+    /* Every argument accepted on the command line. */
+    char* known_args[] = {"ecdsa", "verify", "ecdsa_verify", "sign", "ecdsa_sign", "ecdh", "recover",
+                          "ecdsa_recover", "schnorrsig", "schnorrsig_verify", "schnorrsig_sign", "ec",
+                          "keygen", "ec_keygen", "ellswift", "encode", "ellswift_encode", "decode",
+                          "ellswift_decode", "ellswift_keygen", "ellswift_ecdh"};
+    const size_t known_args_len = sizeof(known_args)/sizeof(known_args[0]);
+    const int default_iters = 20000;
+    /* Without arguments every benchmark is run. */
+    const int run_all = (argc == 1);
+    const int iters = get_iters(default_iters);
+    const int has_bad_arg = have_invalid_args(argc, argv, known_args, known_args_len);
+    secp256k1_ecdsa_signature signature;
+    secp256k1_pubkey public_key;
+    bench_data data;
+    int k;
+
+    if (argc > 1) {
+        const int wants_help = have_flag(argc, argv, "-h")
+                            || have_flag(argc, argv, "--help")
+                            || have_flag(argc, argv, "help");
+        if (wants_help) {
+            help(default_iters);
+            return 0;
+        }
+        if (has_bad_arg) {
+            fprintf(stderr, "./bench: unrecognized argument.\n\n");
+            help(default_iters);
+            return 1;
+        }
+    }
+
+/* Reject requests for optional modules that were left out of the build. */
+#ifndef ENABLE_MODULE_ECDH
+    if (have_flag(argc, argv, "ecdh")) {
+        fprintf(stderr, "./bench: ECDH module not enabled.\n");
+        fprintf(stderr, "Use ./configure --enable-module-ecdh.\n\n");
+        return 1;
+    }
+#endif
+
+#ifndef ENABLE_MODULE_RECOVERY
+    if (have_flag(argc, argv, "recover")
+        || have_flag(argc, argv, "ecdsa_recover")) {
+        fprintf(stderr, "./bench: Public key recovery module not enabled.\n");
+        fprintf(stderr, "Use ./configure --enable-module-recovery.\n\n");
+        return 1;
+    }
+#endif
+
+#ifndef ENABLE_MODULE_SCHNORRSIG
+    if (have_flag(argc, argv, "schnorrsig")
+        || have_flag(argc, argv, "schnorrsig_sign")
+        || have_flag(argc, argv, "schnorrsig_verify")) {
+        fprintf(stderr, "./bench: Schnorr signatures module not enabled.\n");
+        fprintf(stderr, "Use ./configure --enable-module-schnorrsig.\n\n");
+        return 1;
+    }
+#endif
+
+#ifndef ENABLE_MODULE_ELLSWIFT
+    if (have_flag(argc, argv, "ellswift")
+        || have_flag(argc, argv, "ellswift_encode")
+        || have_flag(argc, argv, "ellswift_decode")
+        || have_flag(argc, argv, "encode")
+        || have_flag(argc, argv, "decode")
+        || have_flag(argc, argv, "ellswift_keygen")
+        || have_flag(argc, argv, "ellswift_ecdh")) {
+        fprintf(stderr, "./bench: ElligatorSwift module not enabled.\n");
+        fprintf(stderr, "Use ./configure --enable-module-ellswift.\n\n");
+        return 1;
+    }
+#endif
+
+    /* ECDSA benchmark: fixed message (bytes 1..32) and key (bytes 33..64),
+     * signed once so that bench_verify has a signature and public key to use. */
+    data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_NONE);
+
+    for (k = 0; k < 32; k++) {
+        data.msg[k] = 1 + k;
+        data.key[k] = 33 + k;
+    }
+    data.siglen = 72;
+    CHECK(secp256k1_ecdsa_sign(data.ctx, &signature, data.msg, data.key, NULL, NULL));
+    CHECK(secp256k1_ecdsa_signature_serialize_der(data.ctx, data.sig, &data.siglen, &signature));
+    CHECK(secp256k1_ec_pubkey_create(data.ctx, &public_key, data.key));
+    data.pubkeylen = 33;
+    CHECK(secp256k1_ec_pubkey_serialize(data.ctx, data.pubkey, &data.pubkeylen, &public_key, SECP256K1_EC_COMPRESSED) == 1);
+
+    print_output_table_header_row();
+    if (run_all || have_flag(argc, argv, "ecdsa") || have_flag(argc, argv, "verify") || have_flag(argc, argv, "ecdsa_verify")) {
+        run_benchmark("ecdsa_verify", bench_verify, NULL, NULL, &data, 10, iters);
+    }
+    if (run_all || have_flag(argc, argv, "ecdsa") || have_flag(argc, argv, "sign") || have_flag(argc, argv, "ecdsa_sign")) {
+        run_benchmark("ecdsa_sign", bench_sign_run, bench_sign_setup, NULL, &data, 10, iters);
+    }
+    if (run_all || have_flag(argc, argv, "ec") || have_flag(argc, argv, "keygen") || have_flag(argc, argv, "ec_keygen")) {
+        run_benchmark("ec_keygen", bench_keygen_run, bench_keygen_setup, NULL, &data, 10, iters);
+    }
+
+    secp256k1_context_destroy(data.ctx);
+
+#ifdef ENABLE_MODULE_ECDH
+    /* ECDH benchmarks */
+    run_ecdh_bench(iters, argc, argv);
+#endif
+
+#ifdef ENABLE_MODULE_RECOVERY
+    /* ECDSA recovery benchmarks */
+    run_recovery_bench(iters, argc, argv);
+#endif
+
+#ifdef ENABLE_MODULE_SCHNORRSIG
+    /* Schnorr signature benchmarks */
+    run_schnorrsig_bench(iters, argc, argv);
+#endif
+
+#ifdef ENABLE_MODULE_ELLSWIFT
+    /* ElligatorSwift benchmarks */
+    run_ellswift_bench(iters, argc, argv);
+#endif
+
+    return 0;
+}
--- a/libsecp256k1/src/bench.h
+++ b/libsecp256k1/src/bench.h
@@ -0,0 +1,188 @@
+/***********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                    *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_BENCH_H
+#define SECP256K1_BENCH_H
+
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if (defined(_MSC_VER) && _MSC_VER >= 1900)
+#  include <time.h>
+#else
+#  include <sys/time.h>
+#endif
+
+/* Returns the current wall-clock time in microseconds. With MSVC 2015 or
+ * newer the C11 timespec_get function is used (the process exits with status
+ * 1 if it fails); everywhere else gettimeofday is used. */
+static int64_t gettime_i64(void) {
+#if (defined(_MSC_VER) && _MSC_VER >= 1900)
+    struct timespec now;
+    int64_t micros;
+
+    if (timespec_get(&now, TIME_UTC) == 0) {
+        fputs("timespec_get failed!", stderr);
+        exit(1);
+    }
+    micros = (int64_t)now.tv_sec * 1000000LL;
+    micros += (int64_t)now.tv_nsec / 1000;
+    return micros;
+#else
+    struct timeval now;
+    int64_t micros;
+
+    gettimeofday(&now, NULL);
+    micros = (int64_t)now.tv_sec * 1000000LL;
+    micros += (int64_t)now.tv_usec;
+    return micros;
+#endif
+}
+
+#define FP_EXP (6)
+#define FP_MULT (1000000LL)
+
+/* Prints the fixed point number x, which carries FP_EXP implied decimal
+ * digits (i.e. it is the real value multiplied by FP_MULT), to stdout.
+ * The integer part is right-aligned in a field of at least 5 characters and
+ * the fractional part (including the decimal point) is left-aligned in a field
+ * of at least FP_EXP characters. Prints "ERR" for INT64_MIN. */
+static void print_number(const int64_t x) {
+    char text[30];
+    size_t pos = sizeof(text) - 1;
+    int64_t magnitude, digits;
+    int decimals = 0; /* number of fractional digits that will be printed */
+    int int_len = 0;  /* characters in the integer part, sign included */
+    int round_up = 0;
+    int k;
+
+    if (x == INT64_MIN) {
+        /* Negating INT64_MIN would overflow, so it is reported as an error. */
+        printf("ERR");
+        return;
+    }
+    magnitude = (x < 0) ? -x : x;
+
+    /* One more decimal is shown for every factor of ten by which the
+     * magnitude is below 100 * FP_MULT, but never more than FP_EXP. */
+    digits = magnitude;
+    while (digits > 0LL && digits < 100LL * FP_MULT && decimals < FP_EXP) {
+        digits *= 10LL;
+        decimals++;
+    }
+
+    /* Drop the decimals that are not shown; the last dropped digit decides
+     * whether the remainder is rounded up. */
+    digits = magnitude;
+    for (k = FP_EXP - decimals; k > 0; --k) {
+        round_up = (digits % 10) >= 5;
+        digits /= 10;
+    }
+    digits += round_up;
+
+    /* Build the text from the last character backwards. */
+    text[pos] = 0;
+    if (decimals == 0) {
+        /* No fractional digits were selected: show a single zero. */
+        text[--pos] = '0';
+    } else {
+        for (k = 0; k < decimals; ++k) {
+            text[--pos] = '0' + (digits % 10);
+            digits /= 10;
+        }
+    }
+    text[--pos] = '.';
+    do {
+        text[--pos] = '0' + (digits % 10);
+        digits /= 10;
+        int_len++;
+    } while (digits != 0);
+    if (x < 0) {
+        text[--pos] = '-';
+        int_len++;
+    }
+
+    printf("%5.*s", int_len, &text[pos]);          /* integer part */
+    printf("%-*s", FP_EXP, &text[pos + int_len]);  /* '.' and fractional part */
+}
+
+/* Runs a benchmark `count` times and prints one table row containing the
+ * minimum, average and maximum time per iteration in microseconds.
+ *
+ *   name:      label printed in the first column
+ *   benchmark: function being timed; receives `data` and `iter`
+ *   setup:     optional (may be NULL); called with `data` before each timed run
+ *   teardown:  optional (may be NULL); called with `data` and `iter` after
+ *              each timed run, outside of the timed section
+ *   count:     number of timed runs, must be positive
+ *   iter:      iteration count handed to `benchmark`, must be positive
+ *
+ * If `count` or `iter` is not positive nothing is run and a message is written
+ * to stderr, because the per-iteration figures would require a division by
+ * zero (or would be meaningless for negative values). */
+static void run_benchmark(char *name, void (*benchmark)(void*, int), void (*setup)(void*), void (*teardown)(void*, int), void* data, int count, int iter) {
+    int i;
+    int64_t min = INT64_MAX;
+    int64_t sum = 0;
+    int64_t max = 0;
+
+    if (count <= 0 || iter <= 0) {
+        fprintf(stderr, "%s: skipped, the run count (%d) and iteration count (%d) must be positive.\n", name, count, iter);
+        return;
+    }
+
+    for (i = 0; i < count; i++) {
+        int64_t begin, total;
+        if (setup != NULL) {
+            setup(data);
+        }
+        /* Only the benchmark call itself is timed. */
+        begin = gettime_i64();
+        benchmark(data, iter);
+        total = gettime_i64() - begin;
+        if (teardown != NULL) {
+            teardown(data, iter);
+        }
+        if (total < min) {
+            min = total;
+        }
+        if (total > max) {
+            max = total;
+        }
+        sum += total;
+    }
+    /* ',' is used as a column delimiter */
+    printf("%-30s, ", name);
+    /* Times are scaled by FP_MULT so print_number can show fractional microseconds. */
+    print_number(min * FP_MULT / iter);
+    printf("   , ");
+    print_number(((sum * FP_MULT) / count) / iter);
+    printf("   , ");
+    print_number(max * FP_MULT / iter);
+    printf("\n");
+}
+
+/* Returns 1 if `flag` is equal to one of the command-line arguments
+ * argv[1] .. argv[argc - 1], and 0 otherwise. argv[0] (the program name) is
+ * never compared. The index-based loop also handles argc == 0, where stepping
+ * a pointer past argv[0] would overrun the array. */
+static int have_flag(int argc, char** argv, char *flag) {
+    int i;
+
+    for (i = 1; i < argc; i++) {
+        if (strcmp(argv[i], flag) == 0) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/* Checks the command-line arguments argv[1] .. argv[argc - 1] against the `n`
+ * entries of `valid_args`, the list of arguments the user is allowed to enter.
+ * Returns:
+ *    - 1 if at least one argument is not in `valid_args`
+ *    - 0 if every argument is valid (including when there are no arguments)
+ * The index-based loop also handles argc == 0, where stepping a pointer past
+ * argv[0] would overrun the array. */
+static int have_invalid_args(int argc, char** argv, char** valid_args, size_t n) {
+    int a;
+
+    for (a = 1; a < argc; a++) {
+        int found_valid = 0;
+        size_t i;
+        for (i = 0; i < n; i++) {
+            if (strcmp(argv[a], valid_args[i]) == 0) {
+                found_valid = 1; /* user entered a valid arg from the list */
+                break;
+            }
+        }
+        if (!found_valid) {
+            return 1; /* invalid arg found */
+        }
+    }
+    return 0;
+}
+
+/* Returns the number of benchmark iterations to use: the value of the
+ * SECP256K1_BENCH_ITERS environment variable if it is set to an integer in
+ * [1, INT_MAX] (parsed by strtol with base 0, so decimal, octal and hex forms
+ * are accepted), and `default_iters` otherwise. A value that is set but
+ * unusable is reported on stderr instead of being passed on, because callers
+ * divide by the iteration count and size allocations with it. */
+static int get_iters(int default_iters) {
+    const char* env = getenv("SECP256K1_BENCH_ITERS");
+    long parsed;
+
+    if (env == NULL) {
+        return default_iters;
+    }
+
+    errno = 0;
+    parsed = strtol(env, NULL, 0);
+    /* strtol yields 0 when nothing could be parsed, which the lower bound rejects too. */
+    if (errno == ERANGE || parsed < 1 || parsed > INT_MAX) {
+        fprintf(stderr, "Ignoring invalid SECP256K1_BENCH_ITERS value '%s', using %d iterations.\n", env, default_iters);
+        return default_iters;
+    }
+    return (int)parsed;
+}
+
+/* Prints the header row of the comma-separated results table, followed by an
+ * empty line. The benchmark column is left-justified in 30 characters; the
+ * three timing titles are padded by hand so they appear centered in their
+ * 15-character columns. */
+static void print_output_table_header_row(void) {
+    static const char row_format[] = "%-30s,%-15s,%-15s,%-15s\n";
+    const char* name_title = "Benchmark";
+    const char* min_title = "    Min(us)    ";
+    const char* avg_title = "    Avg(us)    ";
+    const char* max_title = "    Max(us)    ";
+
+    printf(row_format, name_title, min_title, avg_title, max_title);
+    printf("\n");
+}
+
+#endif /* SECP256K1_BENCH_H */
--- a/libsecp256k1/src/bench_ecmult.c
+++ b/libsecp256k1/src/bench_ecmult.c
@@ -0,0 +1,367 @@
+/***********************************************************************
+ * Copyright (c) 2017 Pieter Wuille                                    *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+#include <stdio.h>
+
+#include "secp256k1.c"
+#include "../include/secp256k1.h"
+
+#include "util.h"
+#include "hash_impl.h"
+#include "field_impl.h"
+#include "group_impl.h"
+#include "scalar_impl.h"
+#include "ecmult_impl.h"
+#include "bench.h"
+
+#define POINTS 32768
+
+/* Prints the usage text of the EC multiplication benchmark to stdout, using
+ * argv[0] as the program name in the usage line. */
+static void help(char **argv) {
+    static const char* const explanation[] = {
+        "The output shows the number of multiplied and summed points right after the\n",
+        "function name. The letter 'g' indicates that one of the points is the generator.\n",
+        "The benchmarks are divided by the number of points.\n",
+        "\n",
+        "default (ecmult_multi): picks pippenger_wnaf or strauss_wnaf depending on the\n",
+        "                        batch size\n",
+        "pippenger_wnaf:         for all batch sizes\n",
+        "strauss_wnaf:           for all batch sizes\n",
+        "simple:                 multiply and sum each point individually\n"
+    };
+    size_t line;
+
+    fputs("Benchmark EC multiplication algorithms\n", stdout);
+    fputs("\n", stdout);
+    printf("Usage: %s <help|pippenger_wnaf|strauss_wnaf|simple>\n", argv[0]);
+    for (line = 0; line < sizeof(explanation)/sizeof(explanation[0]); line++) {
+        fputs(explanation[line], stdout);
+    }
+}
+
+typedef struct { /* State shared by all EC multiplication benchmarks in this file. */
+    /* Setup once in advance */
+    secp256k1_context* ctx;
+    secp256k1_scratch_space* scratch; /* left NULL by main when the "simple" argument is given */
+    secp256k1_scalar* scalars; /* POINTS entries; entry i is produced by generate_scalar(i, ...) */
+    secp256k1_ge* pubkeys; /* POINTS entries, filled from pubkeys_gej by secp256k1_ge_set_all_gej_var */
+    secp256k1_gej* pubkeys_gej; /* POINTS entries; entry 0 is the generator, each later entry doubles the previous one */
+    secp256k1_scalar* seckeys; /* POINTS entries; entry 0 is 1, each later entry is twice the previous one */
+    secp256k1_gej* expected_output; /* negated expected results, computed by run_ecmult_multi_bench */
+    secp256k1_ecmult_multi_func ecmult_multi; /* multi-point multiplication function selected in main */
+
+    /* Changes per benchmark */
+    size_t count; /* number of terms per multi-point multiplication, including the generator term */
+    int includes_g; /* non-zero if one of the terms uses the generator */
+
+    /* Changes per benchmark iteration, used to pick different scalars and pubkeys
+     * in each run. Both are indices into the POINTS-sized arrays above. */
+    size_t offset1;
+    size_t offset2;
+
+    /* Benchmark output. One entry is written per benchmark iteration. */
+    secp256k1_gej* output;
+} bench_data;
+
+/* Hashes x into [0, POINTS) twice, using two different multiplier/increment
+ * pairs, and stores the results in data->offset1 and data->offset2. */
+static void hash_into_offset(bench_data* data, size_t x) {
+    const size_t first = x * 0x537b7f6f + 0x8f66a481;
+    const size_t second = x * 0x7f6f537b + 0x6a1a8f49;
+
+    data->offset1 = first % POINTS;
+    data->offset2 = second % POINTS;
+}
+
+/* Checks the correctness of a finished benchmark run by verifying that
+ *   sum(output[i]) == (sum(scalars_gen[i]) + sum(seckeys[i] * scalars[i])) * G
+ * over the first `iters` outputs.
+ *
+ * scalar_gen_offset, if not NULL, points to the offset of the scalars that
+ * were multiplied with the generator. seckey_offset, if not NULL, points to
+ * the offset of the secret keys of the points used, and scalar_offset then
+ * points to the offset of the scalars those points were multiplied with. */
+static void bench_ecmult_teardown_helper(bench_data* data, size_t* seckey_offset, size_t* scalar_offset, size_t* scalar_gen_offset, int iters) {
+    secp256k1_scalar scalar_total;
+    secp256k1_gej output_total, expected;
+    int k;
+
+    secp256k1_scalar_set_int(&scalar_total, 0);
+    secp256k1_gej_set_infinity(&output_total);
+
+    for (k = 0; k < iters; ++k) {
+        secp256k1_gej_add_var(&output_total, &output_total, &data->output[k], NULL);
+
+        if (scalar_gen_offset != NULL) {
+            const size_t gen_idx = (*scalar_gen_offset + k) % POINTS;
+            secp256k1_scalar_add(&scalar_total, &scalar_total, &data->scalars[gen_idx]);
+        }
+        if (seckey_offset != NULL) {
+            const size_t key_idx = (*seckey_offset + k) % POINTS;
+            const size_t scalar_idx = (*scalar_offset + k) % POINTS;
+            secp256k1_scalar product = data->seckeys[key_idx];
+
+            secp256k1_scalar_mul(&product, &product, &data->scalars[scalar_idx]);
+            secp256k1_scalar_add(&scalar_total, &scalar_total, &product);
+        }
+    }
+
+    secp256k1_ecmult_gen(&data->ctx->ecmult_gen_ctx, &expected, &scalar_total);
+    CHECK(secp256k1_gej_eq_var(&expected, &output_total));
+}
+
+/* Setup hook shared by the single-multiplication benchmarks: derives new
+ * offsets from the current offset1, so that every run works on different
+ * scalars and group elements. */
+static void bench_ecmult_setup(void* arg) {
+    bench_data* state = (bench_data*)arg;
+    const size_t previous = state->offset1;
+
+    hash_into_offset(state, previous);
+}
+
+/* Benchmark body for secp256k1_ecmult_gen: output[k] is computed from the
+ * scalar at index (offset1 + k) mod POINTS. */
+static void bench_ecmult_gen(void* arg, int iters) {
+    bench_data* state = (bench_data*)arg;
+    int k;
+
+    for (k = 0; k < iters; ++k) {
+        const size_t scalar_idx = (state->offset1 + k) % POINTS;
+        secp256k1_ecmult_gen(&state->ctx->ecmult_gen_ctx, &state->output[k], &state->scalars[scalar_idx]);
+    }
+}
+
+/* Teardown hook for bench_ecmult_gen: verifies the outputs against the
+ * generator scalars starting at offset1. */
+static void bench_ecmult_gen_teardown(void* arg, int iters) {
+    bench_data* state = (bench_data*)arg;
+    size_t* gen_offset = &state->offset1;
+
+    bench_ecmult_teardown_helper(state, NULL, NULL, gen_offset, iters);
+}
+
+/* Benchmark body for secp256k1_ecmult_const: output[k] is computed from the
+ * public key at index (offset1 + k) mod POINTS and the scalar at index
+ * (offset2 + k) mod POINTS. */
+static void bench_ecmult_const(void* arg, int iters) {
+    bench_data* state = (bench_data*)arg;
+    int k;
+
+    for (k = 0; k < iters; ++k) {
+        const size_t point_idx = (state->offset1 + k) % POINTS;
+        const size_t scalar_idx = (state->offset2 + k) % POINTS;
+        secp256k1_ecmult_const(&state->output[k], &state->pubkeys[point_idx], &state->scalars[scalar_idx]);
+    }
+}
+
+/* Teardown hook for bench_ecmult_const: verifies the outputs against the
+ * secret keys starting at offset1 and the scalars starting at offset2. */
+static void bench_ecmult_const_teardown(void* arg, int iters) {
+    bench_data* state = (bench_data*)arg;
+    size_t* key_offset = &state->offset1;
+    size_t* scalar_offset = &state->offset2;
+
+    bench_ecmult_teardown_helper(state, key_offset, scalar_offset, NULL, iters);
+}
+
+/* Benchmark body for secp256k1_ecmult with one non-generator point and no
+ * generator scalar: output[k] is computed from the point at index
+ * (offset1 + k) mod POINTS and the scalar at index (offset2 + k) mod POINTS. */
+static void bench_ecmult_1p(void* arg, int iters) {
+    bench_data* state = (bench_data*)arg;
+    int k;
+
+    for (k = 0; k < iters; ++k) {
+        const size_t point_idx = (state->offset1 + k) % POINTS;
+        const size_t scalar_idx = (state->offset2 + k) % POINTS;
+        secp256k1_ecmult(&state->output[k], &state->pubkeys_gej[point_idx], &state->scalars[scalar_idx], NULL);
+    }
+}
+
+/* Teardown hook for bench_ecmult_1p: verifies the outputs against the secret
+ * keys starting at offset1 and the scalars starting at offset2. */
+static void bench_ecmult_1p_teardown(void* arg, int iters) {
+    bench_data* state = (bench_data*)arg;
+    size_t* key_offset = &state->offset1;
+    size_t* scalar_offset = &state->offset2;
+
+    bench_ecmult_teardown_helper(state, key_offset, scalar_offset, NULL, iters);
+}
+
+/* Benchmark body for secp256k1_ecmult with only a generator scalar: no point
+ * is passed, its scalar is zero, and the generator scalar is taken from index
+ * (offset1 + k) mod POINTS. */
+static void bench_ecmult_0p_g(void* arg, int iters) {
+    bench_data* state = (bench_data*)arg;
+    int k;
+
+    for (k = 0; k < iters; ++k) {
+        const size_t gen_idx = (state->offset1 + k) % POINTS;
+        secp256k1_ecmult(&state->output[k], NULL, &secp256k1_scalar_zero, &state->scalars[gen_idx]);
+    }
+}
+
+/* Teardown hook for bench_ecmult_0p_g: verifies the outputs against the
+ * generator scalars starting at offset1. */
+static void bench_ecmult_0p_g_teardown(void* arg, int iters) {
+    bench_data* state = (bench_data*)arg;
+    size_t* gen_offset = &state->offset1;
+
+    bench_ecmult_teardown_helper(state, NULL, NULL, gen_offset, iters);
+}
+
+/* Benchmark body for secp256k1_ecmult with one non-generator point plus a
+ * generator scalar. Only iters/2 multiplications are performed, each of which
+ * involves two points. */
+static void bench_ecmult_1p_g(void* arg, int iters) {
+    bench_data* state = (bench_data*)arg;
+    const int rounds = iters/2;
+    int k;
+
+    for (k = 0; k < rounds; ++k) {
+        const size_t first_idx = (state->offset1 + k) % POINTS;  /* point and generator scalar */
+        const size_t second_idx = (state->offset2 + k) % POINTS; /* scalar for the point */
+        secp256k1_ecmult(&state->output[k], &state->pubkeys_gej[first_idx], &state->scalars[second_idx], &state->scalars[first_idx]);
+    }
+}
+
+/* Teardown hook for bench_ecmult_1p_g: verifies the iters/2 outputs against
+ * the secret keys and generator scalars starting at offset1 and the point
+ * scalars starting at offset2. */
+static void bench_ecmult_1p_g_teardown(void* arg, int iters) {
+    bench_data* state = (bench_data*)arg;
+    const int rounds = iters/2;
+
+    bench_ecmult_teardown_helper(state, &state->offset1, &state->offset2, &state->offset1, rounds);
+}
+
+/* Runs the single-multiplication benchmarks (10 timed runs each) and prints
+ * one table row per benchmark. */
+static void run_ecmult_bench(bench_data* data, int iters) {
+    run_benchmark("ecmult_gen", bench_ecmult_gen, bench_ecmult_setup, bench_ecmult_gen_teardown, data, 10, iters);
+    run_benchmark("ecmult_const", bench_ecmult_const, bench_ecmult_setup, bench_ecmult_const_teardown, data, 10, iters);
+
+    /* ecmult with a non-generator point only */
+    run_benchmark("ecmult_1p", bench_ecmult_1p, bench_ecmult_setup, bench_ecmult_1p_teardown, data, 10, iters);
+
+    /* ecmult with the generator point only */
+    run_benchmark("ecmult_0p_g", bench_ecmult_0p_g, bench_ecmult_setup, bench_ecmult_0p_g_teardown, data, 10, iters);
+
+    /* ecmult with both the generator and a non-generator point. The iteration
+     * count is doubled because the benchmark body performs iters/2
+     * two-point multiplications, so the reported time is per point. */
+    run_benchmark("ecmult_1p_g", bench_ecmult_1p_g, bench_ecmult_setup, bench_ecmult_1p_g_teardown, data, 10, 2*iters);
+}
+
+/* Callback handed to the multi-point multiplication function: stores the
+ * scalar and point for term `idx` in *sc and *ge and returns 1.
+ * When the benchmark includes the generator, the generator term is passed to
+ * the multiplication function separately, so callback indices are shifted up
+ * by one and slot 0 (scalar at offset1, generator point) is never produced
+ * here. */
+static int bench_ecmult_multi_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, void* arg) {
+    bench_data* state = (bench_data*)arg;
+    const size_t slot = state->includes_g ? idx + 1 : idx;
+
+    if (slot != 0) {
+        *sc = state->scalars[(state->offset1 + slot) % POINTS];
+        *ge = state->pubkeys[(state->offset2 + slot - 1) % POINTS];
+        return 1;
+    }
+
+    *sc = state->scalars[state->offset1];
+    *ge = secp256k1_ge_const_g;
+    return 1;
+}
+
+/* Benchmark body for multi-point multiplication. `iters` counts individual
+ * points, so iters / count multiplications of `count` terms each are run.
+ * After every multiplication the offsets advance so that the next one uses
+ * the following scalars and points. */
+static void bench_ecmult_multi(void* arg, int iters) {
+    bench_data* state = (bench_data*)arg;
+    const int with_g = state->includes_g;
+    const int batch = state->count;
+    const int rounds = iters / state->count;
+    int round;
+
+    for (round = 0; round < rounds; ++round) {
+        /* The generator scalar is passed directly; the remaining terms come from the callback. */
+        secp256k1_scalar* g_scalar = state->includes_g ? &state->scalars[state->offset1] : NULL;
+
+        state->ecmult_multi(&state->ctx->error_callback, state->scratch, &state->output[round], g_scalar, bench_ecmult_multi_callback, arg, batch - with_g);
+        state->offset1 = (state->offset1 + batch) % POINTS;
+        state->offset2 = (state->offset2 + batch - 1) % POINTS;
+    }
+}
+
+/* Setup hook for bench_ecmult_multi: resets both offsets to the values derived
+ * from the batch size, the same starting point that run_ecmult_multi_bench
+ * uses when it computes the expected outputs. */
+static void bench_ecmult_multi_setup(void* arg) {
+    bench_data* state = (bench_data*)arg;
+    const size_t batch = state->count;
+
+    hash_into_offset(state, batch);
+}
+
+/* Teardown hook for bench_ecmult_multi. The results are verified here rather
+ * than in the benchmark body so that the comparisons are not timed: since
+ * expected_output holds the negated expected results, each output plus its
+ * expected_output entry must be the point at infinity. */
+static void bench_ecmult_multi_teardown(void* arg, int iters) {
+    bench_data* state = (bench_data*)arg;
+    const int rounds = iters / state->count;
+    int round;
+
+    for (round = 0; round < rounds; ++round) {
+        secp256k1_gej sum;
+
+        secp256k1_gej_add_var(&sum, &state->output[round], &state->expected_output[round], NULL);
+        CHECK(secp256k1_gej_is_infinity(&sum));
+    }
+}
+
+/* Derives a scalar deterministically from `num`: the SHA256 hash of the bytes
+ * "ecmult" followed by `num` in little-endian order is loaded into *scalar.
+ * The hash is CHECKed not to overflow when loaded. */
+static void generate_scalar(uint32_t num, secp256k1_scalar* scalar) {
+    unsigned char preimage[10] = {'e', 'c', 'm', 'u', 'l', 't', 0, 0, 0, 0};
+    unsigned char digest[32];
+    secp256k1_sha256 hasher;
+    int overflow = 0;
+    int byte;
+
+    /* Append num, least significant byte first. */
+    for (byte = 0; byte < 4; byte++) {
+        preimage[6 + byte] = num >> (8 * byte);
+    }
+
+    secp256k1_sha256_initialize(&hasher);
+    secp256k1_sha256_write(&hasher, preimage, sizeof(preimage));
+    secp256k1_sha256_finalize(&hasher, digest);
+
+    secp256k1_scalar_set_b32(scalar, digest, &overflow);
+    CHECK(!overflow);
+}
+
+/* Runs the multi-point multiplication benchmark for batches of `count` terms
+ * and prints its table row. If includes_g is set, one of the `count` terms is
+ * the generator. num_iters is the approximate total number of points to
+ * process; it is turned into 1 + num_iters / count batches. */
+static void run_ecmult_multi_bench(bench_data* data, size_t count, int includes_g, int num_iters) {
+    const size_t rounds = 1 + num_iters / count;
+    char label[32];
+    size_t round;
+
+    data->count = count;
+    data->includes_g = includes_g;
+
+    /* Compute (the negation of) the expected results directly, from the
+     * secret keys of the points: the first term of every batch uses the
+     * generator (secret key 1), the others use consecutive seckeys. */
+    hash_into_offset(data, data->count);
+    for (round = 0; round < rounds; ++round) {
+        secp256k1_scalar term;
+        secp256k1_scalar acc = data->scalars[data->offset1 % POINTS];
+        size_t k;
+
+        data->offset1++;
+        for (k = 1; k < count; ++k) {
+            const size_t key_idx = data->offset2 % POINTS;
+            const size_t scalar_idx = data->offset1 % POINTS;
+
+            data->offset2++;
+            data->offset1++;
+            secp256k1_scalar_mul(&term, &data->seckeys[key_idx], &data->scalars[scalar_idx]);
+            secp256k1_scalar_add(&acc, &acc, &term);
+        }
+        secp256k1_scalar_negate(&acc, &acc);
+        secp256k1_ecmult(&data->expected_output[round], NULL, &secp256k1_scalar_zero, &acc);
+    }
+
+    /* Run the benchmark. The label shows the number of non-generator points,
+     * with a "_g" suffix when the generator is included. */
+    if (!includes_g) {
+        sprintf(label, "ecmult_multi_%ip", (int)count);
+    } else {
+        sprintf(label, "ecmult_multi_%ip_g", (int)count - 1);
+    }
+    run_benchmark(label, bench_ecmult_multi, bench_ecmult_multi_setup, bench_ecmult_multi_teardown, data, 10, count * rounds);
+}
+
+/* Entry point of the EC multiplication benchmark. Selects the multi-point
+ * algorithm from the command line, builds POINTS scalars and key pairs, and
+ * runs the single- and multi-point benchmarks.
+ * Returns 0 on success and 1 on a usage error, a non-positive iteration count
+ * or an allocation failure. */
+int main(int argc, char **argv) {
+    bench_data data;
+    int i, p;
+    int ret = 1;
+    size_t scratch_size;
+    size_t output_len;
+
+    int iters = get_iters(10000);
+
+    data.ecmult_multi = secp256k1_ecmult_multi_var;
+
+    if (argc > 1) {
+        if(have_flag(argc, argv, "-h")
+           || have_flag(argc, argv, "--help")
+           || have_flag(argc, argv, "help")) {
+            help(argv);
+            return 0;
+        } else if(have_flag(argc, argv, "pippenger_wnaf")) {
+            printf("Using pippenger_wnaf:\n");
+            data.ecmult_multi = secp256k1_ecmult_pippenger_batch_single;
+        } else if(have_flag(argc, argv, "strauss_wnaf")) {
+            printf("Using strauss_wnaf:\n");
+            data.ecmult_multi = secp256k1_ecmult_strauss_batch_single;
+        } else if(have_flag(argc, argv, "simple")) {
+            printf("Using simple algorithm:\n");
+        } else {
+            fprintf(stderr, "%s: unrecognized argument '%s'.\n\n", argv[0], argv[1]);
+            help(argv);
+            return 1;
+        }
+    }
+
+    /* The iteration count sizes the output arrays and is used as a divisor by
+     * run_benchmark, so it has to be positive. */
+    if (iters < 1) {
+        fprintf(stderr, "The number of iterations must be positive (got %d).\n", iters);
+        return 1;
+    }
+    /* Computed in size_t so that iters + 1 cannot overflow an int. */
+    output_len = (size_t)iters + 1;
+
+    data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_NONE);
+    scratch_size = secp256k1_strauss_scratch_size(POINTS) + STRAUSS_SCRATCH_OBJECTS*16;
+    if (!have_flag(argc, argv, "simple")) {
+        data.scratch = secp256k1_scratch_space_create(data.ctx, scratch_size);
+    } else {
+        data.scratch = NULL;
+    }
+
+    /* Allocate stuff */
+    data.scalars = malloc(sizeof(secp256k1_scalar) * POINTS);
+    data.seckeys = malloc(sizeof(secp256k1_scalar) * POINTS);
+    data.pubkeys = malloc(sizeof(secp256k1_ge) * POINTS);
+    data.pubkeys_gej = malloc(sizeof(secp256k1_gej) * POINTS);
+    data.expected_output = malloc(sizeof(secp256k1_gej) * output_len);
+    data.output = malloc(sizeof(secp256k1_gej) * output_len);
+    if (data.scalars == NULL || data.seckeys == NULL || data.pubkeys == NULL
+        || data.pubkeys_gej == NULL || data.expected_output == NULL || data.output == NULL) {
+        fprintf(stderr, "Failed to allocate memory for the benchmark data.\n");
+        goto cleanup;
+    }
+
+    /* Generate a set of scalars, and private/public keypairs: public key i is
+     * the generator doubled i times and secret key i is 1 doubled i times. */
+    secp256k1_gej_set_ge(&data.pubkeys_gej[0], &secp256k1_ge_const_g);
+    secp256k1_scalar_set_int(&data.seckeys[0], 1);
+    for (i = 0; i < POINTS; ++i) {
+        generate_scalar(i, &data.scalars[i]);
+        if (i) {
+            secp256k1_gej_double_var(&data.pubkeys_gej[i], &data.pubkeys_gej[i - 1], NULL);
+            secp256k1_scalar_add(&data.seckeys[i], &data.seckeys[i - 1], &data.seckeys[i - 1]);
+        }
+    }
+    secp256k1_ge_set_all_gej_var(data.pubkeys, data.pubkeys_gej, POINTS);
+
+
+    print_output_table_header_row();
+    /* Initialize offset1 and offset2 */
+    hash_into_offset(&data, 0);
+    run_ecmult_bench(&data, iters);
+
+    for (i = 1; i <= 8; ++i) {
+        run_ecmult_multi_bench(&data, i, 1, iters);
+    }
+
+    /* The larger batch sizes are skipped for very low iteration counts: the
+     * loops below run many benchmarks even with iters=1, and the bigger the
+     * batch, the longer each one takes (more points). */
+    if (iters > 2) {
+        for (p = 0; p <= 11; ++p) {
+            for (i = 9; i <= 16; ++i) {
+                run_ecmult_multi_bench(&data, i << p, 1, iters);
+            }
+        }
+    }
+    ret = 0;
+
+cleanup:
+    if (data.scratch != NULL) {
+        secp256k1_scratch_space_destroy(data.ctx, data.scratch);
+    }
+    secp256k1_context_destroy(data.ctx);
+    /* free(NULL) is a no-op, so this is safe after a partial allocation failure. */
+    free(data.scalars);
+    free(data.pubkeys);
+    free(data.pubkeys_gej);
+    free(data.seckeys);
+    free(data.output);
+    free(data.expected_output);
+
+    return ret;
+}
--- a/libsecp256k1/src/bench_internal.c
+++ b/libsecp256k1/src/bench_internal.c
@@ -0,0 +1,436 @@
+/***********************************************************************
+ * Copyright (c) 2014-2015 Pieter Wuille                               *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+#include <stdio.h>
+
+#include "secp256k1.c"
+#include "../include/secp256k1.h"
+
+#include "assumptions.h"
+#include "util.h"
+#include "hash_impl.h"
+#include "field_impl.h"
+#include "group_impl.h"
+#include "scalar_impl.h"
+#include "ecmult_impl.h"
+#include "bench.h"
+
+/* Prints the usage text of the internal benchmark to stdout, mentioning
+ * `default_iters` as the default number of iterations. */
+static void help(int default_iters) {
+    static const char* const usage[] = {
+        "customized using the SECP256K1_BENCH_ITERS environment variable.\n",
+        "\n",
+        "Usage: ./bench_internal [args]\n",
+        "By default, all benchmarks will be run.\n",
+        "args:\n",
+        "    help       : display this help and exit\n",
+        "    scalar     : all scalar operations (add, half, inverse, mul, negate, split)\n",
+        "    field      : all field operations (half, inverse, issquare, mul, normalize, sqr, sqrt)\n",
+        "    group      : all group operations (add, double, to_affine)\n",
+        "    ecmult     : all point multiplication operations (ecmult_wnaf) \n",
+        "    hash       : all hash algorithms (hmac, rng6979, sha256)\n",
+        "    context    : all context object operations (context_create)\n",
+        "\n"
+    };
+    size_t line;
+
+    fputs("Benchmarks various internal routines.\n", stdout);
+    fputs("\n", stdout);
+    printf("The default number of iterations for each benchmark is %d. This can be\n", default_iters);
+    for (line = 0; line < sizeof(usage)/sizeof(usage[0]); line++) {
+        fputs(usage[line], stdout);
+    }
+}
+
+typedef struct { /* Operands shared by the internal benchmarks; (re)initialized by bench_setup. */
+    secp256k1_scalar scalar[2]; /* loaded from the first two initializers in bench_setup */
+    secp256k1_fe fe[4]; /* loaded from the four initializers in bench_setup */
+    secp256k1_ge ge[2]; /* built from the X coordinates fe[0] and fe[1] */
+    secp256k1_gej gej[2]; /* ge[0] and ge[1], rescaled by fe[2] and fe[3] respectively */
+    unsigned char data[64]; /* first initializer followed by the second one */
+    int wnaf[256]; /* not touched by bench_setup */
+} bench_inv;
+
+/* Setup hook shared by the internal benchmarks: resets every operand in the
+ * bench_inv structure (except wnaf) to fixed values derived from four
+ * constant 32-byte strings. */
+static void bench_setup(void* arg) {
+    static const unsigned char seed[4][32] = {
+        /* Seed for scalar[0], fe[0], the first half of data, the X coordinate of ge[0],
+           and the (implied affine) X coordinate of gej[0]. */
+        {
+            0x02, 0x03, 0x05, 0x07, 0x0b, 0x0d, 0x11, 0x13,
+            0x17, 0x1d, 0x1f, 0x25, 0x29, 0x2b, 0x2f, 0x35,
+            0x3b, 0x3d, 0x43, 0x47, 0x49, 0x4f, 0x53, 0x59,
+            0x61, 0x65, 0x67, 0x6b, 0x6d, 0x71, 0x7f, 0x83
+        },
+        /* Seed for scalar[1], fe[1], the second half of data, the X coordinate of ge[1],
+           and the (implied affine) X coordinate of gej[1]. */
+        {
+            0x82, 0x83, 0x85, 0x87, 0x8b, 0x8d, 0x81, 0x83,
+            0x97, 0xad, 0xaf, 0xb5, 0xb9, 0xbb, 0xbf, 0xc5,
+            0xdb, 0xdd, 0xe3, 0xe7, 0xe9, 0xef, 0xf3, 0xf9,
+            0x11, 0x15, 0x17, 0x1b, 0x1d, 0xb1, 0xbf, 0xd3
+        },
+        /* Seed for fe[2] and the Z coordinate of gej[0]. */
+        {
+            0x3d, 0x2d, 0xef, 0xf4, 0x25, 0x98, 0x4f, 0x5d,
+            0xe2, 0xca, 0x5f, 0x41, 0x3f, 0x3f, 0xce, 0x44,
+            0xaa, 0x2c, 0x53, 0x8a, 0xc6, 0x59, 0x1f, 0x38,
+            0x38, 0x23, 0xe4, 0x11, 0x27, 0xc6, 0xa0, 0xe7
+        },
+        /* Seed for fe[3] and the Z coordinate of gej[1]. */
+        {
+            0xbd, 0x21, 0xa5, 0xe1, 0x13, 0x50, 0x73, 0x2e,
+            0x52, 0x98, 0xc8, 0x9e, 0xab, 0x00, 0xa2, 0x68,
+            0x43, 0xf5, 0xd7, 0x49, 0x80, 0x72, 0xa7, 0xf3,
+            0xd7, 0x60, 0xe6, 0xab, 0x90, 0x92, 0xdf, 0xc5
+        }
+    };
+    bench_inv *state = (bench_inv*)arg;
+
+    /* Scalars. */
+    secp256k1_scalar_set_b32(&state->scalar[0], seed[0], NULL);
+    secp256k1_scalar_set_b32(&state->scalar[1], seed[1], NULL);
+
+    /* Field elements. */
+    secp256k1_fe_set_b32_limit(&state->fe[0], seed[0]);
+    secp256k1_fe_set_b32_limit(&state->fe[1], seed[1]);
+    secp256k1_fe_set_b32_limit(&state->fe[2], seed[2]);
+    secp256k1_fe_set_b32_limit(&state->fe[3], seed[3]);
+
+    /* Group elements built from the X coordinates fe[0] and fe[1]. */
+    CHECK(secp256k1_ge_set_xo_var(&state->ge[0], &state->fe[0], 0));
+    CHECK(secp256k1_ge_set_xo_var(&state->ge[1], &state->fe[1], 1));
+
+    /* The same points as gej, rescaled by fe[2] and fe[3]. */
+    secp256k1_gej_set_ge(&state->gej[0], &state->ge[0]);
+    secp256k1_gej_rescale(&state->gej[0], &state->fe[2]);
+    secp256k1_gej_set_ge(&state->gej[1], &state->ge[1]);
+    secp256k1_gej_rescale(&state->gej[1], &state->fe[3]);
+
+    /* Raw byte buffer: first seed followed by the second seed. */
+    memcpy(&state->data[0], seed[0], 32);
+    memcpy(&state->data[32], seed[1], 32);
+}
+
+/* Benchmarks secp256k1_scalar_add by adding scalar[1] into scalar[0] `iters`
+ * times. The return values are accumulated and CHECKed to total at most
+ * `iters`. */
+static void bench_scalar_add(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    int total = 0;
+    int n;
+
+    for (n = 0; n < iters; n++) {
+        total += secp256k1_scalar_add(&state->scalar[0], &state->scalar[0], &state->scalar[1]);
+    }
+    CHECK(total <= iters);
+}
+
+/* Benchmarks secp256k1_scalar_negate by negating scalar[0] in place `iters`
+ * times. */
+static void bench_scalar_negate(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    int n = 0;
+
+    while (n < iters) {
+        secp256k1_scalar_negate(&state->scalar[0], &state->scalar[0]);
+        n++;
+    }
+}
+
+/* Benchmarks secp256k1_scalar_half by halving a local copy of scalar[0]
+ * `iters` times; the result is written back to scalar[0] afterwards. */
+static void bench_scalar_half(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    secp256k1_scalar work = state->scalar[0];
+    int n;
+
+    for (n = 0; n < iters; n++) {
+        secp256k1_scalar_half(&work, &work);
+    }
+
+    state->scalar[0] = work;
+}
+
+/* Benchmarks secp256k1_scalar_mul by multiplying scalar[0] by scalar[1] in
+ * place `iters` times. */
+static void bench_scalar_mul(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    int n = 0;
+
+    while (n < iters) {
+        secp256k1_scalar_mul(&state->scalar[0], &state->scalar[0], &state->scalar[1]);
+        n++;
+    }
+}
+
+/* Benchmarks secp256k1_scalar_split_lambda: each round splits scalar[0] into
+ * a temporary and scalar[1], then stores the sum of the two parts in
+ * scalar[0] as the input of the next round. The return values of the
+ * additions are accumulated and CHECKed to total at most `iters`. */
+static void bench_scalar_split(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    secp256k1_scalar part;
+    int total = 0;
+    int n;
+
+    for (n = 0; n < iters; n++) {
+        secp256k1_scalar_split_lambda(&part, &state->scalar[1], &state->scalar[0]);
+        total += secp256k1_scalar_add(&state->scalar[0], &part, &state->scalar[1]);
+    }
+    CHECK(total <= iters);
+}
+
+/* Benchmarks secp256k1_scalar_inverse: each round inverts scalar[0] in place
+ * and then adds scalar[1] to it. The return values of the additions are
+ * accumulated and CHECKed to total at most `iters`. */
+static void bench_scalar_inverse(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    int total = 0;
+    int n;
+
+    for (n = 0; n < iters; n++) {
+        secp256k1_scalar_inverse(&state->scalar[0], &state->scalar[0]);
+        total += secp256k1_scalar_add(&state->scalar[0], &state->scalar[0], &state->scalar[1]);
+    }
+    CHECK(total <= iters);
+}
+
+/* Benchmarks secp256k1_scalar_inverse_var: each round inverts scalar[0] in
+ * place and then adds scalar[1] to it. The return values of the additions are
+ * accumulated and CHECKed to total at most `iters`. */
+static void bench_scalar_inverse_var(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    int total = 0;
+    int n;
+
+    for (n = 0; n < iters; n++) {
+        secp256k1_scalar_inverse_var(&state->scalar[0], &state->scalar[0]);
+        total += secp256k1_scalar_add(&state->scalar[0], &state->scalar[0], &state->scalar[1]);
+    }
+    CHECK(total <= iters);
+}
+
+/* Benchmarks secp256k1_fe_half by applying it to fe[0] `iters` times. */
+static void bench_field_half(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    int n = 0;
+
+    while (n < iters) {
+        secp256k1_fe_half(&state->fe[0]);
+        n++;
+    }
+}
+
+/* Benchmarks secp256k1_fe_normalize by applying it to fe[0] `iters` times. */
+static void bench_field_normalize(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    int n = 0;
+
+    while (n < iters) {
+        secp256k1_fe_normalize(&state->fe[0]);
+        n++;
+    }
+}
+
+/* Benchmarks secp256k1_fe_normalize_weak by applying it to fe[0] `iters`
+ * times. */
+static void bench_field_normalize_weak(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    int n = 0;
+
+    while (n < iters) {
+        secp256k1_fe_normalize_weak(&state->fe[0]);
+        n++;
+    }
+}
+
+/* Benchmarks secp256k1_fe_mul by multiplying fe[0] by fe[1] in place `iters`
+ * times. */
+static void bench_field_mul(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    int n = 0;
+
+    while (n < iters) {
+        secp256k1_fe_mul(&state->fe[0], &state->fe[0], &state->fe[1]);
+        n++;
+    }
+}
+
+/* Benchmark body for secp256k1_fe_sqr: squares fe[0] in place once per iteration. */
+static void bench_field_sqr(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    secp256k1_fe *acc = &state->fe[0];
+    int remaining;
+
+    for (remaining = iters; remaining > 0; remaining--) {
+        secp256k1_fe_sqr(acc, acc);
+    }
+}
+
+/* Benchmark body for secp256k1_fe_inv: inverts fe[0] in place and then adds
+ * fe[1] to it, once per iteration. */
+static void bench_field_inverse(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    secp256k1_fe *acc = &state->fe[0];
+    int round = 0;
+
+    while (round < iters) {
+        secp256k1_fe_inv(acc, acc);
+        secp256k1_fe_add(acc, &state->fe[1]);
+        round++;
+    }
+}
+
+/* Benchmark body for secp256k1_fe_inv_var: inverts fe[0] in place and then adds
+ * fe[1] to it, once per iteration. */
+static void bench_field_inverse_var(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    secp256k1_fe *acc = &state->fe[0];
+    int round = 0;
+
+    while (round < iters) {
+        secp256k1_fe_inv_var(acc, acc);
+        secp256k1_fe_add(acc, &state->fe[1]);
+        round++;
+    }
+}
+
+/* Benchmark body for secp256k1_fe_sqrt. Each round copies fe[0] into a local,
+ * calls secp256k1_fe_sqrt with fe[0] as destination and the copy as source, and
+ * then adds fe[1] to fe[0]. The return values of secp256k1_fe_sqrt are
+ * accumulated and checked at the end. */
+static void bench_field_sqrt(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    secp256k1_fe *acc = &state->fe[0];
+    secp256k1_fe input;
+    int successes = 0;
+    int remaining;
+
+    for (remaining = iters; remaining > 0; remaining--) {
+        input = *acc;
+        successes += secp256k1_fe_sqrt(acc, &input);
+        secp256k1_fe_add(acc, &state->fe[1]);
+    }
+    CHECK(successes <= iters);
+}
+
+/* Benchmark body for secp256k1_fe_is_square_var. Works on a local copy of fe[0];
+ * after each test the copy has fe[1] added to it and is normalized, so that the
+ * next round tests a different value. The return values of
+ * secp256k1_fe_is_square_var are accumulated and checked at the end. */
+static void bench_field_is_square_var(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    secp256k1_fe candidate = state->fe[0];
+    int squares = 0;
+    int round = 0;
+
+    while (round < iters) {
+        squares += secp256k1_fe_is_square_var(&candidate);
+        secp256k1_fe_add(&candidate, &state->fe[1]);
+        secp256k1_fe_normalize_var(&candidate);
+        round++;
+    }
+    CHECK(squares <= iters);
+}
+
+/* Benchmark body for secp256k1_gej_double_var: doubles gej[0] in place once per
+ * iteration, passing NULL as the last argument. */
+static void bench_group_double_var(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    secp256k1_gej *acc = &state->gej[0];
+    int remaining;
+
+    for (remaining = iters; remaining > 0; remaining--) {
+        secp256k1_gej_double_var(acc, acc, NULL);
+    }
+}
+
+/* Benchmark body for secp256k1_gej_add_var: adds gej[1] to gej[0], storing the
+ * result in gej[0], once per iteration (last argument NULL). */
+static void bench_group_add_var(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    secp256k1_gej *acc = &state->gej[0];
+    int remaining;
+
+    for (remaining = iters; remaining > 0; remaining--) {
+        secp256k1_gej_add_var(acc, acc, &state->gej[1], NULL);
+    }
+}
+
+/* Benchmark body for secp256k1_gej_add_ge: adds the affine point ge[1] to gej[0],
+ * storing the result in gej[0], once per iteration. */
+static void bench_group_add_affine(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    secp256k1_gej *acc = &state->gej[0];
+    int remaining;
+
+    for (remaining = iters; remaining > 0; remaining--) {
+        secp256k1_gej_add_ge(acc, acc, &state->ge[1]);
+    }
+}
+
+/* Benchmark body for secp256k1_gej_add_ge_var: adds the affine point ge[1] to
+ * gej[0], storing the result in gej[0], once per iteration (last argument NULL). */
+static void bench_group_add_affine_var(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    secp256k1_gej *acc = &state->gej[0];
+    int remaining;
+
+    for (remaining = iters; remaining > 0; remaining--) {
+        secp256k1_gej_add_ge_var(acc, acc, &state->ge[1], NULL);
+    }
+}
+
+/* Benchmark body for secp256k1_gej_add_zinv_var: once per iteration, calls it
+ * with gej[0] as both destination and first operand, ge[1] as second operand,
+ * and the y field of gej[0] as the final field-element argument. */
+static void bench_group_add_zinv_var(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    secp256k1_gej *acc = &state->gej[0];
+    int remaining;
+
+    for (remaining = iters; remaining > 0; remaining--) {
+        secp256k1_gej_add_zinv_var(acc, acc, &state->ge[1], &acc->y);
+    }
+}
+
+/* Benchmark body for secp256k1_ge_set_gej_var: converts gej[0] into ge[1] once
+ * per iteration, perturbing gej[0] between rounds. */
+static void bench_group_to_affine_var(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    secp256k1_gej *jac = &state->gej[0];
+    secp256k1_ge *aff = &state->ge[1];
+    int remaining;
+
+    for (remaining = iters; remaining > 0; --remaining) {
+        secp256k1_ge_set_gej_var(aff, jac);
+        /* Feed the affine X/Y output back into the Jacobian X/Y/Z input so that
+           every round converts different coordinates. The perturbed coordinates
+           will generally not describe a point on the curve, which does not matter
+           for the code under test. Field additions plus normalizations are used
+           because they cost less than EC operations (which could keep the point
+           on the curve). */
+        secp256k1_fe_add(&jac->x, &aff->y);
+        secp256k1_fe_add(&jac->y, &state->fe[2]);
+        secp256k1_fe_add(&jac->z, &aff->x);
+        secp256k1_fe_normalize_var(&jac->x);
+        secp256k1_fe_normalize_var(&jac->y);
+        secp256k1_fe_normalize_var(&jac->z);
+    }
+}
+
+/* Benchmark body for secp256k1_ecmult_wnaf: once per iteration, calls it on
+ * scalar[0] (length argument 256, window WINDOW_A, output buffer `wnaf`) and then
+ * adds scalar[1] to scalar[0]. The return values of both calls are accumulated
+ * and checked at the end. */
+static void bench_ecmult_wnaf(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    secp256k1_scalar *acc = &state->scalar[0];
+    int total_bits = 0;
+    int overflows = 0;
+    int round = 0;
+
+    while (round < iters) {
+        total_bits += secp256k1_ecmult_wnaf(state->wnaf, 256, acc, WINDOW_A);
+        overflows += secp256k1_scalar_add(acc, acc, &state->scalar[1]);
+        round++;
+    }
+    CHECK(overflows >= 0);
+    CHECK(total_bits <= 256*iters);
+}
+
+/* Benchmark body for the SHA-256 helpers: each round initializes a hasher,
+ * writes 32 bytes from the `data` buffer and finalizes back into the same
+ * buffer, so every round hashes the previous round's output. */
+static void bench_sha256(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    secp256k1_sha256 hasher;
+    int remaining;
+
+    for (remaining = iters; remaining > 0; remaining--) {
+        secp256k1_sha256_initialize(&hasher);
+        secp256k1_sha256_write(&hasher, state->data, 32);
+        secp256k1_sha256_finalize(&hasher, state->data);
+    }
+}
+
+/* Benchmark body for the HMAC-SHA256 helpers: each round initializes with 32
+ * bytes from the `data` buffer, writes the same 32 bytes and finalizes back into
+ * that buffer. */
+static void bench_hmac_sha256(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    secp256k1_hmac_sha256 mac;
+    int remaining;
+
+    for (remaining = iters; remaining > 0; remaining--) {
+        secp256k1_hmac_sha256_initialize(&mac, state->data, 32);
+        secp256k1_hmac_sha256_write(&mac, state->data, 32);
+        secp256k1_hmac_sha256_finalize(&mac, state->data);
+    }
+}
+
+/* Benchmark body for the RFC 6979 HMAC-SHA256 generator: each round initializes
+ * it from 64 bytes of the `data` buffer and generates 32 bytes back into the
+ * start of that buffer. */
+static void bench_rfc6979_hmac_sha256(void* arg, int iters) {
+    bench_inv *state = (bench_inv*)arg;
+    secp256k1_rfc6979_hmac_sha256 gen;
+    int remaining;
+
+    for (remaining = iters; remaining > 0; remaining--) {
+        secp256k1_rfc6979_hmac_sha256_initialize(&gen, state->data, 64);
+        secp256k1_rfc6979_hmac_sha256_generate(&gen, state->data, 32);
+    }
+}
+
+/* Benchmark body for context handling: creates a SECP256K1_CONTEXT_NONE context
+ * and destroys it again, once per iteration. The benchmark state is unused. */
+static void bench_context(void* arg, int iters) {
+    int remaining;
+    (void)arg;
+    for (remaining = iters; remaining > 0; remaining--) {
+        secp256k1_context *fresh = secp256k1_context_create(SECP256K1_CONTEXT_NONE);
+        secp256k1_context_destroy(fresh);
+    }
+}
+
+/* Entry point of the internal benchmark binary.
+ *
+ * With no command-line arguments every benchmark runs. Otherwise a benchmark runs
+ * when either its group name ("scalar", "field", "group", "ecmult", "hash",
+ * "context") or its own keyword (e.g. "inverse", "add") is present among the
+ * arguments. "-h", "--help" or "help" prints the usage text and exits.
+ * Cheaper operations are run with a multiple of the base iteration count. */
+int main(int argc, char **argv) {
+    bench_inv data;
+    int default_iters = 20000;
+    int iters = get_iters(default_iters);
+    int run_all = (argc == 1);
+    int sel_scalar, sel_field, sel_group, sel_hash;
+
+    if (argc > 1 && (have_flag(argc, argv, "-h") || have_flag(argc, argv, "--help") || have_flag(argc, argv, "help"))) {
+        help(default_iters);
+        return 0;
+    }
+
+    print_output_table_header_row();
+
+    /* Group-level selectors; each is implied by running without arguments. */
+    sel_scalar = run_all || have_flag(argc, argv, "scalar");
+    sel_field = run_all || have_flag(argc, argv, "field");
+    sel_group = run_all || have_flag(argc, argv, "group");
+    sel_hash = run_all || have_flag(argc, argv, "hash");
+
+    /* Scalar arithmetic. */
+    if (sel_scalar || have_flag(argc, argv, "half")) {
+        run_benchmark("scalar_half", bench_scalar_half, bench_setup, NULL, &data, 10, iters*100);
+    }
+    if (sel_scalar || have_flag(argc, argv, "add")) {
+        run_benchmark("scalar_add", bench_scalar_add, bench_setup, NULL, &data, 10, iters*100);
+    }
+    if (sel_scalar || have_flag(argc, argv, "negate")) {
+        run_benchmark("scalar_negate", bench_scalar_negate, bench_setup, NULL, &data, 10, iters*100);
+    }
+    if (sel_scalar || have_flag(argc, argv, "mul")) {
+        run_benchmark("scalar_mul", bench_scalar_mul, bench_setup, NULL, &data, 10, iters*10);
+    }
+    if (sel_scalar || have_flag(argc, argv, "split")) {
+        run_benchmark("scalar_split", bench_scalar_split, bench_setup, NULL, &data, 10, iters);
+    }
+    if (sel_scalar || have_flag(argc, argv, "inverse")) {
+        run_benchmark("scalar_inverse", bench_scalar_inverse, bench_setup, NULL, &data, 10, iters);
+        run_benchmark("scalar_inverse_var", bench_scalar_inverse_var, bench_setup, NULL, &data, 10, iters);
+    }
+
+    /* Field arithmetic. */
+    if (sel_field || have_flag(argc, argv, "half")) {
+        run_benchmark("field_half", bench_field_half, bench_setup, NULL, &data, 10, iters*100);
+    }
+    if (sel_field || have_flag(argc, argv, "normalize")) {
+        run_benchmark("field_normalize", bench_field_normalize, bench_setup, NULL, &data, 10, iters*100);
+        run_benchmark("field_normalize_weak", bench_field_normalize_weak, bench_setup, NULL, &data, 10, iters*100);
+    }
+    if (sel_field || have_flag(argc, argv, "sqr")) {
+        run_benchmark("field_sqr", bench_field_sqr, bench_setup, NULL, &data, 10, iters*10);
+    }
+    if (sel_field || have_flag(argc, argv, "mul")) {
+        run_benchmark("field_mul", bench_field_mul, bench_setup, NULL, &data, 10, iters*10);
+    }
+    if (sel_field || have_flag(argc, argv, "inverse")) {
+        run_benchmark("field_inverse", bench_field_inverse, bench_setup, NULL, &data, 10, iters);
+        run_benchmark("field_inverse_var", bench_field_inverse_var, bench_setup, NULL, &data, 10, iters);
+    }
+    if (sel_field || have_flag(argc, argv, "issquare")) {
+        run_benchmark("field_is_square_var", bench_field_is_square_var, bench_setup, NULL, &data, 10, iters);
+    }
+    if (sel_field || have_flag(argc, argv, "sqrt")) {
+        run_benchmark("field_sqrt", bench_field_sqrt, bench_setup, NULL, &data, 10, iters);
+    }
+
+    /* Group operations. */
+    if (sel_group || have_flag(argc, argv, "double")) {
+        run_benchmark("group_double_var", bench_group_double_var, bench_setup, NULL, &data, 10, iters*10);
+    }
+    if (sel_group || have_flag(argc, argv, "add")) {
+        run_benchmark("group_add_var", bench_group_add_var, bench_setup, NULL, &data, 10, iters*10);
+        run_benchmark("group_add_affine", bench_group_add_affine, bench_setup, NULL, &data, 10, iters*10);
+        run_benchmark("group_add_affine_var", bench_group_add_affine_var, bench_setup, NULL, &data, 10, iters*10);
+        run_benchmark("group_add_zinv_var", bench_group_add_zinv_var, bench_setup, NULL, &data, 10, iters*10);
+    }
+    if (sel_group || have_flag(argc, argv, "to_affine")) {
+        run_benchmark("group_to_affine_var", bench_group_to_affine_var, bench_setup, NULL, &data, 10, iters);
+    }
+
+    /* wNAF conversion. */
+    if (run_all || have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) {
+        run_benchmark("ecmult_wnaf", bench_ecmult_wnaf, bench_setup, NULL, &data, 10, iters);
+    }
+
+    /* Hash functions. */
+    if (sel_hash || have_flag(argc, argv, "sha256")) {
+        run_benchmark("hash_sha256", bench_sha256, bench_setup, NULL, &data, 10, iters);
+    }
+    if (sel_hash || have_flag(argc, argv, "hmac")) {
+        run_benchmark("hash_hmac_sha256", bench_hmac_sha256, bench_setup, NULL, &data, 10, iters);
+    }
+    if (sel_hash || have_flag(argc, argv, "rng6979")) {
+        run_benchmark("hash_rfc6979_hmac_sha256", bench_rfc6979_hmac_sha256, bench_setup, NULL, &data, 10, iters);
+    }
+
+    /* Context creation and destruction. */
+    if (run_all || have_flag(argc, argv, "context")) {
+        run_benchmark("context_create", bench_context, bench_setup, NULL, &data, 10, iters);
+    }
+
+    return 0;
+}
--- a/libsecp256k1/src/checkmem.h
+++ b/libsecp256k1/src/checkmem.h
@@ -0,0 +1,102 @@
+/***********************************************************************
+ * Copyright (c) 2022 Pieter Wuille                                    *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+/* The code here is inspired by Kris Kwiatkowski's approach in
+ * https://github.com/kriskwiatkowski/pqc/blob/main/src/common/ct_check.h
+ * to provide a general interface for memory-checking mechanisms, primarily
+ * for constant-time checking.
+ */
+
+/* These macros are defined by this header file:
+ *
+ * - SECP256K1_CHECKMEM_ENABLED:
+ *   - 1 if memory-checking integration is available, 0 otherwise.
+ *     This is just a compile-time macro. Use the next macro to check it is actually
+ *     available at runtime.
+ * - SECP256K1_CHECKMEM_RUNNING():
+ *   - Acts like a function call, returning 1 if memory checking is available
+ *     at runtime.
+ * - SECP256K1_CHECKMEM_CHECK(p, len):
+ *   - Assert or otherwise fail in case the len-byte memory block pointed to by p is
+ *     not considered entirely defined.
+ * - SECP256K1_CHECKMEM_CHECK_VERIFY(p, len):
+ *   - Like SECP256K1_CHECKMEM_CHECK, but only works in VERIFY mode.
+ * - SECP256K1_CHECKMEM_UNDEFINE(p, len):
+ *   - marks the len-byte memory block pointed to by p as undefined data (secret data,
+ *     in the context of constant-time checking).
+ * - SECP256K1_CHECKMEM_DEFINE(p, len):
+ *   - marks the len-byte memory pointed to by p as defined data (public data, in the
+ *     context of constant-time checking).
+ * - SECP256K1_CHECKMEM_MSAN_DEFINE(p, len):
+ *   - Like SECP256K1_CHECKMEM_DEFINE, but applies only to memory_sanitizer.
+ *
+ */
+
+#ifndef SECP256K1_CHECKMEM_H
+#define SECP256K1_CHECKMEM_H
+
+/* Define a statement-like macro that ignores the arguments.
+ * Both arguments are still evaluated (cast to void), which keeps them "used"
+ * from the compiler's point of view. */
+#define SECP256K1_CHECKMEM_NOOP(p, len) do { (void)(p); (void)(len); } while(0)
+
+/* If compiling under msan, map the SECP256K1_CHECKMEM_* functionality to msan.
+ * Choose this preferentially, even when VALGRIND is defined, as msan-compiled
+ * binaries can't be run under valgrind anyway. */
+#if defined(__has_feature)
+#  if __has_feature(memory_sanitizer)
+#    include <sanitizer/msan_interface.h>
+#    define SECP256K1_CHECKMEM_ENABLED 1
+#    define SECP256K1_CHECKMEM_UNDEFINE(p, len) __msan_allocated_memory((p), (len))
+#    define SECP256K1_CHECKMEM_DEFINE(p, len) __msan_unpoison((p), (len))
+#    define SECP256K1_CHECKMEM_MSAN_DEFINE(p, len) __msan_unpoison((p), (len))
+#    define SECP256K1_CHECKMEM_CHECK(p, len) __msan_check_mem_is_initialized((p), (len))
+#    define SECP256K1_CHECKMEM_RUNNING() (1)
+#  endif
+#endif
+
+/* Outside of msan builds, SECP256K1_CHECKMEM_MSAN_DEFINE is a no-op (this
+ * includes valgrind builds, since the valgrind branch below does not define it). */
+#if !defined SECP256K1_CHECKMEM_MSAN_DEFINE
+#  define SECP256K1_CHECKMEM_MSAN_DEFINE(p, len) SECP256K1_CHECKMEM_NOOP((p), (len))
+#endif
+
+/* If valgrind integration is desired (through the VALGRIND define), implement the
+ * SECP256K1_CHECKMEM_* macros using valgrind. */
+#if !defined SECP256K1_CHECKMEM_ENABLED
+#  if defined VALGRIND
+#    include <stddef.h>
+/* With clang on Apple platforms, silence -Wreserved-identifier for the duration
+ * of the valgrind header include only. */
+#  if defined(__clang__) && defined(__APPLE__)
+#    pragma clang diagnostic push
+#    pragma clang diagnostic ignored "-Wreserved-identifier"
+#  endif
+#    include <valgrind/memcheck.h>
+#  if defined(__clang__) && defined(__APPLE__)
+#    pragma clang diagnostic pop
+#  endif
+#    define SECP256K1_CHECKMEM_ENABLED 1
+#    define SECP256K1_CHECKMEM_UNDEFINE(p, len) VALGRIND_MAKE_MEM_UNDEFINED((p), (len))
+#    define SECP256K1_CHECKMEM_DEFINE(p, len) VALGRIND_MAKE_MEM_DEFINED((p), (len))
+#    define SECP256K1_CHECKMEM_CHECK(p, len) VALGRIND_CHECK_MEM_IS_DEFINED((p), (len))
+     /* VALGRIND_MAKE_MEM_DEFINED returns 0 iff not running on memcheck.
+      * This is more precise than the RUNNING_ON_VALGRIND macro, which
+      * checks for valgrind in general instead of memcheck specifically. */
+#    define SECP256K1_CHECKMEM_RUNNING() (VALGRIND_MAKE_MEM_DEFINED(NULL, 0) != 0)
+#  endif
+#endif
+
+/* As a fall-back, map these macros to dummy statements. */
+#if !defined SECP256K1_CHECKMEM_ENABLED
+#  define SECP256K1_CHECKMEM_ENABLED 0
+#  define SECP256K1_CHECKMEM_UNDEFINE(p, len) SECP256K1_CHECKMEM_NOOP((p), (len))
+#  define SECP256K1_CHECKMEM_DEFINE(p, len) SECP256K1_CHECKMEM_NOOP((p), (len))
+#  define SECP256K1_CHECKMEM_CHECK(p, len) SECP256K1_CHECKMEM_NOOP((p), (len))
+#  define SECP256K1_CHECKMEM_RUNNING() (0)
+#endif
+
+/* SECP256K1_CHECKMEM_CHECK_VERIFY forwards to SECP256K1_CHECKMEM_CHECK when
+ * VERIFY is defined and is a no-op otherwise. */
+#if defined VERIFY
+#define SECP256K1_CHECKMEM_CHECK_VERIFY(p, len) SECP256K1_CHECKMEM_CHECK((p), (len))
+#else
+#define SECP256K1_CHECKMEM_CHECK_VERIFY(p, len) SECP256K1_CHECKMEM_NOOP((p), (len))
+#endif
+
+#endif /* SECP256K1_CHECKMEM_H */
--- a/libsecp256k1/src/ecdsa.h
+++ b/libsecp256k1/src/ecdsa.h
@@ -0,0 +1,21 @@
+/***********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                              *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_ECDSA_H
+#define SECP256K1_ECDSA_H
+
+#include <stddef.h>
+
+#include "scalar.h"
+#include "group.h"
+#include "ecmult.h"
+
+/** Parse a DER-encoded ECDSA signature of `size` bytes into the scalars r and s.
+ *  Returns 1 if the encoding is well-formed and 0 otherwise. A component that is
+ *  negative, longer than 32 bytes, or reported as overflowing by
+ *  secp256k1_scalar_set_b32 is stored as zero without failing the parse. */
+static int secp256k1_ecdsa_sig_parse(secp256k1_scalar *r, secp256k1_scalar *s, const unsigned char *sig, size_t size);
+/** Write the DER encoding of (r, s) to sig. On entry *size is the capacity of
+ *  sig; on exit it is the length of the encoding. Returns 1 on success, or 0
+ *  (without writing to sig) if the capacity was too small. */
+static int secp256k1_ecdsa_sig_serialize(unsigned char *sig, size_t *size, const secp256k1_scalar *r, const secp256k1_scalar *s);
+/** Verify the signature (r, s) on `message` against `pubkey`.
+ *  Returns 1 if the signature is valid and 0 otherwise (including when r or s is zero). */
+static int secp256k1_ecdsa_sig_verify(const secp256k1_scalar* r, const secp256k1_scalar* s, const secp256k1_ge *pubkey, const secp256k1_scalar *message);
+/** Compute the signature (r, s) on `message` with `seckey` and `nonce`.
+ *  If recid is non-NULL, a recovery id is stored in *recid.
+ *  Returns 1 if both r and s are nonzero and 0 otherwise. */
+static int secp256k1_ecdsa_sig_sign(const secp256k1_ecmult_gen_context *ctx, secp256k1_scalar* r, secp256k1_scalar* s, const secp256k1_scalar *seckey, const secp256k1_scalar *message, const secp256k1_scalar *nonce, int *recid);
+
+#endif /* SECP256K1_ECDSA_H */
--- a/libsecp256k1/src/ecdsa_impl.h
+++ b/libsecp256k1/src/ecdsa_impl.h
@@ -0,0 +1,304 @@
+/***********************************************************************
+ * Copyright (c) 2013-2015 Pieter Wuille                               *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+
+#ifndef SECP256K1_ECDSA_IMPL_H
+#define SECP256K1_ECDSA_IMPL_H
+
+#include "scalar.h"
+#include "field.h"
+#include "group.h"
+#include "ecmult.h"
+#include "ecmult_gen.h"
+#include "ecdsa.h"
+
+/** Group order for secp256k1 defined as 'n' in "Standards for Efficient Cryptography" (SEC2) 2.7.1
+ *  $ sage -c 'load("secp256k1_params.sage"); print(hex(N))'
+ *  0xfffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141
+ *
+ *  Stored as a field element; secp256k1_ecdsa_sig_verify adds it to the claimed
+ *  x coordinate to test the second candidate (xr + n).
+ */
+static const secp256k1_fe secp256k1_ecdsa_const_order_as_fe = SECP256K1_FE_CONST(
+    0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL,
+    0xBAAEDCE6UL, 0xAF48A03BUL, 0xBFD25E8CUL, 0xD0364141UL
+);
+
+/** Difference between the field size 'p' and the group order 'n', both defined in
+ *  "Standards for Efficient Cryptography" (SEC2) 2.7.1.
+ *  $ sage -c 'load("secp256k1_params.sage"); print(hex(P-N))'
+ *  0x14551231950b75fc4402da1722fc9baee
+ *
+ *  secp256k1_ecdsa_sig_verify compares the claimed x coordinate against this
+ *  value to decide whether xr + n can still be below p.
+ */
+static const secp256k1_fe secp256k1_ecdsa_const_p_minus_order = SECP256K1_FE_CONST(
+    0, 0, 0, 1, 0x45512319UL, 0x50B75FC4UL, 0x402DA172UL, 0x2FC9BAEEUL
+);
+
+/** Read a DER length field starting at *sigp; sigend points one past the last
+ *  readable byte.
+ *
+ *  Returns 1 on success, with the decoded length in *len and *sigp advanced past
+ *  the length octets. Returns 0 if the field is truncated, uses a forbidden or
+ *  non-minimal encoding, or announces more bytes than remain before sigend.
+ *  On failure *sigp may already have been advanced and *len must not be relied upon.
+ */
+static int secp256k1_der_read_len(size_t *len, const unsigned char **sigp, const unsigned char *sigend) {
+    size_t lenleft;
+    unsigned char b1;
+    VERIFY_CHECK(len != NULL);
+    *len = 0;
+    if (*sigp >= sigend) {
+        return 0;
+    }
+    b1 = *((*sigp)++);
+    if (b1 == 0xFF) {
+        /* X.690-0207 8.1.3.5.c the value 0xFF shall not be used. */
+        return 0;
+    }
+    if ((b1 & 0x80) == 0) {
+        /* X.690-0207 8.1.3.4 short form length octets */
+        *len = b1;
+        return 1;
+    }
+    if (b1 == 0x80) {
+        /* Indefinite length is not allowed in DER. */
+        return 0;
+    }
+    /* X.690-207 8.1.3.5 long form length octets */
+    lenleft = b1 & 0x7F; /* lenleft is at least 1 */
+    if (lenleft > (size_t)(sigend - *sigp)) {
+        return 0;
+    }
+    /* At least one length octet is available here, so **sigp is readable. */
+    if (**sigp == 0) {
+        /* Not the shortest possible length encoding. */
+        return 0;
+    }
+    if (lenleft > sizeof(size_t)) {
+        /* The resulting length would exceed the range of a size_t, so
+         * it is certainly longer than the passed array size. */
+        return 0;
+    }
+    /* Accumulate the big-endian length octets. */
+    while (lenleft > 0) {
+        *len = (*len << 8) | **sigp;
+        (*sigp)++;
+        lenleft--;
+    }
+    if (*len > (size_t)(sigend - *sigp)) {
+        /* Result exceeds the length of the passed array.
+           (Checking this is the responsibility of the caller but it
+           can't hurt to do it here, too.) */
+        return 0;
+    }
+    if (*len < 128) {
+        /* Not the shortest possible length encoding. */
+        return 0;
+    }
+    return 1;
+}
+
+/** Parse one DER INTEGER starting at *sig (bounded by sigend) into the scalar r.
+ *
+ *  Returns 0 if the element is not a well-formed, minimally encoded INTEGER that
+ *  fits before sigend. Otherwise returns 1 and advances *sig past the integer.
+ *  In the success case r is set to zero when the value is negative, is longer
+ *  than 32 bytes after dropping a single leading zero byte, or is reported as
+ *  overflowing by secp256k1_scalar_set_b32.
+ */
+static int secp256k1_der_parse_integer(secp256k1_scalar *r, const unsigned char **sig, const unsigned char *sigend) {
+    int overflow = 0;
+    unsigned char ra[32] = {0};
+    size_t rlen;
+
+    if (*sig == sigend || **sig != 0x02) {
+        /* Not a primitive integer (X.690-0207 8.3.1). */
+        return 0;
+    }
+    (*sig)++;
+    if (secp256k1_der_read_len(&rlen, sig, sigend) == 0) {
+        return 0;
+    }
+    if (rlen == 0 || rlen > (size_t)(sigend - *sig)) {
+        /* Exceeds bounds or not at least length 1 (X.690-0207 8.3.1).  */
+        return 0;
+    }
+    if (**sig == 0x00 && rlen > 1 && (((*sig)[1]) & 0x80) == 0x00) {
+        /* Excessive 0x00 padding. */
+        return 0;
+    }
+    if (**sig == 0xFF && rlen > 1 && (((*sig)[1]) & 0x80) == 0x80) {
+        /* Excessive 0xFF padding. */
+        return 0;
+    }
+    if ((**sig & 0x80) == 0x80) {
+        /* Negative. */
+        overflow = 1;
+    }
+    /* There is at most one leading zero byte:
+     * if there were two leading zero bytes, we would have failed and returned 0
+     * because of excessive 0x00 padding already. */
+    if (rlen > 0 && **sig == 0) {
+        /* Skip leading zero byte */
+        rlen--;
+        (*sig)++;
+    }
+    if (rlen > 32) {
+        overflow = 1;
+    }
+    if (!overflow) {
+        /* Right-align the value in the zero-initialized 32-byte buffer. */
+        if (rlen) memcpy(ra + 32 - rlen, *sig, rlen);
+        secp256k1_scalar_set_b32(r, ra, &overflow);
+    }
+    if (overflow) {
+        secp256k1_scalar_set_int(r, 0);
+    }
+    (*sig) += rlen;
+    return 1;
+}
+
+/** Parse a DER-encoded ECDSA signature (a SEQUENCE of two INTEGERs) of `size`
+ *  bytes into rr and rs. Returns 1 if the encoding is well-formed and spans
+ *  exactly `size` bytes, 0 otherwise. */
+static int secp256k1_ecdsa_sig_parse(secp256k1_scalar *rr, secp256k1_scalar *rs, const unsigned char *sig, size_t size) {
+    const unsigned char *end = sig + size;
+    size_t seqlen;
+
+    if (size == 0 || sig[0] != 0x30) {
+        /* The encoding doesn't start with a constructed sequence (X.690-0207 8.9.1). */
+        return 0;
+    }
+    sig++;
+    if (!secp256k1_der_read_len(&seqlen, &sig, end)) {
+        return 0;
+    }
+    if (seqlen != (size_t)(end - sig)) {
+        /* Tuple exceeds bounds or garbage after tuple. */
+        return 0;
+    }
+
+    /* The two integers are parsed in order; the second is not attempted when the first fails. */
+    if (!(secp256k1_der_parse_integer(rr, &sig, end) && secp256k1_der_parse_integer(rs, &sig, end))) {
+        return 0;
+    }
+
+    /* Anything left over at this point is trailing garbage inside the tuple. */
+    return sig == end;
+}
+
+/** Write the DER encoding of the signature (ar, as) to sig.
+ *
+ *  On entry *size is the capacity of sig; on exit it holds the length of the
+ *  encoding. Returns 1 on success, or 0 (leaving sig untouched) if the capacity
+ *  was too small. */
+static int secp256k1_ecdsa_sig_serialize(unsigned char *sig, size_t *size, const secp256k1_scalar* ar, const secp256k1_scalar* as) {
+    /* Each value is serialized behind one zero byte, which is kept only when
+     * needed to keep the DER integer non-negative. */
+    unsigned char rbuf[33] = {0}, sbuf[33] = {0};
+    const unsigned char *rstart = rbuf, *sstart = sbuf;
+    size_t rlen = 33, slen = 33;
+    size_t total;
+    unsigned char *out = sig;
+
+    secp256k1_scalar_get_b32(&rbuf[1], ar);
+    secp256k1_scalar_get_b32(&sbuf[1], as);
+    /* Strip leading zero bytes as long as the following byte has its top bit clear. */
+    while (rlen > 1 && rstart[0] == 0 && rstart[1] < 0x80) {
+        rstart++;
+        rlen--;
+    }
+    while (slen > 1 && sstart[0] == 0 && sstart[1] < 0x80) {
+        sstart++;
+        slen--;
+    }
+
+    total = 6 + slen + rlen;
+    if (*size < total) {
+        *size = total;
+        return 0;
+    }
+    *size = total;
+
+    /* SEQUENCE header, followed by the two INTEGER elements. */
+    *out++ = 0x30;
+    *out++ = (unsigned char)(4 + slen + rlen);
+    *out++ = 0x02;
+    *out++ = (unsigned char)rlen;
+    memcpy(out, rstart, rlen);
+    out += rlen;
+    *out++ = 0x02;
+    *out++ = (unsigned char)slen;
+    memcpy(out, sstart, slen);
+    return 1;
+}
+
+/** Verify the ECDSA signature (sigr, sigs) on `message` against `pubkey`.
+ *
+ *  Computes pr = u2*pubkey + u1*G with u1 = message/sigs and u2 = sigr/sigs, then
+ *  checks that the x coordinate of pr matches sigr modulo the group order.
+ *  Returns 1 if the signature is valid and 0 otherwise; a zero sigr or sigs, or
+ *  pr being the point at infinity, is rejected.
+ */
+static int secp256k1_ecdsa_sig_verify(const secp256k1_scalar *sigr, const secp256k1_scalar *sigs, const secp256k1_ge *pubkey, const secp256k1_scalar *message) {
+    unsigned char c[32];
+    secp256k1_scalar sn, u1, u2;
+#if !defined(EXHAUSTIVE_TEST_ORDER)
+    secp256k1_fe xr;
+#endif
+    secp256k1_gej pubkeyj;
+    secp256k1_gej pr;
+
+    if (secp256k1_scalar_is_zero(sigr) || secp256k1_scalar_is_zero(sigs)) {
+        return 0;
+    }
+
+    secp256k1_scalar_inverse_var(&sn, sigs);
+    secp256k1_scalar_mul(&u1, &sn, message);
+    secp256k1_scalar_mul(&u2, &sn, sigr);
+    secp256k1_gej_set_ge(&pubkeyj, pubkey);
+    secp256k1_ecmult(&pr, &pubkeyj, &u2, &u1);
+    if (secp256k1_gej_is_infinity(&pr)) {
+        return 0;
+    }
+
+#if defined(EXHAUSTIVE_TEST_ORDER)
+{
+    /* With EXHAUSTIVE_TEST_ORDER defined, convert pr to affine and compare its
+     * x coordinate, converted to a scalar, with sigr directly. */
+    secp256k1_scalar computed_r;
+    secp256k1_ge pr_ge;
+    secp256k1_ge_set_gej(&pr_ge, &pr);
+    secp256k1_fe_normalize(&pr_ge.x);
+
+    secp256k1_fe_get_b32(c, &pr_ge.x);
+    secp256k1_scalar_set_b32(&computed_r, c, NULL);
+    return secp256k1_scalar_eq(sigr, &computed_r);
+}
+#else
+    secp256k1_scalar_get_b32(c, sigr);
+    /* we can ignore the fe_set_b32_limit return value, because we know the input is in range */
+    (void)secp256k1_fe_set_b32_limit(&xr, c);
+
+    /** We now have the recomputed R point in pr, and its claimed x coordinate (modulo n)
+     *  in xr. Naively, we would extract the x coordinate from pr (requiring an inversion modulo p),
+     *  compute the remainder modulo n, and compare it to xr. However:
+     *
+     *        xr == X(pr) mod n
+     *    <=> exists h. (xr + h * n < p && xr + h * n == X(pr))
+     *    [Since 2 * n > p, h can only be 0 or 1]
+     *    <=> (xr == X(pr)) || (xr + n < p && xr + n == X(pr))
+     *    [In Jacobian coordinates, X(pr) is pr.x / pr.z^2 mod p]
+     *    <=> (xr == pr.x / pr.z^2 mod p) || (xr + n < p && xr + n == pr.x / pr.z^2 mod p)
+     *    [Multiplying both sides of the equations by pr.z^2 mod p]
+     *    <=> (xr * pr.z^2 mod p == pr.x) || (xr + n < p && (xr + n) * pr.z^2 mod p == pr.x)
+     *
+     *  Thus, we can avoid the inversion, but we have to check both cases separately.
+     *  secp256k1_gej_eq_x_var implements the (xr * pr.z^2 mod p == pr.x) test.
+     */
+    if (secp256k1_gej_eq_x_var(&xr, &pr)) {
+        /* xr * pr.z^2 mod p == pr.x, so the signature is valid. */
+        return 1;
+    }
+    if (secp256k1_fe_cmp_var(&xr, &secp256k1_ecdsa_const_p_minus_order) >= 0) {
+        /* xr + n >= p, so we can skip testing the second case. */
+        return 0;
+    }
+    secp256k1_fe_add(&xr, &secp256k1_ecdsa_const_order_as_fe);
+    if (secp256k1_gej_eq_x_var(&xr, &pr)) {
+        /* (xr + n) * pr.z^2 mod p == pr.x, so the signature is valid. */
+        return 1;
+    }
+    return 0;
+#endif
+}
+
+/** Compute an ECDSA signature (sigr, sigs) on `message` with `seckey` and `nonce`.
+ *
+ *  sigr is the x coordinate of nonce*G converted to a scalar, and sigs is
+ *  (message + sigr*seckey) / nonce, negated if secp256k1_scalar_is_high reports
+ *  it as high. If recid is non-NULL, *recid receives a recovery id built from the
+ *  overflow flag of the x-coordinate conversion and the parity of the y
+ *  coordinate, with its low bit flipped when sigs was negated.
+ *
+ *  Returns 1 if both sigr and sigs are nonzero, 0 otherwise.
+ */
+static int secp256k1_ecdsa_sig_sign(const secp256k1_ecmult_gen_context *ctx, secp256k1_scalar *sigr, secp256k1_scalar *sigs, const secp256k1_scalar *seckey, const secp256k1_scalar *message, const secp256k1_scalar *nonce, int *recid) {
+    unsigned char b[32];
+    secp256k1_gej rp;
+    secp256k1_ge r;
+    secp256k1_scalar n;
+    int overflow = 0;
+    int high;
+
+    /* r = nonce*G, converted to affine with normalized coordinates. */
+    secp256k1_ecmult_gen(ctx, &rp, nonce);
+    secp256k1_ge_set_gej(&r, &rp);
+    secp256k1_fe_normalize(&r.x);
+    secp256k1_fe_normalize(&r.y);
+    secp256k1_fe_get_b32(b, &r.x);
+    secp256k1_scalar_set_b32(sigr, b, &overflow);
+    if (recid) {
+        /* The overflow condition is cryptographically unreachable as hitting it requires finding the discrete log
+         * of some P where P.x >= order, and only 1 in about 2^127 points meet this criteria.
+         */
+        *recid = (overflow << 1) | secp256k1_fe_is_odd(&r.y);
+    }
+    /* sigs = (sigr*seckey + message) / nonce */
+    secp256k1_scalar_mul(&n, sigr, seckey);
+    secp256k1_scalar_add(&n, &n, message);
+    secp256k1_scalar_inverse(sigs, nonce);
+    secp256k1_scalar_mul(sigs, sigs, &n);
+    /* Clear the intermediates derived from the secret key and nonce. */
+    secp256k1_scalar_clear(&n);
+    secp256k1_gej_clear(&rp);
+    secp256k1_ge_clear(&r);
+    high = secp256k1_scalar_is_high(sigs);
+    secp256k1_scalar_cond_negate(sigs, high);
+    if (recid) {
+        *recid ^= high;
+    }
+    /* P.x = order is on the curve, so technically sig->r could end up being zero, which would be an invalid signature.
+     * This is cryptographically unreachable as hitting it requires finding the discrete log of P.x = N.
+     */
+    return (int)(!secp256k1_scalar_is_zero(sigr)) & (int)(!secp256k1_scalar_is_zero(sigs));
+}
+
+#endif /* SECP256K1_ECDSA_IMPL_H */
--- a/libsecp256k1/src/eckey.h
+++ b/libsecp256k1/src/eckey.h
@@ -0,0 +1,25 @@
+/***********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                              *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_ECKEY_H
+#define SECP256K1_ECKEY_H
+
+#include <stddef.h>
+
+#include "group.h"
+#include "scalar.h"
+#include "ecmult.h"
+#include "ecmult_gen.h"
+
+/** Parse a serialized public key of `size` bytes into elem. Accepted forms are
+ *  33 bytes with an even/odd tag and 65 bytes with an uncompressed or hybrid tag.
+ *  Returns nonzero on success and 0 otherwise. */
+static int secp256k1_eckey_pubkey_parse(secp256k1_ge *elem, const unsigned char *pub, size_t size);
+/** Serialize elem into pub as 33 bytes (compressed != 0) or 65 bytes, storing
+ *  the length in *size. The coordinates of elem are normalized in place.
+ *  Returns 0 if elem is the point at infinity and 1 otherwise. */
+static int secp256k1_eckey_pubkey_serialize(secp256k1_ge *elem, unsigned char *pub, size_t *size, int compressed);
+
+/** key += tweak. Returns 0 if the resulting key is zero and 1 otherwise. */
+static int secp256k1_eckey_privkey_tweak_add(secp256k1_scalar *key, const secp256k1_scalar *tweak);
+/** key += tweak*G. Returns 0 (leaving key unchanged) if the result is the point
+ *  at infinity and 1 otherwise. */
+static int secp256k1_eckey_pubkey_tweak_add(secp256k1_ge *key, const secp256k1_scalar *tweak);
+/** key *= tweak. Returns 0 if tweak is zero and 1 otherwise; the multiplication
+ *  is carried out in both cases. */
+static int secp256k1_eckey_privkey_tweak_mul(secp256k1_scalar *key, const secp256k1_scalar *tweak);
+/** key = tweak*key. Returns 0 (leaving key unchanged) if tweak is zero and 1 otherwise. */
+static int secp256k1_eckey_pubkey_tweak_mul(secp256k1_ge *key, const secp256k1_scalar *tweak);
+
+#endif /* SECP256K1_ECKEY_H */
--- a/libsecp256k1/src/eckey_impl.h
+++ b/libsecp256k1/src/eckey_impl.h
@@ -0,0 +1,92 @@
+/***********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                              *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_ECKEY_IMPL_H
+#define SECP256K1_ECKEY_IMPL_H
+
+#include "eckey.h"
+
+#include "scalar.h"
+#include "field.h"
+#include "group.h"
+#include "ecmult_gen.h"
+
+/** Parse a serialized public key.
+ *
+ *  33-byte inputs must carry an even/odd tag; the point is then built from the x
+ *  coordinate and the requested parity. 65-byte inputs must carry an
+ *  uncompressed or hybrid tag; the point is built from both coordinates, a
+ *  hybrid tag must match the parity of y, and the point is validated.
+ *  Any other size or tag fails. Returns nonzero on success and 0 on failure. */
+static int secp256k1_eckey_pubkey_parse(secp256k1_ge *elem, const unsigned char *pub, size_t size) {
+    secp256k1_fe x, y;
+    int hybrid;
+
+    if (size == 33) {
+        if (pub[0] != SECP256K1_TAG_PUBKEY_EVEN && pub[0] != SECP256K1_TAG_PUBKEY_ODD) {
+            return 0;
+        }
+        return secp256k1_fe_set_b32_limit(&x, pub+1) && secp256k1_ge_set_xo_var(elem, &x, pub[0] == SECP256K1_TAG_PUBKEY_ODD);
+    }
+    if (size != 65) {
+        return 0;
+    }
+
+    hybrid = (pub[0] == SECP256K1_TAG_PUBKEY_HYBRID_EVEN || pub[0] == SECP256K1_TAG_PUBKEY_HYBRID_ODD);
+    if (!hybrid && pub[0] != SECP256K1_TAG_PUBKEY_UNCOMPRESSED) {
+        return 0;
+    }
+    if (!secp256k1_fe_set_b32_limit(&x, pub+1) || !secp256k1_fe_set_b32_limit(&y, pub+33)) {
+        return 0;
+    }
+    secp256k1_ge_set_xy(elem, &x, &y);
+    /* For hybrid encodings the tag must agree with the parity of y. */
+    if (hybrid && secp256k1_fe_is_odd(&y) != (pub[0] == SECP256K1_TAG_PUBKEY_HYBRID_ODD)) {
+        return 0;
+    }
+    return secp256k1_ge_is_valid_var(elem);
+}
+
+/** Serialize elem into pub and store the written length (33 or 65) in *size.
+ *
+ *  The coordinates of elem are normalized in place. With compressed != 0 the
+ *  output is a parity tag followed by x; otherwise it is the uncompressed tag
+ *  followed by x and y. Returns 0 (writing nothing) if elem is the point at
+ *  infinity and 1 otherwise. */
+static int secp256k1_eckey_pubkey_serialize(secp256k1_ge *elem, unsigned char *pub, size_t *size, int compressed) {
+    secp256k1_fe *px, *py;
+
+    if (secp256k1_ge_is_infinity(elem)) {
+        return 0;
+    }
+    px = &elem->x;
+    py = &elem->y;
+    secp256k1_fe_normalize_var(px);
+    secp256k1_fe_normalize_var(py);
+    secp256k1_fe_get_b32(&pub[1], px);
+
+    if (!compressed) {
+        *size = 65;
+        pub[0] = SECP256K1_TAG_PUBKEY_UNCOMPRESSED;
+        secp256k1_fe_get_b32(&pub[33], py);
+        return 1;
+    }
+    *size = 33;
+    pub[0] = secp256k1_fe_is_odd(py) ? SECP256K1_TAG_PUBKEY_ODD : SECP256K1_TAG_PUBKEY_EVEN;
+    return 1;
+}
+
+/** Add tweak to key in place. Returns 1 if the resulting key is nonzero, 0 otherwise. */
+static int secp256k1_eckey_privkey_tweak_add(secp256k1_scalar *key, const secp256k1_scalar *tweak) {
+    int nonzero;
+
+    secp256k1_scalar_add(key, key, tweak);
+    nonzero = !secp256k1_scalar_is_zero(key);
+    return nonzero;
+}
+
+/** Replace key by key + tweak*G. Returns 1 on success; returns 0 and leaves key
+ *  unchanged if the sum is the point at infinity. */
+static int secp256k1_eckey_pubkey_tweak_add(secp256k1_ge *key, const secp256k1_scalar *tweak) {
+    secp256k1_gej sum;
+
+    secp256k1_gej_set_ge(&sum, key);
+    /* sum = 1*sum + tweak*G */
+    secp256k1_ecmult(&sum, &sum, &secp256k1_scalar_one, tweak);
+
+    if (!secp256k1_gej_is_infinity(&sum)) {
+        secp256k1_ge_set_gej(key, &sum);
+        return 1;
+    }
+    return 0;
+}
+
+/** Multiply key by tweak in place. Returns 0 if tweak is zero and 1 otherwise;
+ *  the multiplication is performed in either case. */
+static int secp256k1_eckey_privkey_tweak_mul(secp256k1_scalar *key, const secp256k1_scalar *tweak) {
+    /* Evaluated before the multiplication writes to key. */
+    int tweak_nonzero = !secp256k1_scalar_is_zero(tweak);
+
+    secp256k1_scalar_mul(key, key, tweak);
+    return tweak_nonzero;
+}
+
+/** Replace key by tweak*key. Returns 1 on success; returns 0 and leaves key
+ *  unchanged if tweak is zero. */
+static int secp256k1_eckey_pubkey_tweak_mul(secp256k1_ge *key, const secp256k1_scalar *tweak) {
+    secp256k1_gej product;
+
+    if (!secp256k1_scalar_is_zero(tweak)) {
+        secp256k1_gej_set_ge(&product, key);
+        /* product = tweak*product + 0*G */
+        secp256k1_ecmult(&product, &product, tweak, &secp256k1_scalar_zero);
+        secp256k1_ge_set_gej(key, &product);
+        return 1;
+    }
+    return 0;
+}
+
+#endif /* SECP256K1_ECKEY_IMPL_H */
--- a/libsecp256k1/src/ecmult.h
+++ b/libsecp256k1/src/ecmult.h
@@ -0,0 +1,61 @@
+/***********************************************************************
+ * Copyright (c) 2013, 2014, 2017 Pieter Wuille, Andrew Poelstra       *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_ECMULT_H
+#define SECP256K1_ECMULT_H
+
+#include "group.h"
+#include "scalar.h"
+#include "scratch.h"
+
+#if !defined(ECMULT_WINDOW_SIZE)
+#  define ECMULT_WINDOW_SIZE 15
+#  if defined(DEBUG_CONFIG)
+#     pragma message DEBUG_CONFIG_MSG("ECMULT_WINDOW_SIZE undefined, assuming default value")
+#  endif
+#endif
+
+#if defined(DEBUG_CONFIG)
+#  pragma message DEBUG_CONFIG_DEF(ECMULT_WINDOW_SIZE)
+#endif
+
+/* Window sizes above 24 are rejected: nobody should need more, and while the
+ * code might be correct for larger values of ECMULT_WINDOW_SIZE, that is not
+ * tested.
+ *
+ * Known limitations (there are probably more):
+ * - With a window size > 27 and a 32-bit size_t, the code is incorrect
+ *   because the size in bytes of the memory object that we allocate
+ *   will not fit in a size_t.
+ * - With a window size > 31 and a 32-bit int, the code is incorrect
+ *   because certain expressions will overflow.
+ */
+#if !(2 <= ECMULT_WINDOW_SIZE && ECMULT_WINDOW_SIZE <= 24)
+#  error Set ECMULT_WINDOW_SIZE to an integer in range [2..24].
+#endif
+
+/** The number of entries a table with precomputed multiples needs to have: 2^(w-2) for window size w. */
+#define ECMULT_TABLE_SIZE(w) (1L << ((w)-2))
+
+/** Double multiply: R = na*A + ng*G */
+static void secp256k1_ecmult(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng);
+
+typedef int (secp256k1_ecmult_multi_callback)(secp256k1_scalar *sc, secp256k1_ge *pt, size_t idx, void *data); /* Type of the 'cb' argument of secp256k1_ecmult_multi_var; a return of 0 makes that call return 0. */
+
+/**
+ * Multi-multiply: R = inp_g_sc * G + sum_i ni * Ai.
+ * Chooses the right algorithm for a given number of points and scratch space
+ * size. Resets and overwrites the given scratch space. If the points do not
+ * fit in the scratch space the algorithm is repeatedly run with batches of
+ * points. If no scratch space is given then a simple algorithm is used that
+ * simply multiplies the points with the corresponding scalars and adds them up.
+ * Returns: 1 on success (including when inp_g_sc is NULL and n is 0)
+ *          0 if there is not enough scratch space for a single point or
+ *          the callback cb returns 0
+ */
+static int secp256k1_ecmult_multi_var(const secp256k1_callback* error_callback, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n);
+
+#endif /* SECP256K1_ECMULT_H */
--- a/libsecp256k1/src/ecmult_compute_table.h
+++ b/libsecp256k1/src/ecmult_compute_table.h
@@ -0,0 +1,16 @@
+/*****************************************************************************************************
+ * Copyright (c) 2013, 2014, 2017, 2021 Pieter Wuille, Andrew Poelstra, Jonas Nick, Russell O'Connor *
+ * Distributed under the MIT software license, see the accompanying                                  *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.                              *
+ *****************************************************************************************************/
+
+#ifndef SECP256K1_ECMULT_COMPUTE_TABLE_H
+#define SECP256K1_ECMULT_COMPUTE_TABLE_H
+
+/* Construct table of all odd multiples of gen in range 1..(2**(window_g-1)-1), i.e. ECMULT_TABLE_SIZE(window_g) entries. */
+static void secp256k1_ecmult_compute_table(secp256k1_ge_storage* table, int window_g, const secp256k1_gej* gen);
+
+/* Like secp256k1_ecmult_compute_table, but fills one table for gen (table) and one for gen*2^128 (table_128). */
+static void secp256k1_ecmult_compute_two_tables(secp256k1_ge_storage* table, secp256k1_ge_storage* table_128, int window_g, const secp256k1_ge* gen);
+
+#endif /* SECP256K1_ECMULT_COMPUTE_TABLE_H */
--- a/libsecp256k1/src/ecmult_compute_table_impl.h
+++ b/libsecp256k1/src/ecmult_compute_table_impl.h
@@ -0,0 +1,49 @@
+/*****************************************************************************************************
+ * Copyright (c) 2013, 2014, 2017, 2021 Pieter Wuille, Andrew Poelstra, Jonas Nick, Russell O'Connor *
+ * Distributed under the MIT software license, see the accompanying                                  *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.                              *
+ *****************************************************************************************************/
+
+#ifndef SECP256K1_ECMULT_COMPUTE_TABLE_IMPL_H
+#define SECP256K1_ECMULT_COMPUTE_TABLE_IMPL_H
+
+#include "ecmult_compute_table.h"
+#include "group_impl.h"
+#include "field_impl.h"
+#include "ecmult.h"
+#include "util.h"
+
+static void secp256k1_ecmult_compute_table(secp256k1_ge_storage* table, int window_g, const secp256k1_gej* gen) {
+    secp256k1_gej acc = *gen;
+    secp256k1_ge cur, twice;
+    int idx = 0;
+
+    /* Entry 0 is gen itself. */
+    secp256k1_ge_set_gej_var(&cur, &acc);
+    secp256k1_ge_to_storage(table + idx, &cur);
+
+    /* twice = 2*gen is the fixed step between consecutive entries. */
+    secp256k1_gej_double_var(&acc, gen, NULL);
+    secp256k1_ge_set_gej_var(&twice, &acc);
+    while (++idx < ECMULT_TABLE_SIZE(window_g)) {
+        secp256k1_gej_set_ge(&acc, &cur);
+        secp256k1_gej_add_ge_var(&acc, &acc, &twice, NULL);
+        secp256k1_ge_set_gej_var(&cur, &acc);
+        secp256k1_ge_to_storage(table + idx, &cur);
+    }
+}
+
+/* Fills table from gen and table_128 from gen*2^128 via secp256k1_ecmult_compute_table. */
+static void secp256k1_ecmult_compute_two_tables(secp256k1_ge_storage* table, secp256k1_ge_storage* table_128, int window_g, const secp256k1_ge* gen) {
+    secp256k1_gej base;
+    int remaining = 128;
+
+    secp256k1_gej_set_ge(&base, gen);
+    secp256k1_ecmult_compute_table(table, window_g, &base);
+    while (remaining-- > 0) {
+        secp256k1_gej_double_var(&base, &base, NULL); /* 128 doublings in total: base <- 2^128 * gen */
+    }
+    secp256k1_ecmult_compute_table(table_128, window_g, &base);
+}
+
+#endif /* SECP256K1_ECMULT_COMPUTE_TABLE_IMPL_H */
--- a/libsecp256k1/src/ecmult_const.h
+++ b/libsecp256k1/src/ecmult_const.h
@@ -0,0 +1,38 @@
+/***********************************************************************
+ * Copyright (c) 2015 Andrew Poelstra                                  *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_ECMULT_CONST_H
+#define SECP256K1_ECMULT_CONST_H
+
+#include "scalar.h"
+#include "group.h"
+
+/**
+ * Multiply: R = q*A (in constant-time for q). If A is infinity, R is set to infinity.
+ */
+static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, const secp256k1_scalar *q);
+
+/**
+ * Same as secp256k1_ecmult_const, but takes in an x coordinate of the base point
+ * only, specified as fraction n/d (numerator/denominator). Only the x coordinate of the result is
+ * returned (written to r).
+ *
+ * If known_on_curve is 0, a verification is performed that n/d is a valid X
+ * coordinate, and 0 is returned if not. Otherwise, 1 is returned.
+ *
+ * d being NULL is interpreted as d=1. If non-NULL, d must not be zero. q must not be zero.
+ *
+ * Constant time in the value of q, but not any other inputs.
+ */
+static int secp256k1_ecmult_const_xonly(
+    secp256k1_fe *r,
+    const secp256k1_fe *n,
+    const secp256k1_fe *d,
+    const secp256k1_scalar *q,
+    int known_on_curve
+);
+
+#endif /* SECP256K1_ECMULT_CONST_H */
--- a/libsecp256k1/src/ecmult_const_impl.h
+++ b/libsecp256k1/src/ecmult_const_impl.h
@@ -0,0 +1,399 @@
+/***********************************************************************
+ * Copyright (c) 2015, 2022 Pieter Wuille, Andrew Poelstra             *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_ECMULT_CONST_IMPL_H
+#define SECP256K1_ECMULT_CONST_IMPL_H
+
+#include "scalar.h"
+#include "group.h"
+#include "ecmult_const.h"
+#include "ecmult_impl.h"
+
+#ifdef EXHAUSTIVE_TEST_ORDER
+/* The tables must not contain infinities (that would break the z-ratio tracking of the
+ * effective-affine technique), so 2^ECMULT_CONST_GROUP_SIZE - 1 has to stay below
+ * EXHAUSTIVE_TEST_ORDER. */
+#  if EXHAUSTIVE_TEST_ORDER == 7
+#    define ECMULT_CONST_GROUP_SIZE 2
+#  elif EXHAUSTIVE_TEST_ORDER == 13
+#    define ECMULT_CONST_GROUP_SIZE 3
+#  elif EXHAUSTIVE_TEST_ORDER == 199
+#    define ECMULT_CONST_GROUP_SIZE 4
+#  else
+#    error "Unknown EXHAUSTIVE_TEST_ORDER"
+#  endif
+#else
+/* Group size 4 or 5 appears optimal. */
+#  define ECMULT_CONST_GROUP_SIZE 5
+#endif
+
+#define ECMULT_CONST_TABLE_SIZE (1L << (ECMULT_CONST_GROUP_SIZE - 1)) /* entries in each odd-multiples table */
+#define ECMULT_CONST_GROUPS ((129 + ECMULT_CONST_GROUP_SIZE - 1) / ECMULT_CONST_GROUP_SIZE) /* ceil(129 / group size) */
+#define ECMULT_CONST_BITS (ECMULT_CONST_GROUPS * ECMULT_CONST_GROUP_SIZE) /* smallest multiple of the group size >= 129 */
+
+/** Build the table 'pre' of precomputed odd multiples of a, all sharing one Z denominator.
+ *
+ *  Each entry of pre holds the X and Y coordinates as a ge point, expressed relative to a
+ *  single common Z coordinate that is handed back separately in globalz.
+ *
+ *  'pre' must have room for ECMULT_CONST_TABLE_SIZE entries.
+ */
+static void secp256k1_ecmult_const_odd_multiples_table_globalz(secp256k1_ge *pre, secp256k1_fe *globalz, const secp256k1_gej *a) {
+    secp256k1_fe z_ratios[ECMULT_CONST_TABLE_SIZE];
+
+    secp256k1_ecmult_odd_multiples_table(ECMULT_CONST_TABLE_SIZE, pre, z_ratios, globalz, a);
+    secp256k1_ge_table_set_globalz(ECMULT_CONST_TABLE_SIZE, pre, z_ratios);
+}
+
+/* Given a table 'pre' with odd multiples of a point, put in r the signed-bit multiplication of n with that point.
+ * The arguments r, pre and n are expanded more than once, so they must be free of side effects.
+ *
+ * For example, if ECMULT_CONST_GROUP_SIZE is 4, then pre is expected to contain 8 entries:
+ * [1*P, 3*P, 5*P, 7*P, 9*P, 11*P, 13*P, 15*P]. n is then expected to be a 4-bit integer (range 0-15), and its
+ * bits are interpreted as signs of powers of two to look up.
+ * For example, n=4 is 0100 in binary, which is interpreted as [- + - -], so the looked up value is
+ * [ -(2^3) + (2^2) - (2^1) - (2^0) ]*P = -7*P. Every valid n translates to an odd number in range [-15,15],
+ * which means we just need to look up one of the precomputed values, and optionally negate it.
+ */
+#define ECMULT_CONST_TABLE_GET_GE(r,pre,n) do { \
+    unsigned int m = 0; \
+    /* If the top bit of n is 0, we want the negation. */ \
+    volatile unsigned int negative = ((n) >> (ECMULT_CONST_GROUP_SIZE - 1)) ^ 1; \
+    /* Let n[i] be the i-th bit of n, then the index is
+     *     sum(cnot(n[i]) * 2^i, i=0..l-2)
+     * where cnot(b) = b if n[l-1] = 1 and 1 - b otherwise.
+     * For example, if n = 4, in binary 0100, the index is 3, in binary 011.
+     *
+     * Proof:
+     *     Let
+     *         x = sum((2*n[i] - 1)*2^i, i=0..l-1)
+     *           = 2*sum(n[i] * 2^i, i=0..l-1) - 2^l + 1
+     *     be the value represented by n.
+     *     The index is (x - 1)/2 if x > 0 and -(x + 1)/2 otherwise.
+     *     Case x > 0:
+     *         n[l-1] = 1
+     *         index = sum(n[i] * 2^i, i=0..l-1) - 2^(l-1)
+     *               = sum(n[i] * 2^i, i=0..l-2)
+     *     Case x <= 0:
+     *         n[l-1] = 0
+     *          index = -(2*sum(n[i] * 2^i, i=0..l-1) - 2^l + 2)/2
+     *                = 2^(l-1) - 1 - sum(n[i] * 2^i, i=0..l-1)
+     *                = sum((1 - n[i]) * 2^i, i=0..l-2)
+     */ \
+    unsigned int index = ((unsigned int)(-negative) ^ (n)) & ((1U << (ECMULT_CONST_GROUP_SIZE - 1)) - 1U); \
+    secp256k1_fe neg_y; \
+    VERIFY_CHECK((n) < (1U << ECMULT_CONST_GROUP_SIZE)); \
+    VERIFY_CHECK(index < (1U << (ECMULT_CONST_GROUP_SIZE - 1))); \
+    /* Unconditionally set r->x = (pre)[0].x and r->y = (pre)[0].y (m is 0 here): this is either the correct
+     * entry or gets replaced in a later iteration, and it makes sure `r` is initialized. */ \
+    (r)->x = (pre)[m].x; \
+    (r)->y = (pre)[m].y; \
+    for (m = 1; m < ECMULT_CONST_TABLE_SIZE; m++) { \
+        /* This loop is used to avoid secret data in array indices. See
+         * the comment in ecmult_gen_impl.h for rationale. */ \
+        secp256k1_fe_cmov(&(r)->x, &(pre)[m].x, m == index); \
+        secp256k1_fe_cmov(&(r)->y, &(pre)[m].y, m == index); \
+    } \
+    (r)->infinity = 0; \
+    secp256k1_fe_negate(&neg_y, &(r)->y, 1); \
+    secp256k1_fe_cmov(&(r)->y, &neg_y, negative); \
+} while(0)
+
+/* For K as defined in the comment of secp256k1_ecmult_const (K = (2^l - 2^129 - 1)*(1 + lambda) mod n,
+ * with l = ECMULT_CONST_BITS), we have several precomputed formulas/constants.
+ * - in exhaustive test mode, we give an explicit expression to compute it at compile time: */
+#ifdef EXHAUSTIVE_TEST_ORDER
+static const secp256k1_scalar secp256k1_ecmult_const_K = ((SECP256K1_SCALAR_CONST(0, 0, 0, (1U << (ECMULT_CONST_BITS - 128)) - 2U, 0, 0, 0, 0) + EXHAUSTIVE_TEST_ORDER - 1U) * (1U + EXHAUSTIVE_TEST_LAMBDA)) % EXHAUSTIVE_TEST_ORDER;
+/* - for the real secp256k1 group we have constants for various ECMULT_CONST_BITS values. */
+#elif ECMULT_CONST_BITS == 129
+/* For ECMULT_CONST_GROUP_SIZE = 1,3. */
+static const secp256k1_scalar secp256k1_ecmult_const_K = SECP256K1_SCALAR_CONST(0xac9c52b3ul, 0x3fa3cf1ful, 0x5ad9e3fdul, 0x77ed9ba4ul, 0xa880b9fcul, 0x8ec739c2ul, 0xe0cfc810ul, 0xb51283ceul);
+#elif ECMULT_CONST_BITS == 130
+/* For ECMULT_CONST_GROUP_SIZE = 2,5. */
+static const secp256k1_scalar secp256k1_ecmult_const_K = SECP256K1_SCALAR_CONST(0xa4e88a7dul, 0xcb13034eul, 0xc2bdd6bful, 0x7c118d6bul, 0x589ae848ul, 0x26ba29e4ul, 0xb5c2c1dcul, 0xde9798d9ul);
+#elif ECMULT_CONST_BITS == 132
+/* For ECMULT_CONST_GROUP_SIZE = 4,6. */
+static const secp256k1_scalar secp256k1_ecmult_const_K = SECP256K1_SCALAR_CONST(0x76b1d93dul, 0x0fae3c6bul, 0x3215874bul, 0x94e93813ul, 0x7937fe0dul, 0xb66bcaaful, 0xb3749ca5ul, 0xd7b6171bul);
+#else
+#  error "Unknown ECMULT_CONST_BITS"
+#endif
+
+static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, const secp256k1_scalar *q) {
+    /* The approach below combines the signed-digit logic from Mike Hamburg's
+     * "Fast and compact elliptic-curve cryptography" (https://eprint.iacr.org/2012/309)
+     * Section 3.3, with the GLV endomorphism.
+     *
+     * The idea there is to interpret the bits of a scalar as signs (1 = +, 0 = -), and compute a
+     * point multiplication in that fashion. Let v be an n-bit non-negative integer (0 <= v < 2^n),
+     * and v[i] its i'th bit (so v = sum(v[i] * 2^i, i=0..n-1)). Then define:
+     *
+     *   C_l(v, A) = sum((2*v[i] - 1) * 2^i*A, i=0..l-1)
+     *
+     * Then it holds that C_l(v, A) = sum((2*v[i] - 1) * 2^i*A, i=0..l-1)
+     *                              = (2*sum(v[i] * 2^i, i=0..l-1) + 1 - 2^l) * A
+     *                              = (2*v + 1 - 2^l) * A
+     *
+     * Thus, one can compute q*A as C_256((q + 2^256 - 1) / 2, A). This is the basis for the
+     * paper's signed-digit multi-comb algorithm for multiplication using a precomputed table.
+     *
+     * It is appealing to try to combine this with the GLV optimization: the idea that a scalar
+     * s can be written as s1 + lambda*s2, where lambda is a curve-specific constant such that
+     * lambda*A is easy to compute, and where s1 and s2 are small. In particular we have the
+     * secp256k1_scalar_split_lambda function which performs such a split with the resulting s1
+     * and s2 in range (-2^128, 2^128) mod n. This does work, but is uninteresting:
+     *
+     *   To compute q*A:
+     *   - Let s1, s2 = split_lambda(q)
+     *   - Let R1 = C_256((s1 + 2^256 - 1) / 2, A)
+     *   - Let R2 = C_256((s2 + 2^256 - 1) / 2, lambda*A)
+     *   - Return R1 + R2
+     *
+     * The issue is that while s1 and s2 are small-range numbers, (s1 + 2^256 - 1) / 2 (mod n)
+     * and (s2 + 2^256 - 1) / 2 (mod n) are not, undoing the benefit of the splitting.
+     *
+     * To make it work, we want to modify the input scalar q first, before splitting, and then only
+     * add a 2^128 offset of the split results (so that they end up in the single 129-bit range
+     * [0,2^129)). A slightly smaller offset would work due to the bounds on the split, but we pick
+     * 2^128 for simplicity. Let s be the scalar fed to split_lambda, and f(q) the function to
+     * compute it from q:
+     *
+     *   To compute q*A:
+     *   - Compute s = f(q)
+     *   - Let s1, s2 = split_lambda(s)
+     *   - Let v1 = s1 + 2^128 (mod n)
+     *   - Let v2 = s2 + 2^128 (mod n)
+     *   - Let R1 = C_l(v1, A)
+     *   - Let R2 = C_l(v2, lambda*A)
+     *   - Return R1 + R2
+     *
+     * l will thus need to be at least 129, but we may overshoot by a few bits (see
+     * further), so keep it as a variable.
+     *
+     * To solve for s, we reason:
+     *     q*A  = R1 + R2
+     * <=> q*A  = C_l(s1 + 2^128, A) + C_l(s2 + 2^128, lambda*A)
+     * <=> q*A  = (2*(s1 + 2^128) + 1 - 2^l) * A + (2*(s2 + 2^128) + 1 - 2^l) * lambda*A
+     * <=> q*A  = (2*(s1 + s2*lambda) + (2^129 + 1 - 2^l) * (1 + lambda)) * A
+     * <=> q    = 2*(s1 + s2*lambda) + (2^129 + 1 - 2^l) * (1 + lambda) (mod n)
+     * <=> q    = 2*s + (2^129 + 1 - 2^l) * (1 + lambda) (mod n)
+     * <=> s    = (q + (2^l - 2^129 - 1) * (1 + lambda)) / 2 (mod n)
+     * <=> f(q) = (q + K) / 2 (mod n)
+     *            where K = (2^l - 2^129 - 1)*(1 + lambda) (mod n)
+     *
+     * We will process the computation of C_l(v1, A) and C_l(v2, lambda*A) in groups of
+     * ECMULT_CONST_GROUP_SIZE, so we set l to the smallest multiple of ECMULT_CONST_GROUP_SIZE
+     * that is not less than 129; this equals ECMULT_CONST_BITS.
+     */
+
+    /* The offset to add to s1 and s2 to make them non-negative. Equal to 2^128. */
+    static const secp256k1_scalar S_OFFSET = SECP256K1_SCALAR_CONST(0, 0, 0, 1, 0, 0, 0, 0);
+    secp256k1_scalar s, v1, v2;
+    secp256k1_ge pre_a[ECMULT_CONST_TABLE_SIZE];
+    secp256k1_ge pre_a_lam[ECMULT_CONST_TABLE_SIZE];
+    secp256k1_fe global_z;
+    int group, i;
+
+    /* We're allowed to be non-constant time in the point, and the code below (in particular,
+     * secp256k1_ecmult_const_odd_multiples_table_globalz) cannot deal with infinity in a
+     * constant-time manner anyway. */
+    if (secp256k1_ge_is_infinity(a)) {
+        secp256k1_gej_set_infinity(r);
+        return;
+    }
+
+    /* Compute v1 and v2. */
+    secp256k1_scalar_add(&s, q, &secp256k1_ecmult_const_K); /* s = q + K */
+    secp256k1_scalar_half(&s, &s); /* s = f(q) = (q + K) / 2 (mod n), see the derivation above */
+    secp256k1_scalar_split_lambda(&v1, &v2, &s);
+    secp256k1_scalar_add(&v1, &v1, &S_OFFSET);
+    secp256k1_scalar_add(&v2, &v2, &S_OFFSET);
+
+#ifdef VERIFY
+    /* Verify that v1 and v2 are in range [0, 2^129-1]. */
+    for (i = 129; i < 256; ++i) {
+        VERIFY_CHECK(secp256k1_scalar_get_bits_limb32(&v1, i, 1) == 0);
+        VERIFY_CHECK(secp256k1_scalar_get_bits_limb32(&v2, i, 1) == 0);
+    }
+#endif
+
+    /* Calculate odd multiples of A and A*lambda.
+     * All multiples are brought to the same Z 'denominator', which is stored
+     * in global_z. Due to secp256k1's isomorphism we can do all operations pretending
+     * that the Z coordinate was 1, use affine addition formulae, and correct
+     * the Z coordinate of the result once at the end.
+     */
+    secp256k1_gej_set_ge(r, a);
+    secp256k1_ecmult_const_odd_multiples_table_globalz(pre_a, &global_z, r);
+    for (i = 0; i < ECMULT_CONST_TABLE_SIZE; i++) {
+        secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]);
+    }
+
+    /* Next, we compute r = C_l(v1, A) + C_l(v2, lambda*A).
+     *
+     * We proceed in groups of ECMULT_CONST_GROUP_SIZE bits, operating on that many bits
+     * at a time, from high in v1, v2 to low. Call these bits1 (from v1) and bits2 (from v2).
+     *
+     * Now note that ECMULT_CONST_TABLE_GET_GE(&t, pre_a, bits1) loads into t a point equal
+     * to C_{ECMULT_CONST_GROUP_SIZE}(bits1, A), and analogously for pre_a_lam / bits2.
+     * This means that all we need to do is add these looked up values together, multiplied
+     * by 2^(ECMULT_CONST_GROUP_SIZE * group).
+     */
+    for (group = ECMULT_CONST_GROUPS - 1; group >= 0; --group) {
+        /* Using the _var get_bits function is ok here, since it's only variable in offset and count, not in the scalar. */
+        unsigned int bits1 = secp256k1_scalar_get_bits_var(&v1, group * ECMULT_CONST_GROUP_SIZE, ECMULT_CONST_GROUP_SIZE);
+        unsigned int bits2 = secp256k1_scalar_get_bits_var(&v2, group * ECMULT_CONST_GROUP_SIZE, ECMULT_CONST_GROUP_SIZE);
+        secp256k1_ge t;
+        int j;
+
+        ECMULT_CONST_TABLE_GET_GE(&t, pre_a, bits1);
+        if (group == ECMULT_CONST_GROUPS - 1) {
+            /* Directly set r in the first iteration. */
+            secp256k1_gej_set_ge(r, &t);
+        } else {
+            /* Shift the result so far up. */
+            for (j = 0; j < ECMULT_CONST_GROUP_SIZE; ++j) {
+                secp256k1_gej_double(r, r);
+            }
+            secp256k1_gej_add_ge(r, r, &t);
+        }
+        ECMULT_CONST_TABLE_GET_GE(&t, pre_a_lam, bits2);
+        secp256k1_gej_add_ge(r, r, &t);
+    }
+
+    /* Map the result back to the secp256k1 curve from the isomorphic curve. */
+    secp256k1_fe_mul(&r->z, &r->z, &global_z);
+}
+
+static int secp256k1_ecmult_const_xonly(secp256k1_fe* r, const secp256k1_fe *n, const secp256k1_fe *d, const secp256k1_scalar *q, int known_on_curve) {
+
+    /* This algorithm is a generalization of Peter Dettman's technique for
+     * avoiding the square root in a random-basepoint x-only multiplication
+     * on a Weierstrass curve:
+     * https://mailarchive.ietf.org/arch/msg/cfrg/7DyYY6gg32wDgHAhgSb6XxMDlJA/
+     *
+     *
+     * === Background: the effective affine technique ===
+     *
+     * Let phi_u be the isomorphism that maps (x, y) on secp256k1 curve y^2 = x^3 + 7 to
+     * x' = u^2*x, y' = u^3*y on curve y'^2 = x'^3 + u^6*7. This new curve has the same order as
+     * the original (it is isomorphic), but moreover, has the same addition/doubling formulas, as
+     * the curve b=7 coefficient does not appear in those formulas (or at least does not appear in
+     * the formulas implemented in this codebase, both affine and Jacobian). See also Example 9.5.2
+     * in https://www.math.auckland.ac.nz/~sgal018/crypto-book/ch9.pdf.
+     *
+     * This means any linear combination of secp256k1 points can be computed by applying phi_u
+     * (with non-zero u) on all input points (including the generator, if used), computing the
+     * linear combination on the isomorphic curve (using the same group laws), and then applying
+     * phi_u^{-1} to get back to secp256k1.
+     *
+     * Switching to Jacobian coordinates, note that phi_u applied to (X, Y, Z) is simply
+     * (X, Y, Z/u). Thus, if we want to compute (X1, Y1, Z) + (X2, Y2, Z), with identical Z
+     * coordinates, we can use phi_Z to transform it to (X1, Y1, 1) + (X2, Y2, 1) on an isomorphic
+     * curve where the affine addition formula can be used instead.
+     * If (X3, Y3, Z3) = (X1, Y1) + (X2, Y2) on that curve, then our answer on secp256k1 is
+     * (X3, Y3, Z3*Z).
+     *
+     * This is the effective affine technique: if we have a linear combination of group elements
+     * to compute, and all those group elements have the same Z coordinate, we can simply pretend
+     * that all those Z coordinates are 1, perform the computation that way, and then multiply the
+     * original Z coordinate back in.
+     *
+     * The technique works on any a=0 short Weierstrass curve. It is possible to generalize it to
+     * other curves too, but there the isomorphic curves will have different 'a' coefficients,
+     * which typically does affect the group laws.
+     *
+     *
+     * === Avoiding the square root for x-only point multiplication ===
+     *
+     * In this function, we want to compute the X coordinate of q*(n/d, y), for
+     * y = sqrt((n/d)^3 + 7). Its negation would also be a valid Y coordinate, but by convention
+     * we pick whatever sqrt returns (which we assume to be a deterministic function).
+     *
+     * Let g = y^2*d^3 = n^3 + 7*d^3. This also means y = sqrt(g/d^3).
+     * Further let v = sqrt(d*g), which must exist as d*g = y^2*d^4 = (y*d^2)^2.
+     *
+     * The input point (n/d, y) also has Jacobian coordinates:
+     *
+     *     (n/d, y, 1)
+     *   = (n/d * v^2, y * v^3, v)
+     *   = (n/d * d*g, y * sqrt(d^3*g^3), v)
+     *   = (n/d * d*g, sqrt(y^2 * d^3*g^3), v)
+     *   = (n*g, sqrt(g/d^3 * d^3*g^3), v)
+     *   = (n*g, sqrt(g^4), v)
+     *   = (n*g, g^2, v)
+     *
+     * It is easy to verify that both (n*g, g^2, v) and its negation (n*g, -g^2, v) have affine X
+     * coordinate n/d, and this holds even when the square root function doesn't have a
+     * deterministic sign. We choose the (n*g, g^2, v) version.
+     *
+     * Now switch to the effective affine curve using phi_v, where the input point has coordinates
+     * (n*g, g^2). Compute (X, Y, Z) = q * (n*g, g^2) there.
+     *
+     * Back on secp256k1, that means q * (n*g, g^2, v) = (X, Y, v*Z). This last point has affine X
+     * coordinate X / (v^2*Z^2) = X / (d*g*Z^2). Determining the affine Y coordinate would involve
+     * a square root, but as long as we only care about the resulting X coordinate, no square root
+     * is needed anywhere in this computation.
+     */
+
+    secp256k1_fe g, i;
+    secp256k1_ge p;
+    secp256k1_gej rj;
+
+    /* Compute g = (n^3 + B*d^3). */
+    secp256k1_fe_sqr(&g, n);
+    secp256k1_fe_mul(&g, &g, n);
+    if (d) {
+        secp256k1_fe b;
+        VERIFY_CHECK(!secp256k1_fe_normalizes_to_zero(d));
+        secp256k1_fe_sqr(&b, d);
+        VERIFY_CHECK(SECP256K1_B <= 8); /* magnitude of b will be <= 8 after the next call */
+        secp256k1_fe_mul_int(&b, SECP256K1_B);
+        secp256k1_fe_mul(&b, &b, d);
+        secp256k1_fe_add(&g, &b);
+        if (!known_on_curve) {
+            /* We need to determine whether (n/d)^3 + 7 is square.
+             *
+             *     is_square((n/d)^3 + 7)
+             * <=> is_square(((n/d)^3 + 7) * d^4)
+             * <=> is_square((n^3 + 7*d^3) * d)
+             * <=> is_square(g * d)
+             */
+            secp256k1_fe c;
+            secp256k1_fe_mul(&c, &g, d);
+            if (!secp256k1_fe_is_square_var(&c)) return 0; /* n/d is not a valid X coordinate */
+        }
+    } else {
+        secp256k1_fe_add_int(&g, SECP256K1_B); /* d == NULL is interpreted as d = 1, so g = n^3 + B */
+        if (!known_on_curve) {
+            /* g at this point equals x^3 + 7. Test if it is square. */
+            if (!secp256k1_fe_is_square_var(&g)) return 0; /* n is not a valid X coordinate */
+        }
+    }
+
+    /* Compute base point P = (n*g, g^2), the effective affine version of (n*g, g^2, v), which has
+     * corresponding affine X coordinate n/d. */
+    secp256k1_fe_mul(&p.x, &g, n);
+    secp256k1_fe_sqr(&p.y, &g);
+    p.infinity = 0;
+
+    /* Perform x-only EC multiplication of P with q. */
+    VERIFY_CHECK(!secp256k1_scalar_is_zero(q));
+    secp256k1_ecmult_const(&rj, &p, q);
+    VERIFY_CHECK(!secp256k1_gej_is_infinity(&rj));
+
+    /* The resulting (X, Y, Z) point on the effective-affine isomorphic curve corresponds to
+     * (X, Y, Z*v) on the secp256k1 curve. The affine version of that has X coordinate
+     * (X / (Z^2*d*g)). */
+    secp256k1_fe_sqr(&i, &rj.z);
+    secp256k1_fe_mul(&i, &i, &g);
+    if (d) secp256k1_fe_mul(&i, &i, d); /* i = Z^2*g*d, or Z^2*g when d is NULL (d = 1) */
+    secp256k1_fe_inv(&i, &i);
+    secp256k1_fe_mul(r, &rj.x, &i); /* r = X / (Z^2*d*g) */
+
+    return 1;
+}
+
+#endif /* SECP256K1_ECMULT_CONST_IMPL_H */
--- a/libsecp256k1/src/ecmult_gen.h
+++ b/libsecp256k1/src/ecmult_gen.h
@@ -0,0 +1,143 @@
+/***********************************************************************
+ * Copyright (c) Pieter Wuille, Peter Dettman                          *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_ECMULT_GEN_H
+#define SECP256K1_ECMULT_GEN_H
+
+#include "scalar.h"
+#include "group.h"
+
+
+/* Configuration parameters for the signed-digit multi-comb algorithm:
+ *
+ * - COMB_BLOCKS is the number of blocks the input is split into. Each
+ *   has a corresponding table.
+ * - COMB_TEETH is the number of bits simultaneously covered by one table.
+ * - COMB_RANGE is the number of bits in supported scalars. For production
+ *   purposes, only 256 is reasonable, but smaller numbers are supported for
+ *   exhaustive test mode.
+ *
+ * The comb's spacing (COMB_SPACING), or the distance between the teeth,
+ * is defined as ceil(COMB_RANGE / (COMB_BLOCKS * COMB_TEETH)). Each block covers
+ * COMB_SPACING * COMB_TEETH consecutive bits in the input.
+ *
+ * The size of the precomputed table is COMB_BLOCKS * (1 << (COMB_TEETH - 1))
+ * secp256k1_ge_storages.
+ *
+ * The number of point additions equals COMB_BLOCKS * COMB_SPACING. Each point
+ * addition involves a cmov from (1 << (COMB_TEETH - 1)) table entries and a
+ * conditional negation.
+ *
+ * The number of point doublings is COMB_SPACING - 1. */
+
+#if defined(EXHAUSTIVE_TEST_ORDER)
+/* We need to control these values for exhaustive tests because
+ * the table cannot have infinities in them (secp256k1_ge_storage
+ * doesn't support infinities) */
+#  undef COMB_BLOCKS
+#  undef COMB_TEETH
+#  if EXHAUSTIVE_TEST_ORDER == 7
+#    define COMB_RANGE 3
+#    define COMB_BLOCKS 1
+#    define COMB_TEETH 2
+#  elif EXHAUSTIVE_TEST_ORDER == 13
+#    define COMB_RANGE 4
+#    define COMB_BLOCKS 1
+#    define COMB_TEETH 2
+#  elif EXHAUSTIVE_TEST_ORDER == 199
+#    define COMB_RANGE 8
+#    define COMB_BLOCKS 2
+#    define COMB_TEETH 3
+#  else
+#    error "Unknown exhaustive test order"
+#  endif
+#  if (COMB_RANGE >= 32) || ((EXHAUSTIVE_TEST_ORDER >> (COMB_RANGE - 1)) != 1)
+#    error "COMB_RANGE != ceil(log2(EXHAUSTIVE_TEST_ORDER+1))"
+#  endif
+#else /* !defined(EXHAUSTIVE_TEST_ORDER) */
+#  define COMB_RANGE 256
+#endif /* defined(EXHAUSTIVE_TEST_ORDER) */
+
+/* Use (COMB_BLOCKS, COMB_TEETH) = (11, 6) as default configuration, which results in a 22 kB table. */
+#ifndef COMB_BLOCKS
+#  define COMB_BLOCKS 11
+#  ifdef DEBUG_CONFIG
+#    pragma message DEBUG_CONFIG_MSG("COMB_BLOCKS undefined, assuming default value")
+#  endif
+#endif
+#ifndef COMB_TEETH
+#  define COMB_TEETH 6
+#  ifdef DEBUG_CONFIG
+#    pragma message DEBUG_CONFIG_MSG("COMB_TEETH undefined, assuming default value")
+#  endif
+#endif
+/* Use ceil(COMB_RANGE / (COMB_BLOCKS * COMB_TEETH)) as COMB_SPACING. */
+#define COMB_SPACING CEIL_DIV(COMB_RANGE, COMB_BLOCKS * COMB_TEETH)
+
+/* Range checks on the parameters: see the "Sanity checks" section below. */
+
+/* The remaining COMB_* parameters are derived values, don't modify these. */
+/* - The number of bits covered by all the blocks; must be at least COMB_RANGE. */
+#define COMB_BITS (COMB_BLOCKS * COMB_TEETH * COMB_SPACING)
+/* - The number of entries per table. */
+#define COMB_POINTS (1 << (COMB_TEETH - 1))
+
+/* Sanity checks. */
+#if !(1 <= COMB_BLOCKS && COMB_BLOCKS <= 256)
+#  error "COMB_BLOCKS must be in the range [1, 256]"
+#endif
+#if !(1 <= COMB_TEETH && COMB_TEETH <= 8)
+#  error "COMB_TEETH must be in the range [1, 8]"
+#endif
+#if COMB_BITS < COMB_RANGE
+#  error "COMB_BLOCKS * COMB_TEETH * COMB_SPACING is too low"
+#endif
+
+/* These last 2 checks are not strictly required, but prevent gratuitously inefficient
+ * configurations. Note that they compare with 256 rather than COMB_RANGE, so they do
+ * permit somewhat excessive values for the exhaustive test case, where testing with
+ * suboptimal parameters may be desirable. */
+#if (COMB_BLOCKS - 1) * COMB_TEETH * COMB_SPACING >= 256
+#  error "COMB_BLOCKS can be reduced"
+#endif
+#if COMB_BLOCKS * (COMB_TEETH - 1) * COMB_SPACING >= 256
+#  error "COMB_TEETH can be reduced"
+#endif
+
+#ifdef DEBUG_CONFIG
+#  pragma message DEBUG_CONFIG_DEF(COMB_RANGE)
+#  pragma message DEBUG_CONFIG_DEF(COMB_BLOCKS)
+#  pragma message DEBUG_CONFIG_DEF(COMB_TEETH)
+#  pragma message DEBUG_CONFIG_DEF(COMB_SPACING)
+#endif
+
+typedef struct {
+    /* Whether the context has been built (presumably maintained by the _context_build/_context_clear functions declared below; confirm in ecmult_gen_impl.h). */
+    int built;
+
+    /* Values chosen such that
+     *
+     *   n*G == comb(n + scalar_offset, G/2) + ge_offset.
+     *
+     * This expression lets us use scalar blinding and optimize the comb precomputation. See
+     * ecmult_gen_impl.h for more details. */
+    secp256k1_scalar scalar_offset;
+    secp256k1_ge ge_offset;
+
+    /* Factor used for projective blinding. This value is used to rescale the Z
+     * coordinate of the first table lookup. */
+    secp256k1_fe proj_blind;
+} secp256k1_ecmult_gen_context;
+
+static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context* ctx); /* NOTE(review): presumably initializes ctx and marks it built; confirm in ecmult_gen_impl.h */
+static void secp256k1_ecmult_gen_context_clear(secp256k1_ecmult_gen_context* ctx); /* NOTE(review): presumably wipes ctx and marks it not built; confirm in ecmult_gen_impl.h */
+
+/** Multiply with the generator: R = a*G */
+static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context* ctx, secp256k1_gej *r, const secp256k1_scalar *a);
+
+static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const unsigned char *seed32); /* NOTE(review): presumably refreshes the blinding values in ctx from 32 bytes of seed material; confirm in ecmult_gen_impl.h */
+
+#endif /* SECP256K1_ECMULT_GEN_H */
--- a/libsecp256k1/src/ecmult_gen_compute_table.h
+++ b/libsecp256k1/src/ecmult_gen_compute_table.h
@@ -0,0 +1,14 @@
+/***********************************************************************
+ * Copyright (c) Pieter Wuille, Gregory Maxwell                        *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_ECMULT_GEN_COMPUTE_TABLE_H
+#define SECP256K1_ECMULT_GEN_COMPUTE_TABLE_H
+
+#include "ecmult_gen.h"
+
+/** Compute the comb table for the point gen and store it in table, which must have
+ *  room for blocks * 2^(teeth-1) entries (2^(teeth-1) consecutive entries per block). */
+static void secp256k1_ecmult_gen_compute_table(secp256k1_ge_storage *table, const secp256k1_ge *gen, int blocks, int teeth, int spacing);
+
+#endif /* SECP256K1_ECMULT_GEN_COMPUTE_TABLE_H */
--- a/libsecp256k1/src/ecmult_gen_compute_table_impl.h
+++ b/libsecp256k1/src/ecmult_gen_compute_table_impl.h
@@ -0,0 +1,108 @@
+/***********************************************************************
+ * Copyright (c) Pieter Wuille, Gregory Maxwell, Peter Dettman         *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_ECMULT_GEN_COMPUTE_TABLE_IMPL_H
+#define SECP256K1_ECMULT_GEN_COMPUTE_TABLE_IMPL_H
+
+#include "ecmult_gen_compute_table.h"
+#include "group_impl.h"
+#include "field_impl.h"
+#include "scalar_impl.h"
+#include "ecmult_gen.h"
+#include "util.h"
+
+/* Build the comb table for gen.
+ *
+ * For every block, 2^(teeth-1) points are produced and written to
+ * table[block * 2^(teeth-1) + i]; table therefore needs blocks * 2^(teeth-1) entries.
+ * All intermediate points live in heap buffers obtained through checked_malloc and are
+ * released before returning. */
+static void secp256k1_ecmult_gen_compute_table(secp256k1_ge_storage* table, const secp256k1_ge* gen, int blocks, int teeth, int spacing) {
+    const size_t per_block = ((size_t)1) << (teeth - 1);
+    const size_t total = per_block * blocks;
+    secp256k1_ge* affine = checked_malloc(&default_error_callback, total * sizeof(*affine));
+    secp256k1_gej* doubled = checked_malloc(&default_error_callback, teeth * sizeof(*doubled));
+    secp256k1_gej* jac = checked_malloc(&default_error_callback, total * sizeof(*jac));
+    secp256k1_gej pow2;
+    secp256k1_scalar inv2;
+    size_t filled = 0;
+    int b, bit;
+
+    VERIFY_CHECK(total > 0);
+
+    /* pow2 tracks the current power of two times gen/2. It starts out as gen/2 itself,
+     * obtained with a plain double-and-add ladder over the scalar 1/2 so that this file
+     * does not depend on ecmult. */
+    secp256k1_scalar_half(&inv2, &secp256k1_scalar_one);
+    secp256k1_gej_set_infinity(&pow2);
+    bit = 256;
+    while (bit-- > 0) {
+        secp256k1_gej_double_var(&pow2, &pow2, NULL);
+        if (secp256k1_scalar_get_bits_limb32(&inv2, bit, 1)) {
+            secp256k1_gej_add_ge_var(&pow2, &pow2, gen, NULL);
+        }
+    }
+#ifdef VERIFY
+    {
+        /* Doubling gen/2 must give back gen. */
+        secp256k1_gej twice;
+        secp256k1_gej_double_var(&twice, &pow2, NULL);
+        VERIFY_CHECK(secp256k1_gej_eq_ge_var(&twice, gen));
+    }
+#endif
+
+    for (b = 0; b < blocks; ++b) {
+        /* On entry pow2 = 2^(b*teeth*spacing) * gen/2. */
+        const int last_block = (b == blocks - 1);
+        secp256k1_gej acc;
+        int t;
+
+        secp256k1_gej_set_infinity(&acc);
+        for (t = 0; t < teeth; ++t) {
+            /* pow2 = 2^((b*teeth + t)*spacing) * gen/2: accumulate it into acc, so that
+             * acc = sum(2^((b*teeth + j)*spacing), j=0..t) * gen/2. */
+            secp256k1_gej_add_var(&acc, &acc, &pow2, NULL);
+            /* doubled[t] = 2^((b*teeth + t)*spacing + 1) * gen/2. */
+            secp256k1_gej_double_var(&pow2, &pow2, NULL);
+            doubled[t] = pow2;
+            /* Advance pow2 to 2^((b*teeth + t + 1)*spacing) * gen/2, except after the
+             * very last tooth of the very last block, where it is no longer needed. */
+            if (!(last_block && t == teeth - 1)) {
+                int s;
+                for (s = spacing; s > 1; --s) {
+                    secp256k1_gej_double_var(&pow2, &pow2, NULL);
+                }
+            }
+        }
+        /* Now pow2 = 2^((b+1)*teeth*spacing) * gen/2 (unless this was the last block). */
+
+        /* Jacobian table entries of this block go to jac[b*per_block + i], i=0..per_block-1.
+         * Entry i=0 is the one where every summed power of two is taken negatively. */
+        secp256k1_gej_neg(&jac[filled], &acc);
+        ++filled;
+        /* Then, teeth-1 times, double the number of computed entries: every new entry
+         * is an already computed entry plus doubled[t]. */
+        for (t = 0; t < teeth - 1; ++t) {
+            const size_t half = ((size_t)1) << t;
+            const size_t end = filled + half;
+            for (; filled < end; ++filled) {
+                secp256k1_gej_add_var(&jac[filled], &jac[filled - half], &doubled[t], NULL);
+            }
+        }
+    }
+    VERIFY_CHECK(filled == total);
+
+    /* Batch-convert Jacobian -> affine, then affine -> storage format in the output. */
+    secp256k1_ge_set_all_gej_var(affine, jac, total);
+    for (b = 0; b < blocks; ++b) {
+        const size_t base = (size_t)b * per_block;
+        size_t k;
+        for (k = 0; k < per_block; ++k) {
+            VERIFY_CHECK(!secp256k1_ge_is_infinity(&affine[base + k]));
+            secp256k1_ge_to_storage(&table[base + k], &affine[base + k]);
+        }
+    }
+
+    /* Release the scratch buffers. */
+    free(affine);
+    free(doubled);
+    free(jac);
+}
+
+#endif /* SECP256K1_ECMULT_GEN_COMPUTE_TABLE_IMPL_H */
--- a/libsecp256k1/src/ecmult_gen_impl.h
+++ b/libsecp256k1/src/ecmult_gen_impl.h
@@ -0,0 +1,341 @@
+/***********************************************************************
+ * Copyright (c) Pieter Wuille, Gregory Maxwell, Peter Dettman         *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_ECMULT_GEN_IMPL_H
+#define SECP256K1_ECMULT_GEN_IMPL_H
+
+#include "util.h"
+#include "scalar.h"
+#include "group.h"
+#include "ecmult_gen.h"
+#include "hash_impl.h"
+#include "precomputed_ecmult_gen.h"
+
+/* Build ctx: install the default (unseeded) blinding values, then flag it as built. */
+static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx) {
+    const unsigned char *no_seed = NULL;
+
+    secp256k1_ecmult_gen_blind(ctx, no_seed);
+    ctx->built = 1;
+}
+
+/* Return the built flag of ctx (1 after secp256k1_ecmult_gen_context_build,
+ * 0 after secp256k1_ecmult_gen_context_clear). */
+static int secp256k1_ecmult_gen_context_is_built(const secp256k1_ecmult_gen_context* ctx) {
+    const int built = ctx->built;
+    return built;
+}
+
+/* Clear all blinding values held by ctx and mark it as not built. */
+static void secp256k1_ecmult_gen_context_clear(secp256k1_ecmult_gen_context *ctx) {
+    secp256k1_fe_clear(&ctx->proj_blind);
+    secp256k1_ge_clear(&ctx->ge_offset);
+    secp256k1_scalar_clear(&ctx->scalar_offset);
+    ctx->built = 0;
+}
+
+/* Set *diff to the scalar (2^COMB_BITS - 1) / 2 (mod order). This is the difference
+ * between the gn argument of secp256k1_ecmult_gen and the scalar whose bits drive the
+ * table lookups, before blinding is applied. */
+static void secp256k1_ecmult_gen_scalar_diff(secp256k1_scalar* diff) {
+    secp256k1_scalar minus_half;
+    int doublings;
+
+    /* Start from 1 and double COMB_BITS - 1 times: *diff = 2^(COMB_BITS - 1). */
+    *diff = secp256k1_scalar_one;
+    for (doublings = COMB_BITS - 1; doublings > 0; --doublings) {
+        secp256k1_scalar_add(diff, diff, diff);
+    }
+
+    /* minus_half = -(1/2). */
+    secp256k1_scalar_half(&minus_half, &secp256k1_scalar_one);
+    secp256k1_scalar_negate(&minus_half, &minus_half);
+
+    /* 2^(COMB_BITS - 1) - 1/2 equals (2^COMB_BITS - 1) / 2. */
+    secp256k1_scalar_add(diff, diff, &minus_half);
+}
+
+static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp256k1_gej *r, const secp256k1_scalar *gn) {
+    uint32_t comb_off;
+    secp256k1_ge add;
+    secp256k1_fe neg;
+    secp256k1_ge_storage adds;
+    secp256k1_scalar d;
+    /* Array of uint32_t values large enough to store COMB_BITS bits. Only the bottom
+     * 8 are ever nonzero, but having the zero padding at the end if COMB_BITS>256
+     * avoids the need to deal with out-of-bounds reads from a scalar. */
+    uint32_t recoded[(COMB_BITS + 31) >> 5] = {0};
+    int first = 1, i;
+
+    memset(&adds, 0, sizeof(adds));
+
+    /* We want to compute R = gn*G; the result is written to r.
+     *
+     * To blind the scalar used in the computation, we rewrite this to be
+     * R = (gn - b)*G + b*G, with a blinding value b determined by the context.
+     *
+     * The multiplication (gn-b)*G will be performed using a signed-digit multi-comb (see Section
+     * 3.3 of "Fast and compact elliptic-curve cryptography" by Mike Hamburg,
+     * https://eprint.iacr.org/2012/309).
+     *
+     * Let comb(s, P) = sum((2*s[i]-1)*2^i*P for i=0..COMB_BITS-1), where s[i] is the i'th bit of
+     * the binary representation of scalar s. So the s[i] values determine whether -2^i*P (s[i]=0)
+     * or +2^i*P (s[i]=1) are added together. COMB_BITS is at least 256, so all bits of s are
+     * covered. By manipulating:
+     *
+     *     comb(s, P) = sum((2*s[i]-1)*2^i*P for i=0..COMB_BITS-1)
+     * <=> comb(s, P) = sum((2*s[i]-1)*2^i for i=0..COMB_BITS-1) * P
+     * <=> comb(s, P) = (2*sum(s[i]*2^i for i=0..COMB_BITS-1) - sum(2^i for i=0..COMB_BITS-1)) * P
+     * <=> comb(s, P) = (2*s - (2^COMB_BITS - 1)) * P
+     *
+     * If we wanted to compute (gn-b)*G as comb(s, G), it would need to hold that
+     *
+     *     (gn - b) * G = (2*s - (2^COMB_BITS - 1)) * G
+     * <=> s = (gn - b + (2^COMB_BITS - 1))/2 (mod order)
+     *
+     * We use an alternative here that avoids the modular division by two: instead we compute
+     * (gn-b)*G as comb(d, G/2). For that to hold it must be the case that
+     *
+     *     (gn - b) * G = (2*d - (2^COMB_BITS - 1)) * (G/2)
+     * <=> d = gn - b + (2^COMB_BITS - 1)/2 (mod order)
+     *
+     * Adding precomputation, our final equations become:
+     *
+     *     ctx->scalar_offset = (2^COMB_BITS - 1)/2 - b (mod order)
+     *     ctx->ge_offset = b*G
+     *     d = gn + ctx->scalar_offset (mod order)
+     *     R = comb(d, G/2) + ctx->ge_offset
+     *
+     * comb(d, G/2) function is then computed by summing + or - 2^(i-1)*G, for i=0..COMB_BITS-1,
+     * depending on the value of the bits d[i] of the binary representation of scalar d.
+     */
+
+    /* Compute the scalar d = (gn + ctx->scalar_offset). */
+    secp256k1_scalar_add(&d, &ctx->scalar_offset, gn);
+    /* Convert to recoded array. */
+    for (i = 0; i < 8 && i < ((COMB_BITS + 31) >> 5); ++i) {
+        recoded[i] = secp256k1_scalar_get_bits_limb32(&d, 32 * i, 32);
+    }
+    secp256k1_scalar_clear(&d);
+
+    /* In secp256k1_ecmult_gen_prec_table we have precomputed sums of the
+     * (2*d[i]-1) * 2^(i-1) * G points, for various combinations of i positions.
+     * We rewrite our equation in terms of these table entries.
+     *
+     * Let mask(b) = sum(2^((b*COMB_TEETH + t)*COMB_SPACING) for t=0..COMB_TEETH-1),
+     * with b ranging from 0 to COMB_BLOCKS-1. So for example with COMB_BLOCKS=11,
+     * COMB_TEETH=6, COMB_SPACING=4, we would have:
+     *   mask(0)  = 2^0   + 2^4   + 2^8   + 2^12  + 2^16  + 2^20,
+     *   mask(1)  = 2^24  + 2^28  + 2^32  + 2^36  + 2^40  + 2^44,
+     *   mask(2)  = 2^48  + 2^52  + 2^56  + 2^60  + 2^64  + 2^68,
+     *   ...
+     *   mask(10) = 2^240 + 2^244 + 2^248 + 2^252 + 2^256 + 2^260
+     *
+     * We will split up the bits d[i] using these masks. Specifically, each mask is
+     * used COMB_SPACING times, with different shifts:
+     *
+     * d = (d & mask(0)<<0) + (d & mask(1)<<0) + ... + (d & mask(COMB_BLOCKS-1)<<0) +
+     *     (d & mask(0)<<1) + (d & mask(1)<<1) + ... + (d & mask(COMB_BLOCKS-1)<<1) +
+     *     ...
+     *     (d & mask(0)<<(COMB_SPACING-1)) + ...
+     *
+     * Now define table(b, m) = (m - mask(b)/2) * G, and we will precompute these values for
+     * b=0..COMB_BLOCKS-1, and for all values m which (d & mask(b)) can take (so m can take on
+     * 2^COMB_TEETH distinct values).
+     *
+     * If m=(d & mask(b)), then table(b, m) is the sum of 2^i * (2*d[i]-1) * G/2, with i
+     * iterating over the set bits in mask(b). In our example, table(2, 2^48 + 2^56 + 2^68)
+     * would equal (2^48 - 2^52 + 2^56 - 2^60 - 2^64 + 2^68) * G/2.
+     *
+     * With that, we can rewrite comb(d, G/2) as:
+     *
+     *     2^0 * (table(0, d>>0 & mask(0)) + ... + table(COMB_BLOCKS-1, d>>0 & mask(COMB_BLOCKS-1)))
+     *   + 2^1 * (table(0, d>>1 & mask(0)) + ... + table(COMB_BLOCKS-1, d>>1 & mask(COMB_BLOCKS-1)))
+     *   + 2^2 * (table(0, d>>2 & mask(0)) + ... + table(COMB_BLOCKS-1, d>>2 & mask(COMB_BLOCKS-1)))
+     *   + ...
+     *   + 2^(COMB_SPACING-1) * (table(0, d>>(COMB_SPACING-1) & mask(0)) + ...)
+     *
+     * Or more generically as
+     *
+     *   sum(2^i * sum(table(b, d>>i & mask(b)), b=0..COMB_BLOCKS-1), i=0..COMB_SPACING-1)
+     *
+     * This is implemented using an outer loop that runs in reverse order over the lines of this
+     * equation, which in each iteration runs an inner loop that adds the terms of that line and
+     * then doubles the result before proceeding to the next line.
+     *
+     * In pseudocode:
+     *   c = infinity
+     *   for comb_off in range(COMB_SPACING - 1, -1, -1):
+     *     for block in range(COMB_BLOCKS):
+     *       c += table(block, (d >> comb_off) & mask(block))
+     *     if comb_off > 0:
+     *       c = 2*c
+     *   return c
+     *
+     * This computes c = comb(d, G/2), and thus finally R = c + ctx->ge_offset. Note that it would
+     * be possible to apply an initial offset instead of a final offset (moving ge_offset to take
+     * the place of infinity above), but the chosen approach allows using (in a future improvement)
+     * an incomplete addition formula for most of the multiplication.
+     *
+     * The last question is how to implement the table(b, m) function. For any value of b,
+     * m=(d & mask(b)) can only take on at most 2^COMB_TEETH possible values (the last one may have
+     * fewer as there mask(b) may exceed the curve order). So we could create COMB_BLOCKS tables
+     * which contain a value for each such m value.
+     *
+     * Now note that if m=(d & mask(b)), then flipping the relevant bits of m results in negating
+     * the result of table(b, m). This is because table(b,m XOR mask(b)) = table(b, mask(b) - m) =
+     * (mask(b) - m - mask(b)/2)*G = (-m + mask(b)/2)*G = -(m - mask(b)/2)*G = -table(b, m).
+     * Because of this it suffices to only store the first half of the m values for every b. If an
+     * entry from the second half is needed, we look up its bit-flipped version instead, and negate
+     * it.
+     *
+     * secp256k1_ecmult_gen_prec_table[b][index] stores the table(b, m) entries. Index
+     * is the relevant mask(b) bits of m packed together without gaps. */
+
+    /* Outer loop: iterate over comb_off from COMB_SPACING - 1 down to 0. */
+    comb_off = COMB_SPACING - 1;
+    while (1) {
+        uint32_t block;
+        uint32_t bit_pos = comb_off;
+        /* Inner loop: for each block, add table entries to the result. */
+        for (block = 0; block < COMB_BLOCKS; ++block) {
+            /* Gather the mask(block)-selected bits of d into bits. They're packed:
+             * bits[tooth] = d[(block*COMB_TEETH + tooth)*COMB_SPACING + comb_off]. */
+            uint32_t bits = 0, sign, abs, index, tooth;
+            /* Instead of reading individual bits here to construct the bits variable,
+             * build up the result by xoring rotated reads together. In every iteration,
+             * one additional bit is made correct, starting at the bottom. The bits
+             * above that contain junk. This reduces leakage by avoiding computations
+             * on variables that can have only a low number of possible values (e.g.,
+             * just two values when reading a single bit into a variable.) See:
+             * https://www.usenix.org/system/files/conference/usenixsecurity18/sec18-alam.pdf
+             */
+            for (tooth = 0; tooth < COMB_TEETH; ++tooth) {
+                /* Construct bitdata s.t. the bottom bit is the bit we'd like to read.
+                 *
+                 * We could just set bitdata = recoded[bit_pos >> 5] >> (bit_pos & 0x1f)
+                 * but this would simply discard the bits that fall off at the bottom,
+                 * and thus, for example, bitdata could still have only two values if we
+                 * happen to shift by exactly 31 positions. We use a rotation instead,
+                 * which ensures that bitdata doesn't lose entropy. This relies on the
+                 * rotation being atomic, i.e., the compiler emitting an actual rot
+                 * instruction. */
+                uint32_t bitdata = secp256k1_rotr32(recoded[bit_pos >> 5], bit_pos & 0x1f);
+
+                /* Clear the bit at position tooth, but sssh, don't tell clang. */
+                uint32_t volatile vmask = ~(1 << tooth);
+                bits &= vmask;
+
+                /* Write the bit into position tooth (and junk into higher bits). */
+                bits ^= bitdata << tooth;
+                bit_pos += COMB_SPACING;
+            }
+
+            /* If the top bit of bits is 1, flip them all (corresponding to looking up
+             * the negated table value), and remember to negate the result in sign.
+             * Masking with COMB_POINTS - 1 also discards the junk in the higher bits. */
+            sign = (bits >> (COMB_TEETH - 1)) & 1;
+            abs = (bits ^ -sign) & (COMB_POINTS - 1);
+            VERIFY_CHECK(sign == 0 || sign == 1);
+            VERIFY_CHECK(abs < COMB_POINTS);
+
+            /** This uses a conditional move to avoid any secret data in array indexes.
+             *   _Any_ use of secret indexes has been demonstrated to result in timing
+             *   sidechannels, even when the cache-line access patterns are uniform.
+             *  See also:
+             *   "A word of warning", CHES 2013 Rump Session, by Daniel J. Bernstein and Peter Schwabe
+             *    (https://cryptojedi.org/peter/data/chesrump-20130822.pdf) and
+             *   "Cache Attacks and Countermeasures: the Case of AES", RSA 2006,
+             *    by Dag Arne Osvik, Adi Shamir, and Eran Tromer
+             *    (https://www.tau.ac.il/~tromer/papers/cache.pdf)
+             */
+            for (index = 0; index < COMB_POINTS; ++index) {
+                secp256k1_ge_storage_cmov(&adds, &secp256k1_ecmult_gen_prec_table[block][index], index == abs);
+            }
+
+            /* Set add=adds or add=-adds, in constant time, based on sign. */
+            secp256k1_ge_from_storage(&add, &adds);
+            secp256k1_fe_negate(&neg, &add.y, 1);
+            secp256k1_fe_cmov(&add.y, &neg, sign);
+
+            /* Add the looked up and conditionally negated value to r. */
+            if (EXPECT(first, 0)) {
+                /* If this is the first table lookup, we can skip addition. */
+                secp256k1_gej_set_ge(r, &add);
+                /* Give the entry a random Z coordinate to blind intermediary results. */
+                secp256k1_gej_rescale(r, &ctx->proj_blind);
+                first = 0;
+            } else {
+                secp256k1_gej_add_ge(r, r, &add);
+            }
+        }
+
+        /* Double the result, except in the last iteration. comb_off is unsigned, so the
+         * loop is left through this test rather than through a "comb_off >= 0" condition. */
+        if (comb_off-- == 0) break;
+        secp256k1_gej_double(r, r);
+    }
+
+    /* Correct for the scalar_offset added at the start (ge_offset = b*G, while b was
+     * subtracted from the input scalar gn). */
+    secp256k1_gej_add_ge(r, r, &ctx->ge_offset);
+
+    /* Cleanup: wipe the temporaries that were derived from the scalar d (d itself was
+     * already cleared right after recoding). */
+    secp256k1_fe_clear(&neg);
+    secp256k1_ge_clear(&add);
+    secp256k1_memclear(&adds, sizeof(adds));
+    secp256k1_memclear(&recoded, sizeof(recoded));
+}
+
+/* Setup blinding values for secp256k1_ecmult_gen.
+ *
+ * If seed32 is NULL, ctx is reset to its default values: ge_offset = -G,
+ * scalar_offset = 1 + (2^COMB_BITS - 1)/2 and proj_blind = 1. Otherwise seed32 must
+ * point to 32 bytes, which together with the current ctx->scalar_offset key the RFC6979
+ * HMAC-SHA256 generator from which a new projective blinding factor and a new blinding
+ * scalar b are drawn. In that case ctx->scalar_offset is read and ctx is passed to
+ * secp256k1_ecmult_gen, so ctx must already hold valid blinding values. */
+static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const unsigned char *seed32) {
+    secp256k1_scalar b;
+    secp256k1_scalar diff;
+    secp256k1_gej gb;
+    secp256k1_fe f;
+    unsigned char nonce32[32];
+    secp256k1_rfc6979_hmac_sha256 rng;
+    unsigned char keydata[64];
+
+    /* Compute the (2^COMB_BITS - 1)/2 term once. */
+    secp256k1_ecmult_gen_scalar_diff(&diff);
+
+    if (seed32 == NULL) {
+        /* When seed is NULL, reset the final point and blinding value. This corresponds
+         * to the blinding value b = -1: ge_offset = -G and scalar_offset = diff + 1. */
+        secp256k1_ge_neg(&ctx->ge_offset, &secp256k1_ge_const_g);
+        secp256k1_scalar_add(&ctx->scalar_offset, &secp256k1_scalar_one, &diff);
+        ctx->proj_blind = secp256k1_fe_one;
+        return;
+    }
+    /* The prior blinding value (if not reset) is chained forward by including it in the hash. */
+    secp256k1_scalar_get_b32(keydata, &ctx->scalar_offset);
+    /** Using a CSPRNG allows a failure free interface, avoids needing large amounts of random data,
+     *   and guards against weak or adversarial seeds.  This is a simpler and safer interface than
+     *   asking the caller for blinding values directly and expecting them to retry on failure.
+     */
+    VERIFY_CHECK(seed32 != NULL);
+    memcpy(keydata + 32, seed32, 32);
+    secp256k1_rfc6979_hmac_sha256_initialize(&rng, keydata, 64);
+    secp256k1_memclear(keydata, sizeof(keydata));
+
+    /* Compute projective blinding factor (cannot be 0): a value that normalizes to
+     * zero is replaced by one. */
+    secp256k1_rfc6979_hmac_sha256_generate(&rng, nonce32, 32);
+    secp256k1_fe_set_b32_mod(&f, nonce32);
+    secp256k1_fe_cmov(&f, &secp256k1_fe_one, secp256k1_fe_normalizes_to_zero(&f));
+    ctx->proj_blind = f;
+
+    /* For a random blinding value b, set scalar_offset=diff-b, ge_offset=bG.
+     * bG is computed with secp256k1_ecmult_gen itself, which at that point still uses
+     * the previous scalar_offset/ge_offset of ctx together with the new proj_blind;
+     * the two offsets are only replaced afterwards. */
+    secp256k1_rfc6979_hmac_sha256_generate(&rng, nonce32, 32);
+    secp256k1_scalar_set_b32(&b, nonce32, NULL);
+    /* The blinding value cannot be zero, as that would mean ge_offset = infinity,
+     * which secp256k1_gej_add_ge cannot handle. */
+    secp256k1_scalar_cmov(&b, &secp256k1_scalar_one, secp256k1_scalar_is_zero(&b));
+    secp256k1_rfc6979_hmac_sha256_finalize(&rng);
+    secp256k1_ecmult_gen(ctx, &gb, &b);
+    secp256k1_scalar_negate(&b, &b);
+    secp256k1_scalar_add(&ctx->scalar_offset, &b, &diff);
+    secp256k1_ge_set_gej(&ctx->ge_offset, &gb);
+
+    /* Clean up. */
+    secp256k1_memclear(nonce32, sizeof(nonce32));
+    secp256k1_scalar_clear(&b);
+    secp256k1_gej_clear(&gb);
+    secp256k1_fe_clear(&f);
+    secp256k1_rfc6979_hmac_sha256_clear(&rng);
+}
+
+#endif /* SECP256K1_ECMULT_GEN_IMPL_H */
--- a/libsecp256k1/src/ecmult_impl.h
+++ b/libsecp256k1/src/ecmult_impl.h
@@ -0,0 +1,853 @@
+/******************************************************************************
+ * Copyright (c) 2013, 2014, 2017 Pieter Wuille, Andrew Poelstra, Jonas Nick  *
+ * Distributed under the MIT software license, see the accompanying           *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.       *
+ ******************************************************************************/
+
+#ifndef SECP256K1_ECMULT_IMPL_H
+#define SECP256K1_ECMULT_IMPL_H
+
+#include <string.h>
+#include <stdint.h>
+
+#include "util.h"
+#include "group.h"
+#include "scalar.h"
+#include "ecmult.h"
+#include "precomputed_ecmult.h"
+
+#if defined(EXHAUSTIVE_TEST_ORDER)
+/* We need to lower the window size WINDOW_A for exhaustive tests because
+ * the tables cannot have infinities in them (this breaks the
+ * affine-isomorphism stuff which tracks z-ratios) */
+#  if EXHAUSTIVE_TEST_ORDER > 128
+#    define WINDOW_A 5
+#  elif EXHAUSTIVE_TEST_ORDER > 8
+#    define WINDOW_A 4
+#  else
+#    define WINDOW_A 2
+#  endif
+#else
+/* Optimal for 128-bit and 256-bit exponents. */
+#  define WINDOW_A 5
+/** Larger values for ECMULT_WINDOW_SIZE result in possibly better
+ *  performance at the cost of an exponentially larger precomputed
+ *  table. The exact table size is
+ *      (1 << (WINDOW_G - 2)) * sizeof(secp256k1_ge_storage)  bytes,
+ *  where sizeof(secp256k1_ge_storage) is typically 64 bytes but can
+ *  be larger due to platform-specific padding and alignment.
+ *  Two tables of this size are used (due to the endomorphism
+ *  optimization).
+ *
+ *  NOTE(review): no WINDOW_G definition follows this comment here;
+ *  presumably it is defined elsewhere from ECMULT_WINDOW_SIZE - confirm.
+ */
+#endif
+
+#define WNAF_BITS 128
+#define WNAF_SIZE_BITS(bits, w) CEIL_DIV(bits, w)
+#define WNAF_SIZE(w) WNAF_SIZE_BITS(WNAF_BITS, w)
+
+/* The number of objects allocated on the scratch space for ecmult_multi algorithms */
+#define PIPPENGER_SCRATCH_OBJECTS 6
+#define STRAUSS_SCRATCH_OBJECTS 5
+
+#define PIPPENGER_MAX_BUCKET_WINDOW 12
+
+/* Minimum number of points for which pippenger_wnaf is faster than strauss wnaf */
+#define ECMULT_PIPPENGER_THRESHOLD 88
+
+#define ECMULT_MAX_POINTS_PER_BATCH 5000000
+
+/** Fill a table 'pre_a' with precomputed odd multiples of a.
+ *  pre_a will contain [1*a,3*a,...,(2*n-1)*a], so it needs space for n group elements.
+ *  zr needs space for n field elements.
+ *
+ *  a must not be infinity (checked with VERIFY_CHECK), and n must be at least 1, since
+ *  pre_a[0] and zr[0] are written unconditionally.
+ *
+ *  Although pre_a is an array of _ge rather than _gej, it actually represents elements
+ *  in Jacobian coordinates with their z coordinates omitted. The omitted z-coordinates
+ *  can be recovered using z and zr. Using the notation z(b) to represent the omitted
+ *  z coordinate of b:
+ *  - z(pre_a[n-1]) = 'z'
+ *  - z(pre_a[i-1]) = z(pre_a[i]) / zr[i] for n > i > 0
+ *
+ *  Lastly the zr[0] value, which isn't used above, is set so that:
+ *  - a.z = z(pre_a[0]) / zr[0]
+ */
+static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_ge *pre_a, secp256k1_fe *zr, secp256k1_fe *z, const secp256k1_gej *a) {
+    secp256k1_gej d, ai;
+    secp256k1_ge d_ge;
+    int i;
+
+    VERIFY_CHECK(!a->infinity);
+
+    secp256k1_gej_double_var(&d, a, NULL);
+
+    /*
+     * Perform the additions using an isomorphic curve Y^2 = X^3 + 7*C^6 where C := d.z.
+     * The isomorphism, phi, maps a secp256k1 point (x, y) to the point (x*C^2, y*C^3) on the other curve.
+     * In Jacobian coordinates phi maps (x, y, z) to (x*C^2, y*C^3, z) or, equivalently to (x, y, z/C).
+     *
+     *     phi(x, y, z) = (x*C^2, y*C^3, z) = (x, y, z/C)
+     *   d_ge := phi(d) = (d.x, d.y, 1)
+     *     ai := phi(a) = (a.x*C^2, a.y*C^3, a.z)
+     *
+     * The group addition functions work correctly on these isomorphic curves.
+     * In particular phi(d) is easy to represent in affine coordinates under this isomorphism.
+     * This lets us use the faster secp256k1_gej_add_ge_var group addition function that we wouldn't be able to use otherwise.
+     */
+    secp256k1_ge_set_xy(&d_ge, &d.x, &d.y);
+    secp256k1_ge_set_gej_zinv(&pre_a[0], a, &d.z);
+    secp256k1_gej_set_ge(&ai, &pre_a[0]);
+    ai.z = a->z;
+
+    /* pre_a[0] is the point (a.x*C^2, a.y*C^3, a.z*C) which is equivalent to a.
+     * Set zr[0] to C, which is the ratio between the omitted z(pre_a[0]) value and a.z.
+     */
+    zr[0] = d.z;
+
+    for (i = 1; i < n; i++) {
+        secp256k1_gej_add_ge_var(&ai, &ai, &d_ge, &zr[i]);
+        secp256k1_ge_set_xy(&pre_a[i], &ai.x, &ai.y);
+    }
+
+    /* Multiply the last z-coordinate by C to undo the isomorphism.
+     * Since the z-coordinates of the pre_a values are implied by the zr array of z-coordinate ratios,
+     * undoing the isomorphism here undoes the isomorphism for all pre_a values.
+     */
+    secp256k1_fe_mul(z, &ai.z, &d.z);
+}
+
+/* Sanity-check a table lookup request: n must be odd and satisfy
+ * -(2^(w-1) - 1) <= n <= 2^(w-1) - 1. All checks are VERIFY_CHECKs; the casts to
+ * void keep n and w referenced regardless of how VERIFY_CHECK expands. */
+SECP256K1_INLINE static void secp256k1_ecmult_table_verify(int n, int w) {
+    (void)w;
+    (void)n;
+    VERIFY_CHECK((n & 1) == 1);
+    VERIFY_CHECK(n <= (1 << (w - 1)) - 1);
+    VERIFY_CHECK(n >= -((1 << (w - 1)) - 1));
+}
+
+/* Fetch the table entry for the odd multiplier n from pre into r.
+ * The entry used is pre[(|n|-1)/2]; when n is not positive, the y coordinate of the
+ * copy is negated. n and w are validated by secp256k1_ecmult_table_verify. */
+SECP256K1_INLINE static void secp256k1_ecmult_table_get_ge(secp256k1_ge *r, const secp256k1_ge *pre, int n, int w) {
+    int negative;
+    int idx;
+
+    secp256k1_ecmult_table_verify(n, w);
+    negative = !(n > 0);
+    idx = ((negative ? -n : n) - 1) / 2;
+
+    *r = pre[idx];
+    if (negative) {
+        secp256k1_fe_negate(&(r->y), &(r->y), 1);
+    }
+}
+
+/* Like secp256k1_ecmult_table_get_ge, but the x coordinate of the result is taken
+ * from the separate array x (same index, (|n|-1)/2) while y still comes from pre.
+ * When n is not positive, the y coordinate is negated. */
+SECP256K1_INLINE static void secp256k1_ecmult_table_get_ge_lambda(secp256k1_ge *r, const secp256k1_ge *pre, const secp256k1_fe *x, int n, int w) {
+    int negative;
+    int idx;
+
+    secp256k1_ecmult_table_verify(n, w);
+    negative = !(n > 0);
+    idx = ((negative ? -n : n) - 1) / 2;
+
+    secp256k1_ge_set_xy(r, &x[idx], &pre[idx].y);
+    if (negative) {
+        secp256k1_fe_negate(&(r->y), &(r->y), 1);
+    }
+}
+
+/* Like secp256k1_ecmult_table_get_ge, but for a table kept in secp256k1_ge_storage
+ * format: entry (|n|-1)/2 is converted into r, and its y coordinate is negated when
+ * n is not positive. */
+SECP256K1_INLINE static void secp256k1_ecmult_table_get_ge_storage(secp256k1_ge *r, const secp256k1_ge_storage *pre, int n, int w) {
+    int negative;
+    int idx;
+
+    secp256k1_ecmult_table_verify(n, w);
+    negative = !(n > 0);
+    idx = ((negative ? -n : n) - 1) / 2;
+
+    secp256k1_ge_from_storage(r, &pre[idx]);
+    if (negative) {
+        secp256k1_fe_negate(&(r->y), &(r->y), 1);
+    }
+}
+
+/** Convert a number to WNAF notation. The number becomes represented by
+ *  sum(2^i * wnaf[i], i=0..len-1), with the following guarantees:
+ *  - each wnaf[i] is either 0, or an odd integer between -((1<<(w-1)) - 1) and ((1<<(w-1)) - 1)
+ *  - two non-zero entries in wnaf are separated by at least w-1 zeroes.
+ *  - the return value is one more than the index of the highest non-zero entry in wnaf
+ *    (0 if all entries are zero). This number is at most len (and thus at most 256), and at
+ *    most one more than the number of bits in the (absolute value) of the input.
+ *
+ *  wnaf must have room for len entries; all of them are written. If bit 255 of a is set,
+ *  the negation of a is encoded instead and the sign of every digit is flipped.
+ */
+static int secp256k1_ecmult_wnaf(int *wnaf, int len, const secp256k1_scalar *a, int w) {
+    secp256k1_scalar s;
+    int last_set_bit = -1; /* index of the highest non-zero digit written so far */
+    int bit = 0;
+    int sign = 1;
+    int carry = 0;
+
+    VERIFY_CHECK(wnaf != NULL);
+    VERIFY_CHECK(0 <= len && len <= 256);
+    VERIFY_CHECK(a != NULL);
+    VERIFY_CHECK(2 <= w && w <= 31);
+
+    for (bit = 0; bit < len; bit++) {
+        wnaf[bit] = 0;
+    }
+
+    s = *a;
+    if (secp256k1_scalar_get_bits_limb32(&s, 255, 1)) { /* top bit set: encode -a, flip digit signs */
+        secp256k1_scalar_negate(&s, &s);
+        sign = -1;
+    }
+
+    bit = 0;
+    while (bit < len) {
+        int now;
+        int word;
+        if (secp256k1_scalar_get_bits_limb32(&s, bit, 1) == (unsigned int)carry) { /* bit + carry is even: digit stays 0 */
+            bit++;
+            continue;
+        }
+
+        now = w; /* window width, clamped so that it does not run past len */
+        if (now > len - bit) {
+            now = len - bit;
+        }
+
+        word = secp256k1_scalar_get_bits_var(&s, bit, now) + carry;
+
+        carry = (word >> (w-1)) & 1; /* word >= 2^(w-1): use word - 2^w and carry into the next window */
+        word -= carry << w;
+
+        wnaf[bit] = sign * word;
+        last_set_bit = bit;
+
+        bit += now;
+    }
+#ifdef VERIFY
+    {
+        int verify_bit = bit;
+
+        VERIFY_CHECK(carry == 0);
+
+        while (verify_bit < 256) {
+            VERIFY_CHECK(secp256k1_scalar_get_bits_limb32(&s, verify_bit, 1) == 0);
+            verify_bit++;
+        }
+    }
+#endif
+    return last_set_bit + 1;
+}
+
+struct secp256k1_strauss_point_state {
+    int wnaf_na_1[129];   /* wNAF digits of na_1, where na = na_1 + na_lam*lambda */
+    int wnaf_na_lam[129]; /* wNAF digits of na_lam */
+    int bits_na_1;        /* return value of secp256k1_ecmult_wnaf for wnaf_na_1 */
+    int bits_na_lam;      /* return value of secp256k1_ecmult_wnaf for wnaf_na_lam */
+};
+
+struct secp256k1_strauss_state {
+    /* aux is used to hold z-ratios, and then used to hold pre_a[i].x * BETA values.
+     * Like pre_a, it is indexed in groups of ECMULT_TABLE_SIZE(WINDOW_A) entries per point. */
+    secp256k1_fe* aux;
+    secp256k1_ge* pre_a; /* odd-multiples tables, ECMULT_TABLE_SIZE(WINDOW_A) entries per point */
+    struct secp256k1_strauss_point_state* ps; /* one wNAF state per point that is actually processed */
+};
+
+static void secp256k1_ecmult_strauss_wnaf(const struct secp256k1_strauss_state *state, secp256k1_gej *r, size_t num, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) { /* Computes r = sum(na[i]*a[i], i=0..num-1) + ng*G; the G term is skipped when ng is NULL. */
+    secp256k1_ge tmpa;
+    secp256k1_fe Z;
+    /* Split G factors. */
+    secp256k1_scalar ng_1, ng_128;
+    int wnaf_ng_1[129];
+    int bits_ng_1 = 0;
+    int wnaf_ng_128[129];
+    int bits_ng_128 = 0;
+    int i;
+    int bits = 0; /* Largest wNAF length seen so far; bounds the main loop below. */
+    size_t np;
+    size_t no = 0; /* Number of points that actually take part (non-zero scalar, finite point). */
+
+    secp256k1_fe_set_int(&Z, 1);
+    for (np = 0; np < num; ++np) {
+        secp256k1_gej tmp;
+        secp256k1_scalar na_1, na_lam;
+        if (secp256k1_scalar_is_zero(&na[np]) || secp256k1_gej_is_infinity(&a[np])) {
+            continue; /* Such a term contributes nothing and gets no slot in state. */
+        }
+        /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */
+        secp256k1_scalar_split_lambda(&na_1, &na_lam, &na[np]);
+
+        /* build wnaf representation for na_1 and na_lam. */
+        state->ps[no].bits_na_1   = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na_1,   129, &na_1,   WINDOW_A);
+        state->ps[no].bits_na_lam = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na_lam, 129, &na_lam, WINDOW_A);
+        VERIFY_CHECK(state->ps[no].bits_na_1 <= 129);
+        VERIFY_CHECK(state->ps[no].bits_na_lam <= 129);
+        if (state->ps[no].bits_na_1 > bits) {
+            bits = state->ps[no].bits_na_1;
+        }
+        if (state->ps[no].bits_na_lam > bits) {
+            bits = state->ps[no].bits_na_lam;
+        }
+
+        /* Calculate odd multiples of a.
+         * All multiples are brought to the same Z 'denominator', which is stored
+         * in Z. Due to secp256k1's isomorphism we can do all operations pretending
+         * that the Z coordinate was 1, use affine addition formulae, and correct
+         * the Z coordinate of the result once at the end.
+         * The exception is the precomputed G table points, which are actually
+         * affine. Compared to the base used for other points, they have a Z ratio
+         * of 1/Z, so we can use secp256k1_gej_add_zinv_var, which uses the same
+         * isomorphism to efficiently add with a known Z inverse.
+         */
+        tmp = a[np];
+        if (no) {
+            secp256k1_gej_rescale(&tmp, &Z);
+        }
+        secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->pre_a + no * ECMULT_TABLE_SIZE(WINDOW_A), state->aux + no * ECMULT_TABLE_SIZE(WINDOW_A), &Z, &tmp);
+        if (no) secp256k1_fe_mul(state->aux + no * ECMULT_TABLE_SIZE(WINDOW_A), state->aux + no * ECMULT_TABLE_SIZE(WINDOW_A), &(a[np].z));
+
+        ++no;
+    }
+
+    /* Bring them to the same Z denominator. */
+    if (no) {
+        secp256k1_ge_table_set_globalz(ECMULT_TABLE_SIZE(WINDOW_A) * no, state->pre_a, state->aux);
+    }
+
+    for (np = 0; np < no; ++np) { /* From here on aux is reused for the pre_a[i].x * beta values. */
+        for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
+            secp256k1_fe_mul(&state->aux[np * ECMULT_TABLE_SIZE(WINDOW_A) + i], &state->pre_a[np * ECMULT_TABLE_SIZE(WINDOW_A) + i].x, &secp256k1_const_beta);
+        }
+    }
+
+    if (ng) {
+        /* split ng into ng_1 and ng_128 (where ng = ng_1 + ng_128*2^128, and ng_1 and ng_128 are ~128 bit) */
+        secp256k1_scalar_split_128(&ng_1, &ng_128, ng);
+
+        /* Build wnaf representation for ng_1 and ng_128 */
+        bits_ng_1   = secp256k1_ecmult_wnaf(wnaf_ng_1,   129, &ng_1,   WINDOW_G);
+        bits_ng_128 = secp256k1_ecmult_wnaf(wnaf_ng_128, 129, &ng_128, WINDOW_G);
+        if (bits_ng_1 > bits) {
+            bits = bits_ng_1;
+        }
+        if (bits_ng_128 > bits) {
+            bits = bits_ng_128;
+        }
+    }
+
+    secp256k1_gej_set_infinity(r);
+
+    for (i = bits - 1; i >= 0; i--) { /* Walk the digit positions from most to least significant. */
+        int n;
+        secp256k1_gej_double_var(r, r, NULL);
+        for (np = 0; np < no; ++np) {
+            if (i < state->ps[np].bits_na_1 && (n = state->ps[np].wnaf_na_1[i])) {
+                secp256k1_ecmult_table_get_ge(&tmpa, state->pre_a + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A);
+                secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
+            }
+            if (i < state->ps[np].bits_na_lam && (n = state->ps[np].wnaf_na_lam[i])) {
+                secp256k1_ecmult_table_get_ge_lambda(&tmpa, state->pre_a + np * ECMULT_TABLE_SIZE(WINDOW_A), state->aux + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A);
+                secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
+            }
+        }
+        if (i < bits_ng_1 && (n = wnaf_ng_1[i])) {
+            secp256k1_ecmult_table_get_ge_storage(&tmpa, secp256k1_pre_g, n, WINDOW_G);
+            secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z);
+        }
+        if (i < bits_ng_128 && (n = wnaf_ng_128[i])) {
+            secp256k1_ecmult_table_get_ge_storage(&tmpa, secp256k1_pre_g_128, n, WINDOW_G);
+            secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z);
+        }
+    }
+
+    if (!r->infinity) {
+        secp256k1_fe_mul(&r->z, &r->z, &Z); /* The one-time Z correction announced in the odd-multiples comment above. */
+    }
+}
+
+static void secp256k1_ecmult(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) {
+    struct secp256k1_strauss_point_state point_state[1];
+    secp256k1_ge odd_multiples[ECMULT_TABLE_SIZE(WINDOW_A)];
+    secp256k1_fe fe_scratch[ECMULT_TABLE_SIZE(WINDOW_A)];
+    struct secp256k1_strauss_state workspace;
+    /* Single-point case: wire stack buffers for exactly one point into the Strauss routine. */
+    workspace.ps = point_state;
+    workspace.pre_a = odd_multiples;
+    workspace.aux = fe_scratch;
+    secp256k1_ecmult_strauss_wnaf(&workspace, r, 1, a, na, ng);
+}
+
+static size_t secp256k1_strauss_scratch_size(size_t n_points) {
+    /* Per point: one table each of ge and fe entries, the wNAF state, the gej point and its scalar. */
+    return n_points * (ECMULT_TABLE_SIZE(WINDOW_A) * (sizeof(secp256k1_fe) + sizeof(secp256k1_ge)) + sizeof(secp256k1_scalar) + sizeof(secp256k1_gej) + sizeof(struct secp256k1_strauss_point_state));
+}
+
+static int secp256k1_ecmult_strauss_batch(const secp256k1_callback* error_callback, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points, size_t cb_offset) {
+    /* Everything allocated below is released by rolling the scratch space back to this mark. */
+    const size_t scratch_checkpoint = secp256k1_scratch_checkpoint(error_callback, scratch);
+    struct secp256k1_strauss_state state;
+    secp256k1_scalar* scalars;
+    secp256k1_gej* points;
+    size_t idx = 0;
+    int ok;
+
+    secp256k1_gej_set_infinity(r);
+    if (n_points == 0 && inp_g_sc == NULL) {
+        return 1;
+    }
+
+    /* STRAUSS_SCRATCH_OBJECTS separate objects are carved out of the scratch
+     * space here. Whenever this list changes, STRAUSS_SCRATCH_OBJECTS and
+     * strauss_scratch_size have to be adjusted to match. */
+    points = (secp256k1_gej*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(secp256k1_gej));
+    scalars = (secp256k1_scalar*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(secp256k1_scalar));
+    state.aux = (secp256k1_fe*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_fe));
+    state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge));
+    state.ps = (struct secp256k1_strauss_point_state*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(struct secp256k1_strauss_point_state));
+    ok = points != NULL && scalars != NULL && state.aux != NULL && state.pre_a != NULL && state.ps != NULL;
+    /* Pull each (scalar, point) pair from the callback; stop at the first failure. */
+    while (ok && idx < n_points) {
+        secp256k1_ge point;
+        ok = cb(&scalars[idx], &point, idx + cb_offset, cbdata) != 0;
+        if (ok) {
+            secp256k1_gej_set_ge(&points[idx], &point);
+            idx++;
+        }
+    }
+    if (ok) {
+        secp256k1_ecmult_strauss_wnaf(&state, r, n_points, points, scalars, inp_g_sc);
+    }
+    secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
+    return ok;
+}
+
+/* Wrapper for secp256k1_ecmult_multi_func interface: forwards to the batch function with a callback offset of 0. */
+static int secp256k1_ecmult_strauss_batch_single(const secp256k1_callback* error_callback, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) {
+    return secp256k1_ecmult_strauss_batch(error_callback, scratch, r, inp_g_sc, cb, cbdata, n, 0);
+}
+
+static size_t secp256k1_strauss_max_points(const secp256k1_callback* error_callback, secp256k1_scratch *scratch) { /* How many points' worth of Strauss scratch data fit in the largest allocation the scratch space allows. */
+    return secp256k1_scratch_max_allocation(error_callback, scratch, STRAUSS_SCRATCH_OBJECTS) / secp256k1_strauss_scratch_size(1);
+}
+
+/** Convert a number to WNAF notation.
+ *  The number becomes represented by sum(2^{wi} * wnaf[i], i=0..WNAF_SIZE(w)-1) - return_val.
+ *  It has the following guarantees:
+ *  - each wnaf[i] is either 0 or an odd integer between -(1 << w) and (1 << w)
+ *  - the number of words set is always WNAF_SIZE(w)
+ *  - the returned skew is 0 or 1
+ */
+static int secp256k1_wnaf_fixed(int *wnaf, const secp256k1_scalar *s, int w) {
+    int skew = 0;
+    int pos;
+    int max_pos;
+    int last_w;
+    const secp256k1_scalar *work = s;
+
+    if (secp256k1_scalar_is_zero(s)) {
+        for (pos = 0; pos < WNAF_SIZE(w); pos++) {
+            wnaf[pos] = 0;
+        }
+        return 0; /* Zero scalar: every digit is 0 and there is no skew. */
+    }
+
+    if (secp256k1_scalar_is_even(s)) {
+        skew = 1; /* The lowest digit is made odd by adding 1; the returned skew tells the caller to subtract it again. */
+    }
+
+    wnaf[0] = secp256k1_scalar_get_bits_var(work, 0, w) + skew;
+    /* Compute last window size. Relevant when window size doesn't divide the
+     * number of bits in the scalar */
+    last_w = WNAF_BITS - (WNAF_SIZE(w) - 1) * w;
+
+    /* Store the position of the first nonzero word in max_pos to allow
+     * skipping leading zeros when calculating the wnaf. */
+    for (pos = WNAF_SIZE(w) - 1; pos > 0; pos--) {
+        int val = secp256k1_scalar_get_bits_var(work, pos * w, pos == WNAF_SIZE(w)-1 ? last_w : w);
+        if(val != 0) {
+            break;
+        }
+        wnaf[pos] = 0;
+    }
+    max_pos = pos;
+    pos = 1;
+
+    while (pos <= max_pos) {
+        int val = secp256k1_scalar_get_bits_var(work, pos * w, pos == WNAF_SIZE(w)-1 ? last_w : w);
+        if ((val & 1) == 0) {
+            wnaf[pos - 1] -= (1 << w); /* Even word: add 1 here and take the equivalent 2^w off the digit below. */
+            wnaf[pos] = (val + 1);
+        } else {
+            wnaf[pos] = val;
+        }
+        /* Set a coefficient to zero if it is 1 or -1 and the preceding digit
+         * is strictly negative or strictly positive respectively. Only change
+         * coefficients at previous positions because above code assumes that
+         * wnaf[pos - 1] is odd.
+         */
+        if (pos >= 2 && ((wnaf[pos - 1] == 1 && wnaf[pos - 2] < 0) || (wnaf[pos - 1] == -1 && wnaf[pos - 2] > 0))) {
+            if (wnaf[pos - 1] == 1) {
+                wnaf[pos - 2] += 1 << w;
+            } else {
+                wnaf[pos - 2] -= 1 << w;
+            }
+            wnaf[pos - 1] = 0;
+        }
+        ++pos;
+    }
+
+    return skew;
+}
+
+struct secp256k1_pippenger_point_state {
+    int skew_na;      /* Skew returned by secp256k1_wnaf_fixed for this point's scalar. */
+    size_t input_pos; /* Index of this point and scalar in the input arrays. */
+};
+
+struct secp256k1_pippenger_state {
+    int *wnaf_na; /* Fixed wNAF digits of all processed scalars, n_wnaf ints per point, stored back to back. */
+    struct secp256k1_pippenger_point_state* ps; /* One entry per point that is actually processed. */
+};
+
+/*
+ * pippenger_wnaf computes the result of a multi-point multiplication as
+ * follows: The scalars are brought into wnaf with n_wnaf elements each. Then
+ * for every i < n_wnaf, first each point is added to a "bucket" corresponding
+ * to the point's wnaf[i]. Second, the buckets are added together such that
+ * r += 1*bucket[0] + 3*bucket[1] + 5*bucket[2] + ...
+ */
+static int secp256k1_ecmult_pippenger_wnaf(secp256k1_gej *buckets, int bucket_window, struct secp256k1_pippenger_state *state, secp256k1_gej *r, const secp256k1_scalar *sc, const secp256k1_ge *pt, size_t num) {
+    size_t n_wnaf = WNAF_SIZE(bucket_window+1);
+    size_t np;
+    size_t no = 0; /* Number of (scalar, point) pairs that actually take part. */
+    int i;
+    int j;
+
+    for (np = 0; np < num; ++np) {
+        if (secp256k1_scalar_is_zero(&sc[np]) || secp256k1_ge_is_infinity(&pt[np])) {
+            continue; /* Such a pair contributes nothing and gets no slot in state. */
+        }
+        state->ps[no].input_pos = np;
+        state->ps[no].skew_na = secp256k1_wnaf_fixed(&state->wnaf_na[no*n_wnaf], &sc[np], bucket_window+1);
+        no++;
+    }
+    secp256k1_gej_set_infinity(r);
+
+    if (no == 0) {
+        return 1; /* Nothing to add up: the result is the point at infinity. */
+    }
+
+    for (i = n_wnaf - 1; i >= 0; i--) { /* One pass per wNAF digit position, most significant first. */
+        secp256k1_gej running_sum;
+
+        for(j = 0; j < ECMULT_TABLE_SIZE(bucket_window+2); j++) {
+            secp256k1_gej_set_infinity(&buckets[j]);
+        }
+
+        for (np = 0; np < no; ++np) {
+            int n = state->wnaf_na[np*n_wnaf + i];
+            struct secp256k1_pippenger_point_state point_state = state->ps[np];
+            secp256k1_ge tmp;
+            int idx;
+
+            if (i == 0) {
+                /* correct for wnaf skew */
+                int skew = point_state.skew_na;
+                if (skew) {
+                    secp256k1_ge_neg(&tmp, &pt[point_state.input_pos]);
+                    secp256k1_gej_add_ge_var(&buckets[0], &buckets[0], &tmp, NULL);
+                }
+            }
+            if (n > 0) {
+                idx = (n - 1)/2; /* Odd digit n selects bucket (n-1)/2. */
+                secp256k1_gej_add_ge_var(&buckets[idx], &buckets[idx], &pt[point_state.input_pos], NULL);
+            } else if (n < 0) {
+                idx = -(n + 1)/2; /* Negative digit: same bucket as |n|, but the negated point is added. */
+                secp256k1_ge_neg(&tmp, &pt[point_state.input_pos]);
+                secp256k1_gej_add_ge_var(&buckets[idx], &buckets[idx], &tmp, NULL);
+            }
+        }
+
+        for(j = 0; j < bucket_window; j++) { /* bucket_window doublings now; one more follows near the end of this pass. */
+            secp256k1_gej_double_var(r, r, NULL);
+        }
+
+        secp256k1_gej_set_infinity(&running_sum);
+        /* Accumulate the sum: bucket[0] + 3*bucket[1] + 5*bucket[2] + 7*bucket[3] + ...
+         *                   = bucket[0] +   bucket[1] +   bucket[2] +   bucket[3] + ...
+         *                   +         2 *  (bucket[1] + 2*bucket[2] + 3*bucket[3] + ...)
+         * using an intermediate running sum:
+         * running_sum = bucket[0] +   bucket[1] +   bucket[2] + ...
+         *
+         * The doubling is done implicitly by deferring the final window doubling (of 'r').
+         */
+        for(j = ECMULT_TABLE_SIZE(bucket_window+2) - 1; j > 0; j--) {
+            secp256k1_gej_add_var(&running_sum, &running_sum, &buckets[j], NULL);
+            secp256k1_gej_add_var(r, r, &running_sum, NULL);
+        }
+
+        secp256k1_gej_add_var(&running_sum, &running_sum, &buckets[0], NULL);
+        secp256k1_gej_double_var(r, r, NULL);
+        secp256k1_gej_add_var(r, r, &running_sum, NULL);
+    }
+    return 1; /* This function has no failure path. */
+}
+
+/**
+ * Chooses the bucket_window (number of bits of a scalar represented by a
+ * set of buckets) to use for n points. Each table row pairs a window with
+ * the largest n for which it is selected. There is no row for window 8, so
+ * that value is never returned.
+ */
+static int secp256k1_pippenger_bucket_window(size_t n) {
+    static const struct { size_t max_n; int window; } choices[] = {
+        {1, 1},
+        {4, 2},
+        {20, 3},
+        {57, 4},
+        {136, 5},
+        {235, 6},
+        {1260, 7},
+        {4420, 9},
+        {7880, 10},
+        {16050, 11}
+    };
+    size_t k;
+
+    for (k = 0; k < sizeof(choices) / sizeof(choices[0]); k++) {
+        if (n <= choices[k].max_n) {
+            return choices[k].window;
+        }
+    }
+    /* More points than any row covers: fall back to the maximum window. */
+    return PIPPENGER_MAX_BUCKET_WINDOW;
+}
+
+/**
+ * Returns the largest number of points for which the given bucket_window is
+ * considered optimal, or 0 if bucket_window is not a known window.
+ */
+static size_t secp256k1_pippenger_bucket_window_inv(int bucket_window) {
+    /* Indexed by bucket_window; slot 0 is unused and window 8 shares the limit of window 7. */
+    static const size_t max_points[] = {0, 1, 4, 20, 57, 136, 235, 1260, 1260, 4420, 7880, 16050};
+    const int n_known = (int)(sizeof(max_points) / sizeof(max_points[0]));
+
+    if (bucket_window == PIPPENGER_MAX_BUCKET_WINDOW) {
+        /* The maximum window has no upper limit on the number of points. */
+        return SIZE_MAX;
+    }
+    if (bucket_window < 1 || bucket_window >= n_known) {
+        /* Not a window this table knows about. */
+        return 0;
+    }
+
+    return max_points[bucket_window];
+}
+
+
+SECP256K1_INLINE static void secp256k1_ecmult_endo_split(secp256k1_scalar *s1, secp256k1_scalar *s2, secp256k1_ge *p1, secp256k1_ge *p2) {
+    /* Split *s1 into (*s1, *s2) and set *p2 = lambda * *p1; a high scalar is negated together with its point. */
+    secp256k1_scalar whole = *s1;
+    secp256k1_ge_mul_lambda(p2, p1);
+    secp256k1_scalar_split_lambda(s1, s2, &whole);
+    if (secp256k1_scalar_is_high(s2)) {
+        secp256k1_ge_neg(p2, p2);
+        secp256k1_scalar_negate(s2, s2);
+    }
+    if (secp256k1_scalar_is_high(s1)) {
+        secp256k1_ge_neg(p1, p1);
+        secp256k1_scalar_negate(s1, s1);
+    }
+}
+
+/**
+ * Computes how many scratch bytes a Pippenger batch over n_points points
+ * (not counting the base point G) needs, ignoring alignment.
+ */
+static size_t secp256k1_pippenger_scratch_size(size_t n_points, int bucket_window) {
+    const size_t per_entry = (WNAF_SIZE(bucket_window+1)+1)*sizeof(int) + sizeof(struct secp256k1_pippenger_point_state) + sizeof(secp256k1_scalar) + sizeof(secp256k1_ge);
+    const size_t n_entries = 2 * (n_points + 1);
+    return sizeof(struct secp256k1_pippenger_state) + n_entries * per_entry + (sizeof(secp256k1_gej) << bucket_window);
+}
+
+static int secp256k1_ecmult_pippenger_batch(const secp256k1_callback* error_callback, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points, size_t cb_offset) { /* Returns 1 on success, 0 if a scratch allocation or the callback fails. */
+    const size_t scratch_checkpoint = secp256k1_scratch_checkpoint(error_callback, scratch);
+    /* Use 2(n+1) with the endomorphism, when calculating batch
+     * sizes. The reason for +1 is that we add the G scalar to the list of
+     * other scalars. */
+    size_t entries = 2*n_points + 2;
+    secp256k1_ge *points;
+    secp256k1_scalar *scalars;
+    secp256k1_gej *buckets;
+    struct secp256k1_pippenger_state *state_space;
+    size_t idx = 0; /* Next free slot in scalars[] and points[]. */
+    size_t point_idx = 0; /* Number of input points fetched from the callback so far. */
+    int bucket_window;
+
+    secp256k1_gej_set_infinity(r);
+    if (inp_g_sc == NULL && n_points == 0) {
+        return 1;
+    }
+    bucket_window = secp256k1_pippenger_bucket_window(n_points);
+
+    /* We allocate PIPPENGER_SCRATCH_OBJECTS objects on the scratch space. If
+     * these allocations change, make sure to update the
+     * PIPPENGER_SCRATCH_OBJECTS constant and pippenger_scratch_size
+     * accordingly. */
+    points = (secp256k1_ge *) secp256k1_scratch_alloc(error_callback, scratch, entries * sizeof(*points));
+    scalars = (secp256k1_scalar *) secp256k1_scratch_alloc(error_callback, scratch, entries * sizeof(*scalars));
+    state_space = (struct secp256k1_pippenger_state *) secp256k1_scratch_alloc(error_callback, scratch, sizeof(*state_space));
+    if (points == NULL || scalars == NULL || state_space == NULL) { /* Checked before state_space is dereferenced below. */
+        secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
+        return 0;
+    }
+    state_space->ps = (struct secp256k1_pippenger_point_state *) secp256k1_scratch_alloc(error_callback, scratch, entries * sizeof(*state_space->ps));
+    state_space->wnaf_na = (int *) secp256k1_scratch_alloc(error_callback, scratch, entries*(WNAF_SIZE(bucket_window+1)) * sizeof(int));
+    buckets = (secp256k1_gej *) secp256k1_scratch_alloc(error_callback, scratch, ((size_t)1 << bucket_window) * sizeof(*buckets));
+    if (state_space->ps == NULL || state_space->wnaf_na == NULL || buckets == NULL) {
+        secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
+        return 0;
+    }
+
+    if (inp_g_sc != NULL) { /* The G term, if present, occupies slots 0 and 1. */
+        scalars[0] = *inp_g_sc;
+        points[0] = secp256k1_ge_const_g;
+        idx++;
+        secp256k1_ecmult_endo_split(&scalars[0], &scalars[1], &points[0], &points[1]);
+        idx++;
+    }
+
+    while (point_idx < n_points) { /* Each input point takes two consecutive slots: itself and its lambda multiple. */
+        if (!cb(&scalars[idx], &points[idx], point_idx + cb_offset, cbdata)) {
+            secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
+            return 0;
+        }
+        idx++;
+        secp256k1_ecmult_endo_split(&scalars[idx - 1], &scalars[idx], &points[idx - 1], &points[idx]);
+        idx++;
+        point_idx++;
+    }
+
+    secp256k1_ecmult_pippenger_wnaf(buckets, bucket_window, state_space, r, scalars, points, idx);
+    secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
+    return 1;
+}
+
+/* Wrapper for secp256k1_ecmult_multi_func interface: forwards to the batch function with a callback offset of 0. */
+static int secp256k1_ecmult_pippenger_batch_single(const secp256k1_callback* error_callback, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) {
+    return secp256k1_ecmult_pippenger_batch(error_callback, scratch, r, inp_g_sc, cb, cbdata, n, 0);
+}
+
+/**
+ * Determines how many points (not counting G) a Pippenger batch may contain
+ * for the given scratch space. The result is chosen such that any smaller
+ * number of points can be used as well.
+ */
+static size_t secp256k1_pippenger_max_points(const secp256k1_callback* error_callback, secp256k1_scratch *scratch) {
+    const size_t budget = secp256k1_scratch_max_allocation(error_callback, scratch, PIPPENGER_SCRATCH_OBJECTS);
+    size_t best = 0;
+    int w;
+
+    for (w = 1; w <= PIPPENGER_MAX_BUCKET_WINDOW; w++) {
+        const size_t window_limit = secp256k1_pippenger_bucket_window_inv(w);
+        /* Each point costs two entries because of the endomorphism split. */
+        const size_t pair_size = 2 * (sizeof(secp256k1_ge) + sizeof(secp256k1_scalar) + sizeof(struct secp256k1_pippenger_point_state) + (WNAF_SIZE(w+1)+1)*sizeof(int));
+        /* Fixed cost: the buckets, the state struct and the two entries reserved for G. */
+        const size_t fixed_cost = (sizeof(secp256k1_gej) << w) + pair_size + sizeof(struct secp256k1_pippenger_state);
+        size_t fit;
+
+        /* Even a batch without any points would not fit with this window. */
+        if (fixed_cost > budget) {
+            break;
+        }
+        fit = (budget - fixed_cost) / pair_size;
+        if (fit > window_limit) {
+            fit = window_limit;
+        }
+        if (best < fit) {
+            best = fit;
+        }
+        if (fit < window_limit) {
+            /* A larger bucket_window may support even more points, but picking
+             * it would mean the caller could not safely use every point count
+             * below the returned value. */
+            break;
+        }
+    }
+    return best;
+}
+
+/* Straightforward ecmult_multi: multiplies every point on its own and sums
+ * the products. No scratch space is needed. */
+static int secp256k1_ecmult_multi_simple_var(secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points) {
+    secp256k1_gej term;
+    size_t k = 0;
+    secp256k1_gej_set_infinity(&term);
+    secp256k1_gej_set_infinity(r);
+    /* Start with r = inp_g_sc*G; the point argument is paired with a zero scalar. */
+    secp256k1_ecmult(r, &term, &secp256k1_scalar_zero, inp_g_sc);
+    while (k < n_points) {
+        secp256k1_scalar factor;
+        secp256k1_gej base_j;
+        secp256k1_ge base;
+        if (!cb(&factor, &base, k, cbdata)) {
+            return 0;
+        }
+        /* Accumulate factor*base into r. */
+        secp256k1_gej_set_ge(&base_j, &base);
+        secp256k1_ecmult(&term, &base_j, &factor, NULL);
+        secp256k1_gej_add_var(r, r, &term, NULL);
+        k++;
+    }
+    return 1;
+}
+
+/* Splits n points into batches of at most max_n_batch_points points (capped at
+ * ECMULT_MAX_POINTS_PER_BATCH). Returns 0, leaving the outputs untouched, if no point fits in a batch. */
+static int secp256k1_ecmult_multi_batch_size_helper(size_t *n_batches, size_t *n_batch_points, size_t max_n_batch_points, size_t n) {
+    size_t cap = max_n_batch_points;
+    size_t batches = 0;
+    size_t per_batch = 0;
+    if (cap == 0) {
+        return 0;
+    }
+    if (n != 0) {
+        cap = cap > ECMULT_MAX_POINTS_PER_BATCH ? ECMULT_MAX_POINTS_PER_BATCH : cap;
+        /* batches = ceil(n/cap), then per_batch = ceil(n/batches). */
+        batches = CEIL_DIV(n, cap);
+        per_batch = CEIL_DIV(n, batches);
+    }
+    *n_batches = batches;
+    *n_batch_points = per_batch;
+    return 1;
+}
+
+typedef int (*secp256k1_ecmult_multi_func)(const secp256k1_callback* error_callback, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t); /* Matches the signature of the *_batch_single wrappers. */
+static int secp256k1_ecmult_multi_var(const secp256k1_callback* error_callback, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) { /* Multi-multiplication entry point: picks simple, Strauss or Pippenger and runs it batch by batch. Returns 0 on failure. */
+    size_t i;
+
+    int (*f)(const secp256k1_callback* error_callback, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t, size_t);
+    size_t n_batches;
+    size_t n_batch_points;
+
+    secp256k1_gej_set_infinity(r);
+    if (inp_g_sc == NULL && n == 0) {
+        return 1;
+    } else if (n == 0) {
+        secp256k1_ecmult(r, r, &secp256k1_scalar_zero, inp_g_sc); /* Only the G term is present. */
+        return 1;
+    }
+    if (scratch == NULL) {
+        return secp256k1_ecmult_multi_simple_var(r, inp_g_sc, cb, cbdata, n);
+    }
+
+    /* Compute the batch sizes for Pippenger's algorithm given a scratch space. If it's greater than
+     * a threshold use Pippenger's algorithm. Otherwise use Strauss' algorithm.
+     * As a first step check if there's enough space for Pippenger's algo (which requires less space
+     * than Strauss' algo) and if not, use the simple algorithm. */
+    if (!secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, secp256k1_pippenger_max_points(error_callback, scratch), n)) {
+        return secp256k1_ecmult_multi_simple_var(r, inp_g_sc, cb, cbdata, n);
+    }
+    if (n_batch_points >= ECMULT_PIPPENGER_THRESHOLD) {
+        f = secp256k1_ecmult_pippenger_batch;
+    } else {
+        if (!secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, secp256k1_strauss_max_points(error_callback, scratch), n)) {
+            return secp256k1_ecmult_multi_simple_var(r, inp_g_sc, cb, cbdata, n);
+        }
+        f = secp256k1_ecmult_strauss_batch;
+    }
+    for(i = 0; i < n_batches; i++) {
+        size_t nbp = n < n_batch_points ? n : n_batch_points; /* The last batch may hold fewer points. */
+        size_t offset = n_batch_points*i;
+        secp256k1_gej tmp;
+        if (!f(error_callback, scratch, &tmp, i == 0 ? inp_g_sc : NULL, cb, cbdata, nbp, offset)) { /* Only the first batch carries the G term. */
+            return 0;
+        }
+        secp256k1_gej_add_var(r, r, &tmp, NULL);
+        n -= nbp;
+    }
+    return 1;
+}
+
+#endif /* SECP256K1_ECMULT_IMPL_H */
--- a/libsecp256k1/src/field.h
+++ b/libsecp256k1/src/field.h
@@ -0,0 +1,350 @@
+/***********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                              *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_FIELD_H
+#define SECP256K1_FIELD_H
+
+#include "util.h"
+
+/* This file defines the generic interface for working with secp256k1_fe
+ * objects, which represent field elements (integers modulo 2^256 - 2^32 - 977).
+ *
+ * The actual definition of the secp256k1_fe type depends on the chosen field
+ * implementation; see the field_5x52.h and field_10x26.h files for details.
+ *
+ * All secp256k1_fe objects have implicit properties that determine what
+ * operations are permitted on it. These are purely a function of what
+ * secp256k1_fe_ operations are applied on it, generally (implicitly) fixed at
+ * compile time, and do not depend on the chosen field implementation. Despite
+ * that, what these properties actually entail for the field representation
+ * values depends on the chosen field implementation. These properties are:
+ * - magnitude: an integer in [0,32]
+ * - normalized: 0 or 1; normalized=1 implies magnitude <= 1.
+ *
+ * In VERIFY mode, they are materialized explicitly as fields in the struct,
+ * allowing run-time verification of these properties. In that case, the field
+ * implementation also provides a secp256k1_fe_verify routine to verify that
+ * these fields match the run-time value and perform internal consistency
+ * checks. */
+#ifdef VERIFY
+#  define SECP256K1_FE_VERIFY_FIELDS \
+    int magnitude; \
+    int normalized;
+#else /* !VERIFY: the bookkeeping members are left out of the struct */
+#  define SECP256K1_FE_VERIFY_FIELDS
+#endif /* VERIFY */
+
+#if defined(SECP256K1_WIDEMUL_INT128)
+#include "field_5x52.h"
+#elif defined(SECP256K1_WIDEMUL_INT64)
+#include "field_10x26.h"
+#else
+#error "Please select wide multiplication implementation"
+#endif
+
+#ifdef VERIFY
+/* Magnitude and normalized value for constants: two comma-led initializers that SECP256K1_FE_CONST appends after SECP256K1_FE_CONST_INNER. Arguments are evaluated several times and must be side-effect free. */
+#define SECP256K1_FE_VERIFY_CONST(d7, d6, d5, d4, d3, d2, d1, d0) \
+    /* Magnitude is 0 for constant 0; 1 otherwise. */ \
+    , (((d7) | (d6) | (d5) | (d4) | (d3) | (d2) | (d1) | (d0)) != 0) \
+    /* Normalized is 1 unless sum(d_i<<(32*i) for i=0..7) is at least the field modulus. */ \
+    , (!(((d7) & (d6) & (d5) & (d4) & (d3) & (d2)) == 0xfffffffful && ((d1) == 0xfffffffful || ((d1) == 0xfffffffe && ((d0) >= 0xfffffc2f)))))
+#else
+#define SECP256K1_FE_VERIFY_CONST(d7, d6, d5, d4, d3, d2, d1, d0)
+#endif
+
+/** This expands to an initializer for a secp256k1_fe valued sum(2^(i*32) * d_i, i=0..7) mod p.
+ *
+ * It has magnitude 1, unless d_i are all 0, in which case the magnitude is 0.
+ * It is normalized, unless sum(2^(i*32) * d_i, i=0..7) >= p.
+ *
+ * SECP256K1_FE_CONST_INNER is provided by the implementation.
+ */
+#define SECP256K1_FE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {SECP256K1_FE_CONST_INNER((d7), (d6), (d5), (d4), (d3), (d2), (d1), (d0)) SECP256K1_FE_VERIFY_CONST((d7), (d6), (d5), (d4), (d3), (d2), (d1), (d0)) }
+
+static const secp256k1_fe secp256k1_fe_one = SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 1);
+static const secp256k1_fe secp256k1_const_beta = SECP256K1_FE_CONST(
+    0x7ae96a2bul, 0x657c0710ul, 0x6e64479eul, 0xac3434e9ul,
+    0x9cf04975ul, 0x12f58995ul, 0xc1396c28ul, 0x719501eeul
+);
+
+#ifndef VERIFY
+/* In non-VERIFY mode, we #define the fe operations to be identical to their
+ * internal field implementation, to avoid the potential overhead of a
+ * function call (even though presumably inlinable). */
+#  define secp256k1_fe_normalize secp256k1_fe_impl_normalize
+#  define secp256k1_fe_normalize_weak secp256k1_fe_impl_normalize_weak
+#  define secp256k1_fe_normalize_var secp256k1_fe_impl_normalize_var
+#  define secp256k1_fe_normalizes_to_zero secp256k1_fe_impl_normalizes_to_zero
+#  define secp256k1_fe_normalizes_to_zero_var secp256k1_fe_impl_normalizes_to_zero_var
+#  define secp256k1_fe_set_int secp256k1_fe_impl_set_int
+#  define secp256k1_fe_is_zero secp256k1_fe_impl_is_zero
+#  define secp256k1_fe_is_odd secp256k1_fe_impl_is_odd
+#  define secp256k1_fe_cmp_var secp256k1_fe_impl_cmp_var
+#  define secp256k1_fe_set_b32_mod secp256k1_fe_impl_set_b32_mod
+#  define secp256k1_fe_set_b32_limit secp256k1_fe_impl_set_b32_limit
+#  define secp256k1_fe_get_b32 secp256k1_fe_impl_get_b32
+#  define secp256k1_fe_negate_unchecked secp256k1_fe_impl_negate_unchecked
+#  define secp256k1_fe_mul_int_unchecked secp256k1_fe_impl_mul_int_unchecked
+#  define secp256k1_fe_add secp256k1_fe_impl_add
+#  define secp256k1_fe_mul secp256k1_fe_impl_mul
+#  define secp256k1_fe_sqr secp256k1_fe_impl_sqr
+#  define secp256k1_fe_cmov secp256k1_fe_impl_cmov
+#  define secp256k1_fe_to_storage secp256k1_fe_impl_to_storage
+#  define secp256k1_fe_from_storage secp256k1_fe_impl_from_storage
+#  define secp256k1_fe_inv secp256k1_fe_impl_inv
+#  define secp256k1_fe_inv_var secp256k1_fe_impl_inv_var
+#  define secp256k1_fe_get_bounds secp256k1_fe_impl_get_bounds
+#  define secp256k1_fe_half secp256k1_fe_impl_half
+#  define secp256k1_fe_add_int secp256k1_fe_impl_add_int
+#  define secp256k1_fe_is_square_var secp256k1_fe_impl_is_square_var
+#endif /* !defined(VERIFY) */
+
+/** Normalize a field element.
+ *
+ * On input, r must be a valid field element.
+ * On output, r represents the same value but has normalized=1 and magnitude=1.
+ */
+static void secp256k1_fe_normalize(secp256k1_fe *r);
+
+/** Give a field element magnitude 1.
+ *
+ * On input, r must be a valid field element.
+ * On output, r represents the same value but has magnitude=1. Normalized is unchanged.
+ */
+static void secp256k1_fe_normalize_weak(secp256k1_fe *r);
+
+/** Normalize a field element, without constant-time guarantee.
+ *
+ * Identical in behavior to secp256k1_fe_normalize, but not constant time in r.
+ */
+static void secp256k1_fe_normalize_var(secp256k1_fe *r);
+
+/** Determine whether r represents field element 0.
+ *
+ * On input, r must be a valid field element.
+ * Returns whether r = 0 (mod p).
+ */
+static int secp256k1_fe_normalizes_to_zero(const secp256k1_fe *r);
+
+/** Determine whether r represents field element 0, without constant-time guarantee.
+ *
+ * Identical in behavior to secp256k1_fe_normalizes_to_zero, but not constant time in r.
+ */
+static int secp256k1_fe_normalizes_to_zero_var(const secp256k1_fe *r);
+
+/** Set a field element to an integer in range [0,0x7FFF].
+ *
+ * On input, r does not need to be initialized, a must be in [0,0x7FFF].
+ * On output, r represents value a, is normalized and has magnitude (a!=0).
+ */
+static void secp256k1_fe_set_int(secp256k1_fe *r, int a);
+
+/** Clear a field element to prevent leaking sensitive information. */
+static void secp256k1_fe_clear(secp256k1_fe *a);
+
+/** Determine whether a represents field element 0.
+ *
+ * On input, a must be a valid normalized field element.
+ * Returns whether a = 0 (mod p).
+ *
+ * This behaves identically to secp256k1_fe_normalizes_to_zero{,_var}, but requires
+ * normalized input (and is much faster).
+ */
+static int secp256k1_fe_is_zero(const secp256k1_fe *a);
+
+/** Determine whether a (mod p) is odd.
+ *
+ * On input, a must be a valid normalized field element.
+ * Returns (int(a) mod p) & 1.
+ */
+static int secp256k1_fe_is_odd(const secp256k1_fe *a);
+
+/** Determine whether two field elements are equal.
+ *
+ * On input, a and b must be valid field elements with magnitudes not exceeding
+ * 1 and 31, respectively.
+ * Returns a = b (mod p).
+ */
+static int secp256k1_fe_equal(const secp256k1_fe *a, const secp256k1_fe *b);
+
+/** Compare the values represented by 2 field elements, without constant-time guarantee.
+ *
+ * On input, a and b must be valid normalized field elements.
+ * Returns 1 if a > b, -1 if a < b, and 0 if a = b (comparisons are done as integers
+ * in range 0..p-1).
+ */
+static int secp256k1_fe_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b);
+
+/** Set a field element equal to the element represented by a provided 32-byte big endian value
+ * interpreted modulo p.
+ *
+ * On input, r does not need to be initialized. a must be a pointer to an initialized 32-byte array.
+ * On output, r = a (mod p). It will have magnitude 1, and not be normalized.
+ */
+static void secp256k1_fe_set_b32_mod(secp256k1_fe *r, const unsigned char *a);
+
+/** Set a field element equal to a provided 32-byte big endian value, checking for overflow.
+ *
+ * On input, r does not need to be initialized. a must be a pointer to an initialized 32-byte array.
+ * On output, r = a if (a < p), it will be normalized with magnitude 1, and 1 is returned.
+ * If a >= p, 0 is returned, and r will be made invalid (and must not be used without overwriting).
+ */
+static int secp256k1_fe_set_b32_limit(secp256k1_fe *r, const unsigned char *a);
+
+/** Convert a field element to 32-byte big endian byte array.
+ * On input, a must be a valid normalized field element, and r a pointer to a 32-byte array.
+ * On output, r = a (mod p).
+ */
+static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe *a);
+
+/** Negate a field element.
+ *
+ * On input, r does not need to be initialized. a must be a valid field element with
+ * magnitude not exceeding m. m must be an integer constant expression in [0,31].
+ * Performs {r = -a}.
+ * On output, r will not be normalized, and will have magnitude m+1.
+ */
+#define secp256k1_fe_negate(r, a, m) ASSERT_INT_CONST_AND_DO(m, secp256k1_fe_negate_unchecked(r, a, m))
+
+/** Like secp256k1_fe_negate but m is not checked to be an integer constant expression.
+ *
+ * Should not be called directly outside of tests.
+ */
+static void secp256k1_fe_negate_unchecked(secp256k1_fe *r, const secp256k1_fe *a, int m);
+
+/** Add a small integer to a field element.
+ *
+ * Performs {r += a}. The magnitude of r increases by 1, and normalized is cleared.
+ * a must be in range [0,0x7FFF].
+ */
+static void secp256k1_fe_add_int(secp256k1_fe *r, int a);
+
+/** Multiply a field element with a small integer.
+ *
+ * On input, r must be a valid field element. a must be an integer constant expression in [0,32].
+ * The magnitude of r times a must not exceed 32.
+ * Performs {r *= a}.
+ * On output, r's magnitude is multiplied by a, and r will not be normalized.
+ */
+#define secp256k1_fe_mul_int(r, a) ASSERT_INT_CONST_AND_DO(a, secp256k1_fe_mul_int_unchecked(r, a))
+
+/** Like secp256k1_fe_mul_int but a is not checked to be an integer constant expression.
+ *
+ * Should not be called directly outside of tests.
+ */
+static void secp256k1_fe_mul_int_unchecked(secp256k1_fe *r, int a);
+
+/** Increment a field element by another.
+ *
+ * On input, r and a must be valid field elements, not necessarily normalized.
+ * The sum of their magnitudes must not exceed 32.
+ * Performs {r += a}.
+ * On output, r will not be normalized, and will have magnitude incremented by a's.
+ */
+static void secp256k1_fe_add(secp256k1_fe *r, const secp256k1_fe *a);
+
+/** Multiply two field elements.
+ *
+ * On input, a and b must be valid field elements; r does not need to be initialized.
+ * r and a may point to the same object, but neither may point to the object pointed
+ * to by b. The magnitudes of a and b must not exceed 8.
+ * Performs {r = a * b}
+ * On output, r will have magnitude 1, but won't be normalized.
+ */
+static void secp256k1_fe_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp256k1_fe * SECP256K1_RESTRICT b);
+
+/** Square a field element.
+ *
+ * On input, a must be a valid field element; r does not need to be initialized. The magnitude
+ * of a must not exceed 8.
+ * Performs {r = a**2}
+ * On output, r will have magnitude 1, but won't be normalized.
+ */
+static void secp256k1_fe_sqr(secp256k1_fe *r, const secp256k1_fe *a);
+
+/** Compute a square root of a field element.
+ *
+ * On input, a must be a valid field element with magnitude<=8; r need not be initialized.
+ * If sqrt(a) exists, performs {r = sqrt(a)} and returns 1.
+ * Otherwise, sqrt(-a) exists. The function performs {r = sqrt(-a)} and returns 0.
+ * The resulting value represented by r will be a square itself.
+ * Variables r and a must not point to the same object.
+ * On output, r will have magnitude 1 but will not be normalized.
+ */
+static int secp256k1_fe_sqrt(secp256k1_fe * SECP256K1_RESTRICT r, const secp256k1_fe * SECP256K1_RESTRICT a);
+
+/** Compute the modular inverse of a field element.
+ *
+ * On input, a must be a valid field element; r need not be initialized.
+ * Performs {r = a**(p-2)} (which maps 0 to 0, and every other element to its
+ * inverse).
+ * On output, r will have magnitude (a.magnitude != 0) and be normalized.
+ */
+static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *a);
+
+/** Compute the modular inverse of a field element, without constant-time guarantee.
+ *
+ * Behaves identically to secp256k1_fe_inv, but is not constant-time in a.
+ */
+static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *a);
+
+/** Convert a field element to secp256k1_fe_storage.
+ *
+ * On input, a must be a valid normalized field element.
+ * Performs {r = a}.
+ */
+static void secp256k1_fe_to_storage(secp256k1_fe_storage *r, const secp256k1_fe *a);
+
+/** Convert a field element back from secp256k1_fe_storage.
+ *
+ * On input, r need not be initialized.
+ * Performs {r = a}.
+ * On output, r will be normalized and will have magnitude 1.
+ */
+static void secp256k1_fe_from_storage(secp256k1_fe *r, const secp256k1_fe_storage *a);
+
+/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time.  Both *r and *a must be initialized.*/
+static void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag);
+
+/** Conditionally move a field element in constant time.
+ *
+ * On input, both r and a must be valid field elements. Flag must be 0 or 1.
+ * Performs {r = flag ? a : r}.
+ *
+ * On output, r's magnitude will be the maximum of both input magnitudes.
+ * It will be normalized if and only if both inputs were normalized.
+ */
+static void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag);
+
+/** Halve the value of a field element modulo the field prime in constant-time.
+ *
+ * On input, r must be a valid field element.
+ * On output, r will be normalized and have magnitude floor(m/2) + 1 where m is
+ * the magnitude of r on input.
+ */
+static void secp256k1_fe_half(secp256k1_fe *r);
+
+/** Sets r to a field element with magnitude m, normalized if (and only if) m==0.
+ *  The value is chosen so that it is likely to trigger edge cases related to
+ *  internal overflows. */
+static void secp256k1_fe_get_bounds(secp256k1_fe *r, int m);
+
+/** Determine whether a is a square (modulo p).
+ *
+ * On input, a must be a valid field element.
+ */
+static int secp256k1_fe_is_square_var(const secp256k1_fe *a);
+
+/** Check invariants on a field element (no-op unless VERIFY is enabled). */
+static void secp256k1_fe_verify(const secp256k1_fe *a);
+#define SECP256K1_FE_VERIFY(a) secp256k1_fe_verify(a)
+
+/** Check that magnitude of a is at most m (no-op unless VERIFY is enabled). */
+static void secp256k1_fe_verify_magnitude(const secp256k1_fe *a, int m);
+#define SECP256K1_FE_VERIFY_MAGNITUDE(a, m) secp256k1_fe_verify_magnitude(a, m)
+
+#endif /* SECP256K1_FIELD_H */
--- a/libsecp256k1/src/field_10x26.h
+++ b/libsecp256k1/src/field_10x26.h
@@ -0,0 +1,57 @@
+/***********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                              *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_FIELD_REPR_H
+#define SECP256K1_FIELD_REPR_H
+
+#include <stdint.h>
+
+/** This field implementation represents the value as 10 uint32_t limbs in base
+ *  2^26. */
+typedef struct {
+    /* A field element f represents the sum(i=0..9, f.n[i] << (i*26)) mod p,
+     * where p is the field modulus, 2^256 - 2^32 - 977.
+     *
+     * The individual limbs f.n[i] can exceed 2^26; the field's magnitude roughly
+     * corresponds to how much excess is allowed. The value
+     * sum(i=0..9, f.n[i] << (i*26)) may exceed p, unless the field element is
+     * normalized. */
+    uint32_t n[10];
+    /*
+     * Magnitude m requires:
+     *     n[i] <= 2 * m * (2^26 - 1) for i=0..8
+     *     n[9] <= 2 * m * (2^22 - 1)
+     *
+     * Normalized requires:
+     *     n[i] <= (2^26 - 1) for i=0..8
+     *     sum(i=0..9, n[i] << (i*26)) < p
+     *     (together these imply n[9] <= 2^22 - 1)
+     */
+    SECP256K1_FE_VERIFY_FIELDS /* defined in field.h: the int members magnitude and normalized under VERIFY, nothing otherwise */
+} secp256k1_fe;
+
+/* Unpacks a constant given as eight 32-bit words (d7 most significant, d0 least) into an overlapping multi-limbed FE element of ten 26-bit limbs. Every argument is evaluated more than once. */
+#define SECP256K1_FE_CONST_INNER(d7, d6, d5, d4, d3, d2, d1, d0) { \
+    (d0) & 0x3FFFFFFUL, \
+    (((uint32_t)(d0)) >> 26) | (((uint32_t)(d1) & 0xFFFFFUL) << 6), \
+    (((uint32_t)(d1)) >> 20) | (((uint32_t)(d2) & 0x3FFFUL) << 12), \
+    (((uint32_t)(d2)) >> 14) | (((uint32_t)(d3) & 0xFFUL) << 18), \
+    (((uint32_t)(d3)) >> 8) | (((uint32_t)(d4) & 0x3UL) << 24), \
+    (((uint32_t)(d4)) >> 2) & 0x3FFFFFFUL, \
+    (((uint32_t)(d4)) >> 28) | (((uint32_t)(d5) & 0x3FFFFFUL) << 4), \
+    (((uint32_t)(d5)) >> 22) | (((uint32_t)(d6) & 0xFFFFUL) << 10), \
+    (((uint32_t)(d6)) >> 16) | (((uint32_t)(d7) & 0x3FFUL) << 16), \
+    (((uint32_t)(d7)) >> 10) \
+}
+
+typedef struct {
+    uint32_t n[8]; /* eight 32-bit words; SECP256K1_FE_STORAGE_CONST places d0 in n[0] and d7 in n[7] */
+} secp256k1_fe_storage;
+
+#define SECP256K1_FE_STORAGE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{ (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }} /* initializer; words are stored least significant first */
+#define SECP256K1_FE_STORAGE_CONST_GET(d) (d).n[7], (d).n[6], (d).n[5], (d).n[4], (d).n[3], (d).n[2], (d).n[1], (d).n[0] /* inverse: expands to the list d7..d0; d may be any struct-valued expression */
+
+#endif /* SECP256K1_FIELD_REPR_H */
--- a/libsecp256k1/src/field_10x26_impl.h
+++ b/libsecp256k1/src/field_10x26_impl.h
--- a/libsecp256k1/src/field_5x52.h
+++ b/libsecp256k1/src/field_5x52.h
@@ -0,0 +1,62 @@
+/***********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                              *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_FIELD_REPR_H
+#define SECP256K1_FIELD_REPR_H
+
+#include <stdint.h>
+
+/** This field implementation represents the value as 5 uint64_t limbs in base
+ *  2^52. */
+typedef struct {
+    /* A field element f represents the sum(i=0..4, f.n[i] << (i*52)) mod p,
+     * where p is the field modulus, 2^256 - 2^32 - 977.
+     *
+     * The individual limbs f.n[i] can exceed 2^52; the field's magnitude roughly
+     * corresponds to how much excess is allowed. The value
+     * sum(i=0..4, f.n[i] << (i*52)) may exceed p, unless the field element is
+     * normalized. */
+    uint64_t n[5];
+    /*
+     * Magnitude m requires:
+     *     n[i] <= 2 * m * (2^52 - 1) for i=0..3
+     *     n[4] <= 2 * m * (2^48 - 1)
+     *
+     * Normalized requires:
+     *     n[i] <= (2^52 - 1) for i=0..3
+     *     sum(i=0..4, n[i] << (i*52)) < p
+     *     (together these imply n[4] <= 2^48 - 1)
+     */
+    SECP256K1_FE_VERIFY_FIELDS /* defined in field.h: the int members magnitude and normalized under VERIFY, nothing otherwise */
+} secp256k1_fe;
+
+/* Unpacks a constant given as eight 32-bit words (d7 most significant, d0 least) into an overlapping multi-limbed FE element of five 52-bit limbs. */
+#define SECP256K1_FE_CONST_INNER(d7, d6, d5, d4, d3, d2, d1, d0) { \
+    (d0) | (((uint64_t)(d1) & 0xFFFFFUL) << 32), \
+    ((uint64_t)(d1) >> 20) | (((uint64_t)(d2)) << 12) | (((uint64_t)(d3) & 0xFFUL) << 44), \
+    ((uint64_t)(d3) >> 8) | (((uint64_t)(d4) & 0xFFFFFFFUL) << 24), \
+    ((uint64_t)(d4) >> 28) | (((uint64_t)(d5)) << 4) | (((uint64_t)(d6) & 0xFFFFUL) << 36), \
+    ((uint64_t)(d6) >> 16) | (((uint64_t)(d7)) << 16) \
+}
+
+typedef struct {
+    uint64_t n[4]; /* four 64-bit words; SECP256K1_FE_STORAGE_CONST packs d1:d0 into n[0] up to d7:d6 into n[3] */
+} secp256k1_fe_storage;
+
+#define SECP256K1_FE_STORAGE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{ /* initializer: two 32-bit words per 64-bit limb, d0 lowest */ \
+    (d0) | (((uint64_t)(d1)) << 32), \
+    (d2) | (((uint64_t)(d3)) << 32), \
+    (d4) | (((uint64_t)(d5)) << 32), \
+    (d6) | (((uint64_t)(d7)) << 32) \
+}}
+
+#define SECP256K1_FE_STORAGE_CONST_GET(d) /* inverse: expands to the list d7..d0; d may be any struct-valued expression */ \
+    (uint32_t)((d).n[3] >> 32), (uint32_t)(d).n[3], \
+    (uint32_t)((d).n[2] >> 32), (uint32_t)(d).n[2], \
+    (uint32_t)((d).n[1] >> 32), (uint32_t)(d).n[1], \
+    (uint32_t)((d).n[0] >> 32), (uint32_t)(d).n[0]
+
+#endif /* SECP256K1_FIELD_REPR_H */
--- a/libsecp256k1/src/field_5x52_impl.h
+++ b/libsecp256k1/src/field_5x52_impl.h
@@ -0,0 +1,522 @@
+/***********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                              *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_FIELD_REPR_IMPL_H
+#define SECP256K1_FIELD_REPR_IMPL_H
+
+#include "checkmem.h"
+#include "util.h"
+#include "field.h"
+#include "modinv64_impl.h"
+
+#include "field_5x52_int128_impl.h"
+
+#ifdef VERIFY
+static void secp256k1_fe_impl_verify(const secp256k1_fe *a) {
+    const uint64_t *d = a->n;
+    int m = a->normalized ? 1 : 2 * a->magnitude; /* limb-bound multiplier: 1 if normalized, otherwise 2*magnitude */
+    /* Each limb must stay within m times its mask (52 bits for d[0..3], 48 bits for d[4]). */
+    VERIFY_CHECK(d[0] <= 0xFFFFFFFFFFFFFULL * m);
+    VERIFY_CHECK(d[1] <= 0xFFFFFFFFFFFFFULL * m);
+    VERIFY_CHECK(d[2] <= 0xFFFFFFFFFFFFFULL * m);
+    VERIFY_CHECK(d[3] <= 0xFFFFFFFFFFFFFULL * m);
+    VERIFY_CHECK(d[4] <= 0x0FFFFFFFFFFFFULL * m);
+    if (a->normalized) {
+        if ((d[4] == 0x0FFFFFFFFFFFFULL) && ((d[3] & d[2] & d[1]) == 0xFFFFFFFFFFFFFULL)) { /* upper limbs match the secp256k1 'p' value defined in "Standards for Efficient Cryptography" (SEC2) 2.7.1 */
+            VERIFY_CHECK(d[0] < 0xFFFFEFFFFFC2FULL); /* ... so the lowest limb must be below p's lowest limb */
+        }
+    }
+}
+#endif
+
+static void secp256k1_fe_impl_get_bounds(secp256k1_fe *r, int m) {
+    /* Fill every limb with 2*m times its mask (52-bit mask for limbs 0..3, 48-bit mask for limb 4). */
+    const uint64_t low_max = 0xFFFFFFFFFFFFFULL * 2 * m;
+    const uint64_t top_max = 0x0FFFFFFFFFFFFULL * 2 * m;
+    r->n[0] = r->n[1] = r->n[2] = r->n[3] = low_max;
+    r->n[4] = top_max;
+}
+
+static void secp256k1_fe_impl_normalize(secp256k1_fe *r) { /* fully reduces r below p; contains no data-dependent branch */
+    uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
+
+    /* Reduce t4 at the start so there will be at most a single carry from the first pass */
+    uint64_t m; /* AND of t1..t3 after the first pass; all ones exactly when they equal p's middle limbs */
+    uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
+
+    /* The first pass ensures the magnitude is 1, ... */
+    t0 += x * 0x1000003D1ULL; /* 2^256 = 2^32 + 977 = 0x1000003D1 (mod p): fold the excess above bit 256 into the low limb */
+    t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
+    t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; m = t1;
+    t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; m &= t2;
+    t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; m &= t3;
+
+    /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */
+    VERIFY_CHECK(t4 >> 49 == 0);
+
+    /* At most a single final reduction is needed; check if the value is >= the field characteristic */
+    x = (t4 >> 48) | ((t4 == 0x0FFFFFFFFFFFFULL) & (m == 0xFFFFFFFFFFFFFULL)
+        & (t0 >= 0xFFFFEFFFFFC2FULL));
+
+    /* Apply the final reduction (for constant-time behaviour, we do it always) */
+    t0 += x * 0x1000003D1ULL; /* x is 0 or 1 here, so this adds either nothing or one copy of 2^256 - p */
+    t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
+    t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL;
+    t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL;
+    t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL;
+
+    /* If t4 didn't carry to bit 48 already, then it should have after any final reduction */
+    VERIFY_CHECK(t4 >> 48 == x);
+
+    /* Mask off the possible multiple of 2^256 from the final reduction */
+    t4 &= 0x0FFFFFFFFFFFFULL;
+
+    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
+}
+
+static void secp256k1_fe_impl_normalize_weak(secp256k1_fe *r) { /* carry pass only: no comparison against p and no final reduction */
+    uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
+
+    /* Reduce t4 at the start so there will be at most a single carry from the first pass */
+    uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
+
+    /* The first pass ensures the magnitude is 1, ... */
+    t0 += x * 0x1000003D1ULL; /* 2^256 = 2^32 + 977 = 0x1000003D1 (mod p): fold the excess above bit 256 into the low limb */
+    t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
+    t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL;
+    t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL;
+    t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL;
+
+    /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */
+    VERIFY_CHECK(t4 >> 49 == 0);
+
+    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; /* t4 is stored unmasked, so a carry at bit 48 is kept */
+}
+
+static void secp256k1_fe_impl_normalize_var(secp256k1_fe *r) { /* same result as secp256k1_fe_impl_normalize, but branches on the data */
+    uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
+
+    /* Reduce t4 at the start so there will be at most a single carry from the first pass */
+    uint64_t m; /* AND of t1..t3 after the first pass; all ones exactly when they equal p's middle limbs */
+    uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
+
+    /* The first pass ensures the magnitude is 1, ... */
+    t0 += x * 0x1000003D1ULL; /* 2^256 = 2^32 + 977 = 0x1000003D1 (mod p): fold the excess above bit 256 into the low limb */
+    t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
+    t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; m = t1;
+    t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; m &= t2;
+    t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; m &= t3;
+
+    /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */
+    VERIFY_CHECK(t4 >> 49 == 0);
+
+    /* At most a single final reduction is needed; check if the value is >= the field characteristic */
+    x = (t4 >> 48) | ((t4 == 0x0FFFFFFFFFFFFULL) & (m == 0xFFFFFFFFFFFFFULL)
+        & (t0 >= 0xFFFFEFFFFFC2FULL));
+
+    if (x) { /* data-dependent branch: the final reduction is skipped when it is not needed */
+        t0 += 0x1000003D1ULL;
+        t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
+        t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL;
+        t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL;
+        t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL;
+
+        /* If t4 didn't carry to bit 48 already, then it should have after any final reduction */
+        VERIFY_CHECK(t4 >> 48 == x);
+
+        /* Mask off the possible multiple of 2^256 from the final reduction */
+        t4 &= 0x0FFFFFFFFFFFFULL;
+    }
+
+    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
+}
+
+static int secp256k1_fe_impl_normalizes_to_zero(const secp256k1_fe *r) { /* works on local copies of the limbs; r itself is not modified */
+    uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
+
+    /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
+    uint64_t z0, z1;
+
+    /* Reduce t4 at the start so there will be at most a single carry from the first pass */
+    uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
+
+    /* The first pass ensures the magnitude is 1, ... (z0 ORs the limbs together; z1 ANDs them after XOR-ing t0 and t4 so that the limbs of P all become 0xFFFFFFFFFFFFF) */
+    t0 += x * 0x1000003D1ULL;
+    t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL; z0  = t0; z1  = t0 ^ 0x1000003D0ULL;
+    t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; z0 |= t1; z1 &= t1;
+    t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; z0 |= t2; z1 &= t2;
+    t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; z0 |= t3; z1 &= t3;
+                                                z0 |= t4; z1 &= t4 ^ 0xF000000000000ULL;
+
+    /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */
+    VERIFY_CHECK(t4 >> 49 == 0);
+
+    return (z0 == 0) | (z1 == 0xFFFFFFFFFFFFFULL); /* raw value 0, or raw value exactly P */
+}
+
+static int secp256k1_fe_impl_normalizes_to_zero_var(const secp256k1_fe *r) { /* same test as the non-_var version, with an early exit on the lowest limb */
+    uint64_t t0, t1, t2, t3, t4;
+    uint64_t z0, z1;
+    uint64_t x;
+
+    t0 = r->n[0];
+    t4 = r->n[4];
+
+    /* Reduce t4 at the start so there will be at most a single carry from the first pass */
+    x = t4 >> 48;
+
+    /* The first pass ensures the magnitude is 1, ... */
+    t0 += x * 0x1000003D1ULL;
+
+    /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
+    z0 = t0 & 0xFFFFFFFFFFFFFULL;
+    z1 = z0 ^ 0x1000003D0ULL;
+
+    /* Fast return path should catch the majority of cases */
+    if ((z0 != 0ULL) & (z1 != 0xFFFFFFFFFFFFFULL)) {
+        return 0;
+    }
+
+    t1 = r->n[1]; /* the remaining limbs are loaded only when the lowest limb did not already rule out 0 and P */
+    t2 = r->n[2];
+    t3 = r->n[3];
+
+    t4 &= 0x0FFFFFFFFFFFFULL;
+
+    t1 += (t0 >> 52);
+    t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; z0 |= t1; z1 &= t1;
+    t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; z0 |= t2; z1 &= t2;
+    t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; z0 |= t3; z1 &= t3;
+                                                z0 |= t4; z1 &= t4 ^ 0xF000000000000ULL;
+
+    /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */
+    VERIFY_CHECK(t4 >> 49 == 0);
+
+    return (z0 == 0) | (z1 == 0xFFFFFFFFFFFFFULL); /* raw value 0, or raw value exactly P */
+}
+
+SECP256K1_INLINE static void secp256k1_fe_impl_set_int(secp256k1_fe *r, int a) {
+    r->n[4] = r->n[3] = r->n[2] = r->n[1] = 0; /* the four upper limbs are cleared */
+    r->n[0] = a; /* the whole value is stored in the lowest limb */
+}
+
+SECP256K1_INLINE static int secp256k1_fe_impl_is_zero(const secp256k1_fe *a) {
+    const uint64_t any_bits = a->n[0] | a->n[1] | a->n[2] | a->n[3] | a->n[4]; /* OR of all five limbs */
+    return any_bits == 0;
+}
+
+SECP256K1_INLINE static int secp256k1_fe_impl_is_odd(const secp256k1_fe *a) {
+    return (int)(a->n[0] & 1); /* lowest bit of the lowest limb */
+}
+
+static int secp256k1_fe_impl_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b) {
+    /* Scan limbs from most to least significant; the first differing limb decides (early exit). */
+    int limb = 5;
+    while (limb-- > 0) {
+        const uint64_t x = a->n[limb];
+        const uint64_t y = b->n[limb];
+        if (x != y) {
+            return x > y ? 1 : -1;
+        }
+    }
+    return 0;
+}
+
+static void secp256k1_fe_impl_set_b32_mod(secp256k1_fe *r, const unsigned char *a) {
+    /* Each limb is assembled most significant byte first in an accumulator. Bytes 25 and 12
+     * straddle limb boundaries (one nibble per limb). No reduction modulo p takes place. */
+    uint64_t acc;
+    int i;
+    /* n[0]: low nibble of a[25], then a[26..31]. */
+    acc = (uint64_t)(a[25] & 0xF);
+    for (i = 26; i <= 31; i++) {
+        acc = (acc << 8) | (uint64_t)a[i];
+    }
+    r->n[0] = acc;
+    /* n[1]: a[19..24], then the high nibble of a[25]. */
+    acc = 0;
+    for (i = 19; i <= 24; i++) {
+        acc = (acc << 8) | (uint64_t)a[i];
+    }
+    r->n[1] = (acc << 4) | (uint64_t)((a[25] >> 4) & 0xF);
+    /* n[2]: low nibble of a[12], then a[13..18]. */
+    acc = (uint64_t)(a[12] & 0xF);
+    for (i = 13; i <= 18; i++) {
+        acc = (acc << 8) | (uint64_t)a[i];
+    }
+    r->n[2] = acc;
+    /* n[3]: a[6..11], then the high nibble of a[12]. */
+    acc = 0;
+    for (i = 6; i <= 11; i++) {
+        acc = (acc << 8) | (uint64_t)a[i];
+    }
+    r->n[3] = (acc << 4) | (uint64_t)((a[12] >> 4) & 0xF);
+    /* n[4]: a[0..5], 48 bits. */
+    acc = 0;
+    for (i = 0; i <= 5; i++) {
+        acc = (acc << 8) | (uint64_t)a[i];
+    }
+    r->n[4] = acc;
+}
+
+static int secp256k1_fe_impl_set_b32_limit(secp256k1_fe *r, const unsigned char *a) {
+    secp256k1_fe_impl_set_b32_mod(r, a); /* unpack first; the limbs are then compared against those of p */
+    return !((r->n[0] >= 0xFFFFEFFFFFC2FULL) & ((r->n[1] & r->n[2] & r->n[3]) == 0xFFFFFFFFFFFFFULL) & (r->n[4] == 0x0FFFFFFFFFFFFULL));
+}
+
+/** Serialize a field element as a 32-byte big endian value. The input must be normalized. */
+static void secp256k1_fe_impl_get_b32(unsigned char *r, const secp256k1_fe *a) {
+    int i;
+
+    /* r[0..5]: bits 0..47 of limb 4, most significant byte first. */
+    for (i = 0; i < 6; i++) {
+        r[i] = (a->n[4] >> (40 - 8 * i)) & 0xFF;
+    }
+
+    /* r[6..11]: bits 4..51 of limb 3. */
+    for (i = 0; i < 6; i++) {
+        r[6 + i] = (a->n[3] >> (44 - 8 * i)) & 0xFF;
+    }
+
+    /* r[12]: low nibble of limb 3 above bits 48..51 of limb 2. */
+    r[12] = ((a->n[3] & 0xF) << 4) | ((a->n[2] >> 48) & 0xF);
+
+    /* r[13..18]: bits 0..47 of limb 2. */
+    for (i = 0; i < 6; i++) {
+        r[13 + i] = (a->n[2] >> (40 - 8 * i)) & 0xFF;
+    }
+
+    /* r[19..24]: bits 4..51 of limb 1. */
+    for (i = 0; i < 6; i++) {
+        r[19 + i] = (a->n[1] >> (44 - 8 * i)) & 0xFF;
+    }
+
+    /* r[25]: low nibble of limb 1 above bits 48..51 of limb 0. */
+    r[25] = ((a->n[1] & 0xF) << 4) | ((a->n[0] >> 48) & 0xF);
+
+    /* r[26..31]: bits 0..47 of limb 0. */
+    for (i = 0; i < 6; i++) {
+        r[26 + i] = (a->n[0] >> (40 - 8 * i)) & 0xFF;
+    }
+}
+
+SECP256K1_INLINE static void secp256k1_fe_impl_negate_unchecked(secp256k1_fe *r, const secp256k1_fe *a, int m) {
+    const uint64_t k = 2 * (uint64_t)(m + 1); /* common factor 2*(m+1) applied to every limb constant below */
+    /* For all legal values of m (0..31), the following properties hold: */
+    VERIFY_CHECK(0xFFFFEFFFFFC2FULL * 2 * (m + 1) >= 0xFFFFFFFFFFFFFULL * 2 * m);
+    VERIFY_CHECK(0xFFFFFFFFFFFFFULL * 2 * (m + 1) >= 0xFFFFFFFFFFFFFULL * 2 * m);
+    VERIFY_CHECK(0x0FFFFFFFFFFFFULL * 2 * (m + 1) >= 0x0FFFFFFFFFFFFULL * 2 * m);
+    /* Because of those bounds, none of the limb-wise subtractions below can go negative
+     * as long as a has magnitude at most m: r = k * (modulus limb) - (limb of a). */
+    r->n[0] = 0xFFFFEFFFFFC2FULL * k - a->n[0];
+    r->n[1] = 0xFFFFFFFFFFFFFULL * k - a->n[1];
+    r->n[2] = 0xFFFFFFFFFFFFFULL * k - a->n[2];
+    r->n[3] = 0xFFFFFFFFFFFFFULL * k - a->n[3];
+    r->n[4] = 0x0FFFFFFFFFFFFULL * k - a->n[4];
+}
+
+SECP256K1_INLINE static void secp256k1_fe_impl_mul_int_unchecked(secp256k1_fe *r, int a) {
+    /* Scale each of the five limbs by a in place; no carry propagation and no range check happen here. */
+    int i;
+    for (i = 0; i < 5; i++) {
+        r->n[i] *= a;
+    }
+}
+
+SECP256K1_INLINE static void secp256k1_fe_impl_add_int(secp256k1_fe *r, int a) {
+    r->n[0] = r->n[0] + a; /* only the lowest limb changes; nothing is carried into the upper limbs */
+}
+
+SECP256K1_INLINE static void secp256k1_fe_impl_add(secp256k1_fe *r, const secp256k1_fe *a) {
+    /* Limb-wise r += a over all five limbs, without carrying between limbs. */
+    int i;
+    for (i = 0; i < 5; i++) {
+        r->n[i] += a->n[i];
+    }
+}
+
+SECP256K1_INLINE static void secp256k1_fe_impl_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp256k1_fe * SECP256K1_RESTRICT b) {
+    secp256k1_fe_mul_inner(&r->n[0], &a->n[0], &b->n[0]); /* hand the raw limb arrays to the limb-level multiplier */
+}
+
+SECP256K1_INLINE static void secp256k1_fe_impl_sqr(secp256k1_fe *r, const secp256k1_fe *a) {
+    secp256k1_fe_sqr_inner(&r->n[0], &a->n[0]); /* hand the raw limb arrays to the limb-level squaring routine */
+}
+
+SECP256K1_INLINE static void secp256k1_fe_impl_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag) {
+    uint64_t mask0, mask1;
+    volatile int vflag = flag; /* NOTE(review): the volatile copy presumably keeps the compiler from turning the masking into a branch -- confirm */
+    SECP256K1_CHECKMEM_CHECK_VERIFY(r->n, sizeof(r->n));
+    mask0 = vflag + ~((uint64_t)0); /* flag 0 -> all ones (r is kept); flag 1 -> wraps around to 0 */
+    mask1 = ~mask0; /* the complement selects a */
+    r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1); /* every limb is read and rewritten whatever flag is */
+    r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
+    r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
+    r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
+    r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
+}
+
+static SECP256K1_INLINE void secp256k1_fe_impl_half(secp256k1_fe *r) {
+    uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
+    uint64_t one = (uint64_t)1;
+    uint64_t mask = -(t0 & one) >> 12; /* 52 one-bits when the lowest limb is odd, 0 when it is even */
+    /* Halve r in place: if the lowest limb is odd, the masked modulus limbs are added first so it becomes even; then all limbs shift right by one bit. */
+    /* Bounds analysis (over the rationals).
+     *
+     * Let m = r->magnitude
+     *     C = 0xFFFFFFFFFFFFFULL * 2
+     *     D = 0x0FFFFFFFFFFFFULL * 2
+     *
+     * Initial bounds: t0..t3 <= C * m
+     *                     t4 <= D * m
+     */
+
+    t0 += 0xFFFFEFFFFFC2FULL & mask;
+    t1 += mask;
+    t2 += mask;
+    t3 += mask;
+    t4 += mask >> 4; /* the top limb constant is 48 bits wide, hence the narrower mask */
+
+    VERIFY_CHECK((t0 & one) == 0);
+
+    /* t0..t3: added <= C/2
+     *     t4: added <= D/2
+     *
+     * Current bounds: t0..t3 <= C * (m + 1/2)
+     *                     t4 <= D * (m + 1/2)
+     */
+
+    r->n[0] = (t0 >> 1) + ((t1 & one) << 51); /* bit 0 of the next limb moves into bit 51 of this one */
+    r->n[1] = (t1 >> 1) + ((t2 & one) << 51);
+    r->n[2] = (t2 >> 1) + ((t3 & one) << 51);
+    r->n[3] = (t3 >> 1) + ((t4 & one) << 51);
+    r->n[4] = (t4 >> 1);
+
+    /* t0..t3: shifted right and added <= C/4 + 1/2
+     *     t4: shifted right
+     *
+     * Current bounds: t0..t3 <= C * (m/2 + 1/2)
+     *                     t4 <= D * (m/2 + 1/4)
+     *
+     * Therefore the output magnitude (M) has to be set such that:
+     *     t0..t3: C * M >= C * (m/2 + 1/2)
+     *         t4: D * M >= D * (m/2 + 1/4)
+     *
+     * It suffices for all limbs that, for any input magnitude m:
+     *     M >= m/2 + 1/2
+     *
+     * and since we want the smallest such integer value for M:
+     *     M == floor(m/2) + 1
+     */
+}
+
+static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag) {
+    uint64_t mask0, mask1;
+    volatile int vflag = flag; /* NOTE(review): the volatile copy presumably keeps the compiler from turning the masking into a branch -- confirm */
+    SECP256K1_CHECKMEM_CHECK_VERIFY(r->n, sizeof(r->n));
+    mask0 = vflag + ~((uint64_t)0); /* flag 0 -> all ones (r is kept); flag 1 -> wraps around to 0 */
+    mask1 = ~mask0; /* the complement selects a */
+    r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1); /* all four storage words are read and rewritten whatever flag is */
+    r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
+    r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
+    r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
+}
+
+static void secp256k1_fe_impl_to_storage(secp256k1_fe_storage *r, const secp256k1_fe *a) {
+    r->n[0] = (a->n[1] << 52) | a->n[0]; /* pack 5x52-bit limbs into 4x64-bit words; bits shifted past bit 63 are picked up by the next word */
+    r->n[1] = (a->n[2] << 40) | (a->n[1] >> 12);
+    r->n[2] = (a->n[3] << 28) | (a->n[2] >> 24);
+    r->n[3] = (a->n[4] << 16) | (a->n[3] >> 36);
+}
+
+static SECP256K1_INLINE void secp256k1_fe_impl_from_storage(secp256k1_fe *r, const secp256k1_fe_storage *a) {
+    r->n[0] = 0xFFFFFFFFFFFFFULL & a->n[0]; /* unpack 4x64-bit words into 5x52-bit limbs; the mask trims each piece to 52 bits */
+    r->n[1] = (0xFFFFFFFFFFFFFULL & (a->n[1] << 12)) | (a->n[0] >> 52);
+    r->n[2] = (0xFFFFFFFFFFFFFULL & (a->n[2] << 24)) | (a->n[1] >> 40);
+    r->n[3] = (0xFFFFFFFFFFFFFULL & (a->n[3] << 36)) | (a->n[2] >> 28);
+    r->n[4] = a->n[3] >> 16;
+}
+
+static void secp256k1_fe_from_signed62(secp256k1_fe *r, const secp256k1_modinv64_signed62 *a) {
+    const uint64_t a0 = a->v[0], a1 = a->v[1], a2 = a->v[2], a3 = a->v[3], a4 = a->v[4];
+    const uint64_t limb_mask = UINT64_MAX >> 12; /* the 52 low bits */
+
+    /* secp256k1_modinv64{_var} is expected to hand back a value in [0,modulus) whose limbs
+     * all lie in [0,2^62). Since the modulus is < 2^256, the top limb fits in 256-62*4 = 8 bits.
+     */
+    VERIFY_CHECK(a0 >> 62 == 0);
+    VERIFY_CHECK(a1 >> 62 == 0);
+    VERIFY_CHECK(a2 >> 62 == 0);
+    VERIFY_CHECK(a3 >> 62 == 0);
+    VERIFY_CHECK(a4 >> 8 == 0);
+    /* Re-slice the 5x62-bit limbs into 5x52-bit limbs; the top output limb needs no mask. */
+    r->n[0] = limb_mask &  a0;
+    r->n[1] = limb_mask & ((a1 << 10) | (a0 >> 52));
+    r->n[2] = limb_mask & ((a2 << 20) | (a1 >> 42));
+    r->n[3] = limb_mask & ((a3 << 30) | (a2 >> 32));
+    r->n[4] = (a4 << 40) | (a3 >> 22);
+}
+
+static void secp256k1_fe_to_signed62(secp256k1_modinv64_signed62 *r, const secp256k1_fe *a) {
+    const uint64_t a0 = a->n[0], a1 = a->n[1], a2 = a->n[2], a3 = a->n[3], a4 = a->n[4];
+    const uint64_t low62 = UINT64_MAX >> 2; /* the 62 low bits */
+    /* Re-slice the 5x52-bit limbs of a into 5x62-bit limbs. */
+    r->v[0] = low62 & ((a1 << 52) | a0);
+    r->v[1] = low62 & ((a2 << 42) | (a1 >> 10));
+    r->v[2] = low62 & ((a3 << 32) | (a2 >> 20));
+    r->v[3] = low62 & ((a4 << 22) | (a3 >> 30));
+    r->v[4] = a4 >> 40;
+}
+
+static const secp256k1_modinv64_modinfo secp256k1_const_modinfo_fe = {
+    {{-0x1000003D1LL, 0, 0, 0, 256}}, /* read as 62-bit limbs, least significant first (the layout secp256k1_fe_to_signed62 writes): 256*2^248 - 0x1000003D1 = 2^256 - 0x1000003D1 */
+    0x27C7F6E22DDACACFLL /* NOTE(review): presumably the inverse of the modulus mod 2^62 that the modinv64 routines expect -- confirm against modinv64.h */
+};
+
+static void secp256k1_fe_impl_inv(secp256k1_fe *r, const secp256k1_fe *x) {
+    secp256k1_modinv64_signed62 s;
+    secp256k1_fe tmp;
+    tmp = *x; /* private copy: x is never modified, and r may be written freely afterwards */
+    secp256k1_fe_normalize(&tmp);
+    secp256k1_fe_to_signed62(&s, &tmp);
+    secp256k1_modinv64(&s, &secp256k1_const_modinfo_fe);
+    secp256k1_fe_from_signed62(r, &s);
+}
+
+static void secp256k1_fe_impl_inv_var(secp256k1_fe *r, const secp256k1_fe *x) {
+    secp256k1_modinv64_signed62 s;
+    secp256k1_fe tmp;
+    tmp = *x; /* private copy: x is never modified, and r may be written freely afterwards */
+    secp256k1_fe_normalize_var(&tmp);
+    secp256k1_fe_to_signed62(&s, &tmp);
+    secp256k1_modinv64_var(&s, &secp256k1_const_modinfo_fe);
+    secp256k1_fe_from_signed62(r, &s);
+}
+
+static int secp256k1_fe_impl_is_square_var(const secp256k1_fe *x) {
+    secp256k1_modinv64_signed62 s;
+    secp256k1_fe tmp, dummy;
+    int jac;
+
+    /* Work on a fully normalized private copy of the input. */
+    tmp = *x;
+    secp256k1_fe_normalize_var(&tmp);
+    /* Zero is answered here (as a square) because secp256k1_jacobi64_maybe_var cannot deal with input 0. */
+    if (secp256k1_fe_is_zero(&tmp)) return 1;
+
+    secp256k1_fe_to_signed62(&s, &tmp);
+    jac = secp256k1_jacobi64_maybe_var(&s, &secp256k1_const_modinfo_fe);
+    if (jac != 0) {
+        /* Nonzero: the Jacobi symbol was computed, and a positive value means x is a square. */
+        return jac >= 0;
+    }
+    /* A result of 0 means secp256k1_jacobi64_maybe_var failed to compute the Jacobi symbol.
+     * Fall back to computing a square root. This should be extremely rare with random
+     * input (except in VERIFY mode, where a lower iteration count is used). */
+    return secp256k1_fe_sqrt(&dummy, &tmp);
+}
+
+#endif /* SECP256K1_FIELD_REPR_IMPL_H */
--- a/libsecp256k1/src/field_5x52_int128_impl.h
+++ b/libsecp256k1/src/field_5x52_int128_impl.h
@@ -0,0 +1,274 @@
+/***********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                              *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_FIELD_INNER5X52_IMPL_H
+#define SECP256K1_FIELD_INNER5X52_IMPL_H
+
+#include <stdint.h>
+
+#include "int128.h"
+#include "util.h"
+
+#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0) /* checks that the integer x fits in n bits */
+#define VERIFY_BITS_128(x, n) VERIFY_CHECK(secp256k1_u128_check_bits((x), (n))) /* same check for a secp256k1_uint128, passed by pointer */
+
+SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) {
+    secp256k1_uint128 c, d;
+    uint64_t t3, t4, tx, u0;
+    uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
+    const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
+
+    VERIFY_BITS(a[0], 56);
+    VERIFY_BITS(a[1], 56);
+    VERIFY_BITS(a[2], 56);
+    VERIFY_BITS(a[3], 56);
+    VERIFY_BITS(a[4], 52);
+    VERIFY_BITS(b[0], 56);
+    VERIFY_BITS(b[1], 56);
+    VERIFY_BITS(b[2], 56);
+    VERIFY_BITS(b[3], 56);
+    VERIFY_BITS(b[4], 52);
+    VERIFY_CHECK(r != b);
+    VERIFY_CHECK(a != b);
+    /* Multiplies the five-limb arrays a and b into r, reducing while accumulating; a is copied to locals first, b is read in place (hence r != b). */
+    /*  [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
+     *  for 0 <= x <= 4, px is a shorthand for sum(a[i]*b[x-i], i=0..x).
+     *  for 4 <= x <= 8, px is a shorthand for sum(a[i]*b[x-i], i=(x-4)..4)
+     *  Note that [x 0 0 0 0 0] = [x*R].
+     */
+
+    secp256k1_u128_mul(&d, a0, b[3]);
+    secp256k1_u128_accum_mul(&d, a1, b[2]);
+    secp256k1_u128_accum_mul(&d, a2, b[1]);
+    secp256k1_u128_accum_mul(&d, a3, b[0]);
+    VERIFY_BITS_128(&d, 114);
+    /* [d 0 0 0] = [p3 0 0 0] */
+    secp256k1_u128_mul(&c, a4, b[4]);
+    VERIFY_BITS_128(&c, 112);
+    /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+    secp256k1_u128_accum_mul(&d, R, secp256k1_u128_to_u64(&c)); secp256k1_u128_rshift(&c, 64);
+    VERIFY_BITS_128(&d, 115);
+    VERIFY_BITS_128(&c, 48);
+    /* [(c<<12) 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+    t3 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
+    VERIFY_BITS(t3, 52);
+    VERIFY_BITS_128(&d, 63);
+    /* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+
+    secp256k1_u128_accum_mul(&d, a0, b[4]);
+    secp256k1_u128_accum_mul(&d, a1, b[3]);
+    secp256k1_u128_accum_mul(&d, a2, b[2]);
+    secp256k1_u128_accum_mul(&d, a3, b[1]);
+    secp256k1_u128_accum_mul(&d, a4, b[0]);
+    VERIFY_BITS_128(&d, 115);
+    /* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    secp256k1_u128_accum_mul(&d, R << 12, secp256k1_u128_to_u64(&c));
+    VERIFY_BITS_128(&d, 116);
+    /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    t4 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
+    VERIFY_BITS(t4, 52);
+    VERIFY_BITS_128(&d, 64);
+    /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    tx = (t4 >> 48); t4 &= (M >> 4);
+    VERIFY_BITS(tx, 4);
+    VERIFY_BITS(t4, 48);
+    /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+
+    secp256k1_u128_mul(&c, a0, b[0]);
+    VERIFY_BITS_128(&c, 112);
+    /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
+    secp256k1_u128_accum_mul(&d, a1, b[4]);
+    secp256k1_u128_accum_mul(&d, a2, b[3]);
+    secp256k1_u128_accum_mul(&d, a3, b[2]);
+    secp256k1_u128_accum_mul(&d, a4, b[1]);
+    VERIFY_BITS_128(&d, 114);
+    /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    u0 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
+    VERIFY_BITS(u0, 52);
+    VERIFY_BITS_128(&d, 62);
+    /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    u0 = (u0 << 4) | tx;
+    VERIFY_BITS(u0, 56);
+    /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    secp256k1_u128_accum_mul(&c, u0, R >> 4);
+    VERIFY_BITS_128(&c, 113);
+    /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    r[0] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
+    VERIFY_BITS(r[0], 52);
+    VERIFY_BITS_128(&c, 61);
+    /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */
+
+    secp256k1_u128_accum_mul(&c, a0, b[1]);
+    secp256k1_u128_accum_mul(&c, a1, b[0]);
+    VERIFY_BITS_128(&c, 114);
+    /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
+    secp256k1_u128_accum_mul(&d, a2, b[4]);
+    secp256k1_u128_accum_mul(&d, a3, b[3]);
+    secp256k1_u128_accum_mul(&d, a4, b[2]);
+    VERIFY_BITS_128(&d, 114);
+    /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+    secp256k1_u128_accum_mul(&c, secp256k1_u128_to_u64(&d) & M, R); secp256k1_u128_rshift(&d, 52);
+    VERIFY_BITS_128(&c, 115);
+    VERIFY_BITS_128(&d, 62);
+    /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+    r[1] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
+    VERIFY_BITS(r[1], 52);
+    VERIFY_BITS_128(&c, 63);
+    /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+
+    secp256k1_u128_accum_mul(&c, a0, b[2]);
+    secp256k1_u128_accum_mul(&c, a1, b[1]);
+    secp256k1_u128_accum_mul(&c, a2, b[0]);
+    VERIFY_BITS_128(&c, 114);
+    /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
+    secp256k1_u128_accum_mul(&d, a3, b[4]);
+    secp256k1_u128_accum_mul(&d, a4, b[3]);
+    VERIFY_BITS_128(&d, 114);
+    /* [d 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    secp256k1_u128_accum_mul(&c, R, secp256k1_u128_to_u64(&d)); secp256k1_u128_rshift(&d, 64);
+    VERIFY_BITS_128(&c, 115);
+    VERIFY_BITS_128(&d, 50);
+    /* [(d<<12) 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    r[2] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
+    VERIFY_BITS(r[2], 52);
+    VERIFY_BITS_128(&c, 63);
+    /* [(d<<12) 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    secp256k1_u128_accum_mul(&c, R << 12, secp256k1_u128_to_u64(&d));
+    secp256k1_u128_accum_u64(&c, t3);
+    VERIFY_BITS_128(&c, 100);
+    /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[3] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
+    VERIFY_BITS(r[3], 52);
+    VERIFY_BITS_128(&c, 48);
+    /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[4] = secp256k1_u128_to_u64(&c) + t4;
+    VERIFY_BITS(r[4], 49);
+    /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+}
+
+SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) {
+    secp256k1_uint128 c, d;
+    uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
+    uint64_t t3, t4, tx, u0;
+    const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
+
+    VERIFY_BITS(a[0], 56);
+    VERIFY_BITS(a[1], 56);
+    VERIFY_BITS(a[2], 56);
+    VERIFY_BITS(a[3], 56);
+    VERIFY_BITS(a[4], 52);
+    /* Squares the five-limb array a into r; all limbs of a are copied to locals before r is written, and symmetric cross terms are formed once with a doubled factor. */
+    /**  [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
+     *  px is a shorthand for sum(a[i]*a[x-i], i=0..x).
+     *  Note that [x 0 0 0 0 0] = [x*R].
+     */
+
+    secp256k1_u128_mul(&d, a0*2, a3);
+    secp256k1_u128_accum_mul(&d, a1*2, a2);
+    VERIFY_BITS_128(&d, 114);
+    /* [d 0 0 0] = [p3 0 0 0] */
+    secp256k1_u128_mul(&c, a4, a4);
+    VERIFY_BITS_128(&c, 112);
+    /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+    secp256k1_u128_accum_mul(&d, R, secp256k1_u128_to_u64(&c)); secp256k1_u128_rshift(&c, 64);
+    VERIFY_BITS_128(&d, 115);
+    VERIFY_BITS_128(&c, 48);
+    /* [(c<<12) 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+    t3 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
+    VERIFY_BITS(t3, 52);
+    VERIFY_BITS_128(&d, 63);
+    /* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+
+    a4 *= 2; /* from here on a4 holds 2*a[4]; the a4*a4 term above was taken before doubling */
+    secp256k1_u128_accum_mul(&d, a0, a4);
+    secp256k1_u128_accum_mul(&d, a1*2, a3);
+    secp256k1_u128_accum_mul(&d, a2, a2);
+    VERIFY_BITS_128(&d, 115);
+    /* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    secp256k1_u128_accum_mul(&d, R << 12, secp256k1_u128_to_u64(&c));
+    VERIFY_BITS_128(&d, 116);
+    /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    t4 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
+    VERIFY_BITS(t4, 52);
+    VERIFY_BITS_128(&d, 64);
+    /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    tx = (t4 >> 48); t4 &= (M >> 4);
+    VERIFY_BITS(tx, 4);
+    VERIFY_BITS(t4, 48);
+    /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+
+    secp256k1_u128_mul(&c, a0, a0);
+    VERIFY_BITS_128(&c, 112);
+    /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
+    secp256k1_u128_accum_mul(&d, a1, a4);
+    secp256k1_u128_accum_mul(&d, a2*2, a3);
+    VERIFY_BITS_128(&d, 114);
+    /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    u0 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
+    VERIFY_BITS(u0, 52);
+    VERIFY_BITS_128(&d, 62);
+    /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    u0 = (u0 << 4) | tx;
+    VERIFY_BITS(u0, 56);
+    /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    secp256k1_u128_accum_mul(&c, u0, R >> 4);
+    VERIFY_BITS_128(&c, 113);
+    /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    r[0] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
+    VERIFY_BITS(r[0], 52);
+    VERIFY_BITS_128(&c, 61);
+    /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */
+
+    a0 *= 2; /* from here on a0 holds 2*a[0]; the a0*a0 term above was taken before doubling */
+    secp256k1_u128_accum_mul(&c, a0, a1);
+    VERIFY_BITS_128(&c, 114);
+    /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
+    secp256k1_u128_accum_mul(&d, a2, a4);
+    secp256k1_u128_accum_mul(&d, a3, a3);
+    VERIFY_BITS_128(&d, 114);
+    /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+    secp256k1_u128_accum_mul(&c, secp256k1_u128_to_u64(&d) & M, R); secp256k1_u128_rshift(&d, 52);
+    VERIFY_BITS_128(&c, 115);
+    VERIFY_BITS_128(&d, 62);
+    /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+    r[1] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
+    VERIFY_BITS(r[1], 52);
+    VERIFY_BITS_128(&c, 63);
+    /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+
+    secp256k1_u128_accum_mul(&c, a0, a2);
+    secp256k1_u128_accum_mul(&c, a1, a1);
+    VERIFY_BITS_128(&c, 114);
+    /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
+    secp256k1_u128_accum_mul(&d, a3, a4);
+    VERIFY_BITS_128(&d, 114);
+    /* [d 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    secp256k1_u128_accum_mul(&c, R, secp256k1_u128_to_u64(&d)); secp256k1_u128_rshift(&d, 64);
+    VERIFY_BITS_128(&c, 115);
+    VERIFY_BITS_128(&d, 50);
+    /* [(d<<12) 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[2] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
+    VERIFY_BITS(r[2], 52);
+    VERIFY_BITS_128(&c, 63);
+    /* [(d<<12) 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    secp256k1_u128_accum_mul(&c, R << 12, secp256k1_u128_to_u64(&d));
+    secp256k1_u128_accum_u64(&c, t3);
+    VERIFY_BITS_128(&c, 100);
+    /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[3] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
+    VERIFY_BITS(r[3], 52);
+    VERIFY_BITS_128(&c, 48);
+    /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[4] = secp256k1_u128_to_u64(&c) + t4;
+    VERIFY_BITS(r[4], 49);
+    /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+}
+
+#endif /* SECP256K1_FIELD_INNER5X52_IMPL_H */
--- a/libsecp256k1/src/field_impl.h
+++ b/libsecp256k1/src/field_impl.h
@@ -0,0 +1,457 @@
+/***********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                              *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_FIELD_IMPL_H
+#define SECP256K1_FIELD_IMPL_H
+
+#include "field.h"
+#include "util.h"
+
+#if defined(SECP256K1_WIDEMUL_INT128)
+#include "field_5x52_impl.h"
+#elif defined(SECP256K1_WIDEMUL_INT64)
+#include "field_10x26_impl.h"
+#else
+#error "Please select wide multiplication implementation"
+#endif
+
+SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe *a) {
+    secp256k1_memclear(a, sizeof(*a)); /* clear the entire field element object through secp256k1_memclear */
+}
+
+SECP256K1_INLINE static int secp256k1_fe_equal(const secp256k1_fe *a, const secp256k1_fe *b) {
+    secp256k1_fe diff;
+    SECP256K1_FE_VERIFY(a);
+    SECP256K1_FE_VERIFY(b);
+    SECP256K1_FE_VERIFY_MAGNITUDE(a, 1);
+    SECP256K1_FE_VERIFY_MAGNITUDE(b, 31);
+    /* a and b are equal exactly when b - a normalizes to zero; the difference is built as (-a) + b. */
+    secp256k1_fe_negate(&diff, a, 1);
+    secp256k1_fe_add(&diff, b);
+    return secp256k1_fe_normalizes_to_zero(&diff);
+}
+
+static int secp256k1_fe_sqrt(secp256k1_fe * SECP256K1_RESTRICT r, const secp256k1_fe * SECP256K1_RESTRICT a) {
+    /** Given that p is congruent to 3 mod 4, we can compute the square root of
+     *  a mod p as the (p+1)/4'th power of a.
+     *
+     *  As (p+1)/4 is an even number, it will have the same result for a and for
+     *  (-a). Only one of these two numbers actually has a square root however,
+     *  so we test at the end by squaring and comparing to the input.
+     *  Also because (p+1)/4 is an even number, the computed square root is
+     *  itself always a square (a ** ((p+1)/4) is the square of a ** ((p+1)/8)).
+     */
+    secp256k1_fe x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1;
+    int j, ret;
+
+    VERIFY_CHECK(r != a);
+    SECP256K1_FE_VERIFY(a);
+    SECP256K1_FE_VERIFY_MAGNITUDE(a, 8);
+
+    /** The binary representation of (p + 1)/4 has 3 blocks of 1s, with lengths in
+     *  { 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
+     *  1, [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
+     */
+
+    secp256k1_fe_sqr(&x2, a);
+    secp256k1_fe_mul(&x2, &x2, a); /* x2 = a^3 = a^(2^2 - 1); in general xN below holds a^(2^N - 1) */
+
+    secp256k1_fe_sqr(&x3, &x2);
+    secp256k1_fe_mul(&x3, &x3, a); /* x3 = a^7 = a^(2^3 - 1) */
+
+    x6 = x3;
+    for (j=0; j<3; j++) {
+        secp256k1_fe_sqr(&x6, &x6);
+    }
+    secp256k1_fe_mul(&x6, &x6, &x3);
+
+    x9 = x6;
+    for (j=0; j<3; j++) {
+        secp256k1_fe_sqr(&x9, &x9);
+    }
+    secp256k1_fe_mul(&x9, &x9, &x3);
+
+    x11 = x9;
+    for (j=0; j<2; j++) {
+        secp256k1_fe_sqr(&x11, &x11);
+    }
+    secp256k1_fe_mul(&x11, &x11, &x2);
+
+    x22 = x11;
+    for (j=0; j<11; j++) {
+        secp256k1_fe_sqr(&x22, &x22);
+    }
+    secp256k1_fe_mul(&x22, &x22, &x11);
+
+    x44 = x22;
+    for (j=0; j<22; j++) {
+        secp256k1_fe_sqr(&x44, &x44);
+    }
+    secp256k1_fe_mul(&x44, &x44, &x22);
+
+    x88 = x44;
+    for (j=0; j<44; j++) {
+        secp256k1_fe_sqr(&x88, &x88);
+    }
+    secp256k1_fe_mul(&x88, &x88, &x44);
+
+    x176 = x88;
+    for (j=0; j<88; j++) {
+        secp256k1_fe_sqr(&x176, &x176);
+    }
+    secp256k1_fe_mul(&x176, &x176, &x88);
+
+    x220 = x176;
+    for (j=0; j<44; j++) {
+        secp256k1_fe_sqr(&x220, &x220);
+    }
+    secp256k1_fe_mul(&x220, &x220, &x44);
+
+    x223 = x220;
+    for (j=0; j<3; j++) {
+        secp256k1_fe_sqr(&x223, &x223);
+    }
+    secp256k1_fe_mul(&x223, &x223, &x3);
+
+    /* The final result is then assembled using a sliding window over the blocks. */
+
+    t1 = x223;
+    for (j=0; j<23; j++) {
+        secp256k1_fe_sqr(&t1, &t1);
+    }
+    secp256k1_fe_mul(&t1, &t1, &x22);
+    for (j=0; j<6; j++) {
+        secp256k1_fe_sqr(&t1, &t1);
+    }
+    secp256k1_fe_mul(&t1, &t1, &x2);
+    secp256k1_fe_sqr(&t1, &t1);
+    secp256k1_fe_sqr(r, &t1);
+
+    /* Check that a square root was actually calculated */
+
+    secp256k1_fe_sqr(&t1, r);
+    ret = secp256k1_fe_equal(&t1, a); /* 1 when r*r equals a, 0 otherwise; r is written in both cases */
+
+#ifdef VERIFY
+    if (!ret) { /* a had no root: then r*r must equal -a, as explained in the header comment */
+        secp256k1_fe_negate(&t1, &t1, 1);
+        secp256k1_fe_normalize_var(&t1);
+        VERIFY_CHECK(secp256k1_fe_equal(&t1, a));
+    }
+#endif
+    return ret;
+}
+
+#ifndef VERIFY
+static void secp256k1_fe_verify(const secp256k1_fe *a) { (void)a; } /* without VERIFY: no-op stub; the cast only marks the parameter as used */
+static void secp256k1_fe_verify_magnitude(const secp256k1_fe *a, int m) { (void)a; (void)m; } /* without VERIFY: no-op stub, both parameters are ignored */
+#else
+static void secp256k1_fe_impl_verify(const secp256k1_fe *a);
+static void secp256k1_fe_verify(const secp256k1_fe *a) {
+    /* VERIFY builds only. The magnitude must not exceed 32. */
+    SECP256K1_FE_VERIFY_MAGNITUDE(a, 32);
+    /* The normalized flag is a strict boolean: only 0 or 1 are accepted. */
+    VERIFY_CHECK((a->normalized == 0) || (a->normalized == 1));
+    /* If normalized, magnitude must be 0 or 1. */
+    if (a->normalized) SECP256K1_FE_VERIFY_MAGNITUDE(a, 1);
+    /* Finally run the checks specific to the selected field implementation. */
+    secp256k1_fe_impl_verify(a);
+}
+
+static void secp256k1_fe_verify_magnitude(const secp256k1_fe *a, int m) {
+    VERIFY_CHECK(m >= 0); /* the bound itself must lie in 0..32 */
+    VERIFY_CHECK(m <= 32);
+    VERIFY_CHECK(a->magnitude <= m); /* the element's recorded magnitude may not exceed the bound */
+}
+
+static void secp256k1_fe_impl_normalize(secp256k1_fe *r);
+SECP256K1_INLINE static void secp256k1_fe_normalize(secp256k1_fe *r) {
+    SECP256K1_FE_VERIFY(r);
+    /* VERIFY-build wrapper: delegate to the implementation, then record the result as magnitude 1 and normalized. */
+    secp256k1_fe_impl_normalize(r);
+    r->magnitude = 1;
+    r->normalized = 1;
+
+    SECP256K1_FE_VERIFY(r);
+}
+
+static void secp256k1_fe_impl_normalize_weak(secp256k1_fe *r);
+SECP256K1_INLINE static void secp256k1_fe_normalize_weak(secp256k1_fe *r) {
+    SECP256K1_FE_VERIFY(r);
+    /* VERIFY-build wrapper: delegate, then record magnitude 1; the normalized flag is left as it was. */
+    secp256k1_fe_impl_normalize_weak(r);
+    r->magnitude = 1;
+
+    SECP256K1_FE_VERIFY(r);
+}
+
+static void secp256k1_fe_impl_normalize_var(secp256k1_fe *r);
+SECP256K1_INLINE static void secp256k1_fe_normalize_var(secp256k1_fe *r) {
+    SECP256K1_FE_VERIFY(r);
+    /* VERIFY-build wrapper around the _var implementation; the result is recorded as magnitude 1 and normalized. */
+    secp256k1_fe_impl_normalize_var(r);
+    r->magnitude = 1;
+    r->normalized = 1;
+
+    SECP256K1_FE_VERIFY(r);
+}
+
+static int secp256k1_fe_impl_normalizes_to_zero(const secp256k1_fe *r);
+SECP256K1_INLINE static int secp256k1_fe_normalizes_to_zero(const secp256k1_fe *r) {
+    SECP256K1_FE_VERIFY(r);
+    /* VERIFY-build wrapper: r is only validated and read; the implementation's answer is returned unchanged. */
+    return secp256k1_fe_impl_normalizes_to_zero(r);
+}
+
+static int secp256k1_fe_impl_normalizes_to_zero_var(const secp256k1_fe *r);
+SECP256K1_INLINE static int secp256k1_fe_normalizes_to_zero_var(const secp256k1_fe *r) {
+    SECP256K1_FE_VERIFY(r);
+    /* VERIFY-build wrapper: r is only validated and read; the _var implementation's answer is returned unchanged. */
+    return secp256k1_fe_impl_normalizes_to_zero_var(r);
+}
+
+static void secp256k1_fe_impl_set_int(secp256k1_fe *r, int a);
+SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe *r, int a) {
+    VERIFY_CHECK(0 <= a && a <= 0x7FFF);
+    /* VERIFY-build wrapper: r is pure output (not validated beforehand); a must lie in 0..0x7FFF. */
+    secp256k1_fe_impl_set_int(r, a);
+    r->magnitude = (a != 0); /* magnitude 0 for the value zero, 1 otherwise */
+    r->normalized = 1;
+
+    SECP256K1_FE_VERIFY(r);
+}
+
+static void secp256k1_fe_impl_add_int(secp256k1_fe *r, int a);
+SECP256K1_INLINE static void secp256k1_fe_add_int(secp256k1_fe *r, int a) {
+    VERIFY_CHECK(0 <= a && a <= 0x7FFF);
+    SECP256K1_FE_VERIFY(r);
+    /* VERIFY-build wrapper: a must lie in 0..0x7FFF; the addition costs one unit of magnitude and clears the normalized flag. */
+    secp256k1_fe_impl_add_int(r, a);
+    r->magnitude += 1;
+    r->normalized = 0;
+
+    SECP256K1_FE_VERIFY(r);
+}
+
+static int secp256k1_fe_impl_is_zero(const secp256k1_fe *a);
+SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe *a) {
+    SECP256K1_FE_VERIFY(a);
+    VERIFY_CHECK(a->normalized);
+    /* VERIFY-build wrapper: the input must already be normalized; the implementation's answer is returned unchanged. */
+    return secp256k1_fe_impl_is_zero(a);
+}
+
+static int secp256k1_fe_impl_is_odd(const secp256k1_fe *a);
+SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe *a) {
+    SECP256K1_FE_VERIFY(a);
+    VERIFY_CHECK(a->normalized);
+    /* VERIFY-build wrapper: the input must already be normalized; the implementation's answer is returned unchanged. */
+    return secp256k1_fe_impl_is_odd(a);
+}
+
+static int secp256k1_fe_impl_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b);
+SECP256K1_INLINE static int secp256k1_fe_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b) {
+    SECP256K1_FE_VERIFY(a);
+    SECP256K1_FE_VERIFY(b);
+    VERIFY_CHECK(a->normalized);
+    VERIFY_CHECK(b->normalized);
+    /* VERIFY-build wrapper: both operands must already be normalized; the implementation's result is returned unchanged. */
+    return secp256k1_fe_impl_cmp_var(a, b);
+}
+
+static void secp256k1_fe_impl_set_b32_mod(secp256k1_fe *r, const unsigned char *a);
+SECP256K1_INLINE static void secp256k1_fe_set_b32_mod(secp256k1_fe *r, const unsigned char *a) {
+    secp256k1_fe_impl_set_b32_mod(r, a); /* VERIFY-build wrapper: r is pure output, so nothing is validated beforehand */
+    r->magnitude = 1;
+    r->normalized = 0; /* the result is recorded as not normalized */
+
+    SECP256K1_FE_VERIFY(r);
+}
+
+static int secp256k1_fe_impl_set_b32_limit(secp256k1_fe *r, const unsigned char *a);
+SECP256K1_INLINE static int secp256k1_fe_set_b32_limit(secp256k1_fe *r, const unsigned char *a) {
+    if (!secp256k1_fe_impl_set_b32_limit(r, a)) {
+        /* Rejected by the implementation: flag the output field element as invalid. */
+        r->magnitude = -1;
+        return 0;
+    }
+    /* Accepted: the value is recorded as magnitude 1 and normalized, then validated. */
+    r->magnitude = 1;
+    r->normalized = 1;
+    SECP256K1_FE_VERIFY(r);
+    return 1;
+}
+
+static void secp256k1_fe_impl_get_b32(unsigned char *r, const secp256k1_fe *a);
+SECP256K1_INLINE static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe *a) {
+    SECP256K1_FE_VERIFY(a);
+    VERIFY_CHECK(a->normalized);
+    /* VERIFY-build wrapper: the input must already be normalized before it is serialized into r. */
+    secp256k1_fe_impl_get_b32(r, a);
+}
+
+static void secp256k1_fe_impl_negate_unchecked(secp256k1_fe *r, const secp256k1_fe *a, int m);
+SECP256K1_INLINE static void secp256k1_fe_negate_unchecked(secp256k1_fe *r, const secp256k1_fe *a, int m) {
+    SECP256K1_FE_VERIFY(a);
+    VERIFY_CHECK(m >= 0 && m <= 31);
+    SECP256K1_FE_VERIFY_MAGNITUDE(a, m);
+    /* VERIFY-build wrapper: m is in 0..31 and bounds the magnitude of a; the result is recorded with magnitude m + 1. */
+    secp256k1_fe_impl_negate_unchecked(r, a, m);
+    r->magnitude = m + 1;
+    r->normalized = 0;
+
+    SECP256K1_FE_VERIFY(r);
+}
+
+static void secp256k1_fe_impl_mul_int_unchecked(secp256k1_fe *r, int a);
+SECP256K1_INLINE static void secp256k1_fe_mul_int_unchecked(secp256k1_fe *r, int a) {
+    SECP256K1_FE_VERIFY(r);
+    /* VERIFY-build wrapper: a must be in 0..32 and the scaled magnitude may not exceed 32. */
+    VERIFY_CHECK(a >= 0 && a <= 32);
+    VERIFY_CHECK(a*r->magnitude <= 32);
+    secp256k1_fe_impl_mul_int_unchecked(r, a);
+    r->magnitude *= a; /* the magnitude scales by the same factor as the limbs */
+    r->normalized = 0;
+
+    SECP256K1_FE_VERIFY(r);
+}
+
+static void secp256k1_fe_impl_add(secp256k1_fe *r, const secp256k1_fe *a);
+SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe *r, const secp256k1_fe *a) {
+    SECP256K1_FE_VERIFY(r);
+    SECP256K1_FE_VERIFY(a);
+    VERIFY_CHECK(r->magnitude + a->magnitude <= 32);
+    /* VERIFY-build wrapper: magnitudes add up (their sum is capped at 32 above) and the result is not normalized. */
+    secp256k1_fe_impl_add(r, a);
+    r->magnitude += a->magnitude;
+    r->normalized = 0;
+
+    SECP256K1_FE_VERIFY(r);
+}
+
+static void secp256k1_fe_impl_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp256k1_fe * SECP256K1_RESTRICT b);
+SECP256K1_INLINE static void secp256k1_fe_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp256k1_fe * SECP256K1_RESTRICT b) {
+    SECP256K1_FE_VERIFY(a);
+    SECP256K1_FE_VERIFY(b);
+    SECP256K1_FE_VERIFY_MAGNITUDE(a, 8);
+    SECP256K1_FE_VERIFY_MAGNITUDE(b, 8);
+    VERIFY_CHECK(r != b);
+    VERIFY_CHECK(a != b);
+    /* VERIFY-build wrapper: both inputs need magnitude <= 8 and b may alias neither r nor a (r == a is not rejected); the product has magnitude 1. */
+    secp256k1_fe_impl_mul(r, a, b);
+    r->magnitude = 1;
+    r->normalized = 0;
+
+    SECP256K1_FE_VERIFY(r);
+}
+
+static void secp256k1_fe_impl_sqr(secp256k1_fe *r, const secp256k1_fe *a);
+SECP256K1_INLINE static void secp256k1_fe_sqr(secp256k1_fe *r, const secp256k1_fe *a) {
+    SECP256K1_FE_VERIFY(a);
+    SECP256K1_FE_VERIFY_MAGNITUDE(a, 8);
+    /* VERIFY-build wrapper: the input needs magnitude <= 8; the square is recorded with magnitude 1, not normalized. */
+    secp256k1_fe_impl_sqr(r, a);
+    r->magnitude = 1;
+    r->normalized = 0;
+
+    SECP256K1_FE_VERIFY(r);
+}
+
+static void secp256k1_fe_impl_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag);
+SECP256K1_INLINE static void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag) {
+    VERIFY_CHECK(flag == 0 || flag == 1);
+    SECP256K1_FE_VERIFY(a);
+    SECP256K1_FE_VERIFY(r);
+    /* VERIFY-build wrapper: the metadata below is updated the same way for either flag value, taking the weaker of the two operands. */
+    secp256k1_fe_impl_cmov(r, a, flag);
+    if (a->magnitude > r->magnitude) r->magnitude = a->magnitude; /* keep the larger magnitude */
+    if (!a->normalized) r->normalized = 0; /* normalized only if both were */
+
+    SECP256K1_FE_VERIFY(r);
+}
+
+static void secp256k1_fe_impl_to_storage(secp256k1_fe_storage *r, const secp256k1_fe *a);
+SECP256K1_INLINE static void secp256k1_fe_to_storage(secp256k1_fe_storage *r, const secp256k1_fe *a) {
+    SECP256K1_FE_VERIFY(a);
+    VERIFY_CHECK(a->normalized);
+    /* VERIFY-build wrapper: only a normalized element may be converted to the storage form. */
+    secp256k1_fe_impl_to_storage(r, a);
+}
+
+static void secp256k1_fe_impl_from_storage(secp256k1_fe *r, const secp256k1_fe_storage *a);
+SECP256K1_INLINE static void secp256k1_fe_from_storage(secp256k1_fe *r, const secp256k1_fe_storage *a) {
+    secp256k1_fe_impl_from_storage(r, a); /* VERIFY-build wrapper: r is pure output, so nothing is validated beforehand */
+    r->magnitude = 1;
+    r->normalized = 1; /* the unpacked value is recorded as normalized */
+
+    SECP256K1_FE_VERIFY(r);
+}
+
+static void secp256k1_fe_impl_inv(secp256k1_fe *r, const secp256k1_fe *x);
+SECP256K1_INLINE static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *x) {
+    int input_is_zero = secp256k1_fe_normalizes_to_zero(x);
+    SECP256K1_FE_VERIFY(x);
+    /* VERIFY-build wrapper: whether x is zero is recorded up front so the result can be cross-checked below. */
+    secp256k1_fe_impl_inv(r, x);
+    r->magnitude = x->magnitude > 0; /* 0 only when the input magnitude was 0, otherwise 1 */
+    r->normalized = 1;
+
+    VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == input_is_zero);
+    SECP256K1_FE_VERIFY(r);
+}
+
+static void secp256k1_fe_impl_inv_var(secp256k1_fe *r, const secp256k1_fe *x);
+SECP256K1_INLINE static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) {
+    int input_is_zero = secp256k1_fe_normalizes_to_zero(x);
+    SECP256K1_FE_VERIFY(x);
+    /* VERIFY-build wrapper around the _var inverse: whether x is zero is recorded up front so the result can be cross-checked below. */
+    secp256k1_fe_impl_inv_var(r, x);
+    r->magnitude = x->magnitude > 0; /* 0 only when the input magnitude was 0, otherwise 1 */
+    r->normalized = 1;
+
+    VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == input_is_zero);
+    SECP256K1_FE_VERIFY(r);
+}
+
+static int secp256k1_fe_impl_is_square_var(const secp256k1_fe *x);
+SECP256K1_INLINE static int secp256k1_fe_is_square_var(const secp256k1_fe *x) {
+    int ret;
+    secp256k1_fe tmp = *x, sqrt;
+    SECP256K1_FE_VERIFY(x);
+    /* VERIFY-build wrapper: the implementation's answer must agree with whether secp256k1_fe_sqrt succeeds on a weakly normalized copy of x. */
+    ret = secp256k1_fe_impl_is_square_var(x);
+    secp256k1_fe_normalize_weak(&tmp);
+    VERIFY_CHECK(ret == secp256k1_fe_sqrt(&sqrt, &tmp));
+    return ret;
+}
+
+static void secp256k1_fe_impl_get_bounds(secp256k1_fe* r, int m);
+SECP256K1_INLINE static void secp256k1_fe_get_bounds(secp256k1_fe* r, int m) {
+    VERIFY_CHECK(m >= 0);
+    VERIFY_CHECK(m <= 32);
+    /* VERIFY-build wrapper: m must be in 0..32 and becomes the recorded magnitude of r. */
+    secp256k1_fe_impl_get_bounds(r, m);
+    r->magnitude = m;
+    r->normalized = (m == 0); /* flagged as normalized only for m == 0 */
+
+    SECP256K1_FE_VERIFY(r);
+}
+
+static void secp256k1_fe_impl_half(secp256k1_fe *r);
+SECP256K1_INLINE static void secp256k1_fe_half(secp256k1_fe *r) {
+    SECP256K1_FE_VERIFY(r);
+    SECP256K1_FE_VERIFY_MAGNITUDE(r, 31);
+    /* VERIFY-build wrapper: the input magnitude may be at most 31; the output magnitude is floor(m/2) + 1. */
+    secp256k1_fe_impl_half(r);
+    r->magnitude = (r->magnitude >> 1) + 1;
+    r->normalized = 0;
+
+    SECP256K1_FE_VERIFY(r);
+}
+
+#endif /* defined(VERIFY) */
+
+#endif /* SECP256K1_FIELD_IMPL_H */
--- a/libsecp256k1/src/group.h
+++ b/libsecp256k1/src/group.h
@@ -0,0 +1,212 @@
+/***********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                              *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_GROUP_H
+#define SECP256K1_GROUP_H
+
+#include "field.h"
+
+/** A group element in affine coordinates on the secp256k1 curve,
+ *  or occasionally on an isomorphic curve of the form y^2 = x^3 + 7*t^6.
+ *  Note: For exhaustive test mode, secp256k1 is replaced by a small subgroup of a different curve.
+ */
+typedef struct {
+    secp256k1_fe x; /* affine X coordinate */
+    secp256k1_fe y; /* affine Y coordinate */
+    int infinity; /* whether this represents the point at infinity */
+} secp256k1_ge;
+
+/** Initializer for a non-infinity secp256k1_ge: arguments a..h are forwarded to
+ *  SECP256K1_FE_CONST for x and arguments i..p for y; the infinity flag is 0. */
+#define SECP256K1_GE_CONST(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) {SECP256K1_FE_CONST((a),(b),(c),(d),(e),(f),(g),(h)), SECP256K1_FE_CONST((i),(j),(k),(l),(m),(n),(o),(p)), 0}
+/** Initializer for the affine point at infinity: both coordinates zero, infinity flag 1. */
+#define SECP256K1_GE_CONST_INFINITY {SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), 1}
+
+/** A group element of the secp256k1 curve, in jacobian coordinates.
+ *  Note: For exhaustive test mode, secp256k1 is replaced by a small subgroup of a different curve.
+ */
+typedef struct {
+    secp256k1_fe x; /* actual X: x/z^2 */
+    secp256k1_fe y; /* actual Y: y/z^3 */
+    secp256k1_fe z; /* shared denominator used by x and y above */
+    int infinity; /* whether this represents the point at infinity */
+} secp256k1_gej;
+
+/** Initializer for a non-infinity secp256k1_gej with z = 1: arguments a..h are forwarded to
+ *  SECP256K1_FE_CONST for x and arguments i..p for y; the infinity flag is 0. */
+#define SECP256K1_GEJ_CONST(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) {SECP256K1_FE_CONST((a),(b),(c),(d),(e),(f),(g),(h)), SECP256K1_FE_CONST((i),(j),(k),(l),(m),(n),(o),(p)), SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 1), 0}
+/** Initializer for the jacobian point at infinity: x, y and z all zero, infinity flag 1. */
+#define SECP256K1_GEJ_CONST_INFINITY {SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), 1}
+
+/** Storage form of an affine group element: the two coordinates as
+ *  secp256k1_fe_storage values. There is no infinity flag in this form. */
+typedef struct {
+    secp256k1_fe_storage x;
+    secp256k1_fe_storage y;
+} secp256k1_ge_storage;
+
+/** Initializer for a secp256k1_ge_storage: arguments a..h are forwarded to
+ *  SECP256K1_FE_STORAGE_CONST for x and arguments i..p for y. */
+#define SECP256K1_GE_STORAGE_CONST(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) {SECP256K1_FE_STORAGE_CONST((a),(b),(c),(d),(e),(f),(g),(h)), SECP256K1_FE_STORAGE_CONST((i),(j),(k),(l),(m),(n),(o),(p))}
+
+/** Expands to SECP256K1_FE_STORAGE_CONST_GET applied to t.x, then to t.y, separated by a comma. */
+#define SECP256K1_GE_STORAGE_CONST_GET(t) SECP256K1_FE_STORAGE_CONST_GET(t.x), SECP256K1_FE_STORAGE_CONST_GET(t.y)
+
+/** Maximum allowed magnitudes for group element coordinates
+ *  in affine (x, y) and jacobian (x, y, z) representation. */
+#define SECP256K1_GE_X_MAGNITUDE_MAX  4
+#define SECP256K1_GE_Y_MAGNITUDE_MAX  3
+#define SECP256K1_GEJ_X_MAGNITUDE_MAX 4
+#define SECP256K1_GEJ_Y_MAGNITUDE_MAX 4
+#define SECP256K1_GEJ_Z_MAGNITUDE_MAX 1
+
+/** Set a group element equal to the point with given X and Y coordinates */
+static void secp256k1_ge_set_xy(secp256k1_ge *r, const secp256k1_fe *x, const secp256k1_fe *y);
+
+/** Set a group element (affine) equal to the point with the given X coordinate, and given oddness
+ *  for Y. Return value indicates whether the result is valid. */
+static int secp256k1_ge_set_xo_var(secp256k1_ge *r, const secp256k1_fe *x, int odd);
+
+/** Determine whether x is a valid X coordinate on the curve. */
+static int secp256k1_ge_x_on_curve_var(const secp256k1_fe *x);
+
+/** Determine whether fraction xn/xd is a valid X coordinate on the curve (xd != 0). */
+static int secp256k1_ge_x_frac_on_curve_var(const secp256k1_fe *xn, const secp256k1_fe *xd);
+
+/** Check whether a group element is the point at infinity. */
+static int secp256k1_ge_is_infinity(const secp256k1_ge *a);
+
+/** Check whether a group element is valid (i.e., on the curve). */
+static int secp256k1_ge_is_valid_var(const secp256k1_ge *a);
+
+/** Set r equal to the inverse of a (i.e., mirrored around the X axis) */
+static void secp256k1_ge_neg(secp256k1_ge *r, const secp256k1_ge *a);
+
+/** Set a group element equal to another which is given in jacobian coordinates. Constant time. */
+static void secp256k1_ge_set_gej(secp256k1_ge *r, secp256k1_gej *a);
+
+/** Set a group element equal to another which is given in jacobian coordinates. */
+static void secp256k1_ge_set_gej_var(secp256k1_ge *r, secp256k1_gej *a);
+
+/** Set a batch of group elements equal to the inputs given in jacobian coordinates */
+static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len);
+
+/** Bring a batch of inputs to the same global z "denominator", based on ratios between
+ *  (omitted) z coordinates of adjacent elements.
+ *
+ *  Although the elements a[i] are _ge rather than _gej, they actually represent elements
+ *  in Jacobian coordinates with their z coordinates omitted.
+ *
+ *  Using the notation z(b) to represent the omitted z coordinate of b, the array zr of
+ *  z coordinate ratios must satisfy zr[i] == z(a[i]) / z(a[i-1]) for 0 < 'i' < len.
+ *  The zr[0] value is unused.
+ *
+ *  This function adjusts the coordinates of 'a' in place so that for all 'i', z(a[i]) == z(a[len-1]).
+ *  In other words, the initial value of z(a[len-1]) becomes the global z "denominator". Only the
+ *  a[i].x and a[i].y coordinates are explicitly modified; the adjustment of the omitted z coordinate is
+ *  implicit.
+ *
+ *  The coordinates of the final element a[len-1] are not changed.
+ */
+static void secp256k1_ge_table_set_globalz(size_t len, secp256k1_ge *a, const secp256k1_fe *zr);
+
+/** Check two group elements (affine) for equality in variable time. */
+static int secp256k1_ge_eq_var(const secp256k1_ge *a, const secp256k1_ge *b);
+
+/** Set a group element (affine) equal to the point at infinity. */
+static void secp256k1_ge_set_infinity(secp256k1_ge *r);
+
+/** Set a group element (jacobian) equal to the point at infinity. */
+static void secp256k1_gej_set_infinity(secp256k1_gej *r);
+
+/** Set a group element (jacobian) equal to another which is given in affine coordinates. */
+static void secp256k1_gej_set_ge(secp256k1_gej *r, const secp256k1_ge *a);
+
+/** Check two group elements (jacobian) for equality in variable time. */
+static int secp256k1_gej_eq_var(const secp256k1_gej *a, const secp256k1_gej *b);
+
+/** Check two group elements (jacobian and affine) for equality in variable time. */
+static int secp256k1_gej_eq_ge_var(const secp256k1_gej *a, const secp256k1_ge *b);
+
+/** Compare the X coordinate of a group element (jacobian).
+  * The magnitude of the group element's X coordinate must not exceed 31. */
+static int secp256k1_gej_eq_x_var(const secp256k1_fe *x, const secp256k1_gej *a);
+
+/** Set r equal to the inverse of a (i.e., mirrored around the X axis) */
+static void secp256k1_gej_neg(secp256k1_gej *r, const secp256k1_gej *a);
+
+/** Check whether a group element is the point at infinity. */
+static int secp256k1_gej_is_infinity(const secp256k1_gej *a);
+
+/** Set r equal to the double of a. Constant time. */
+static void secp256k1_gej_double(secp256k1_gej *r, const secp256k1_gej *a);
+
+/** Set r equal to the double of a. If rzr is non-NULL this sets *rzr such that r->z == a->z * *rzr (where infinity means an implicit z = 0). */
+static void secp256k1_gej_double_var(secp256k1_gej *r, const secp256k1_gej *a, secp256k1_fe *rzr);
+
+/** Set r equal to the sum of a and b. If rzr is non-NULL this sets *rzr such that r->z == a->z * *rzr (a cannot be infinity in that case). */
+static void secp256k1_gej_add_var(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_gej *b, secp256k1_fe *rzr);
+
+/** Set r equal to the sum of a and b (with b given in affine coordinates, and not infinity). */
+static void secp256k1_gej_add_ge(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_ge *b);
+
+/** Set r equal to the sum of a and b (with b given in affine coordinates). This is more efficient
+    than secp256k1_gej_add_var. It is identical to secp256k1_gej_add_ge but without constant-time
+    guarantee, and b is allowed to be infinity. If rzr is non-NULL this sets *rzr such that r->z == a->z * *rzr (a cannot be infinity in that case). */
+static void secp256k1_gej_add_ge_var(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_ge *b, secp256k1_fe *rzr);
+
+/** Set r equal to the sum of a and b (with the inverse of b's Z coordinate passed as bzinv). */
+static void secp256k1_gej_add_zinv_var(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_ge *b, const secp256k1_fe *bzinv);
+
+/** Set r to be equal to lambda times a, where lambda is chosen in a way such that this is very fast. */
+static void secp256k1_ge_mul_lambda(secp256k1_ge *r, const secp256k1_ge *a);
+
+/** Clear a secp256k1_gej to prevent leaking sensitive information. */
+static void secp256k1_gej_clear(secp256k1_gej *r);
+
+/** Clear a secp256k1_ge to prevent leaking sensitive information. */
+static void secp256k1_ge_clear(secp256k1_ge *r);
+
+/** Convert a group element to the storage type. */
+static void secp256k1_ge_to_storage(secp256k1_ge_storage *r, const secp256k1_ge *a);
+
+/** Convert a group element back from the storage type. */
+static void secp256k1_ge_from_storage(secp256k1_ge *r, const secp256k1_ge_storage *a);
+
+/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time.  Both *r and *a must be initialized.*/
+static void secp256k1_gej_cmov(secp256k1_gej *r, const secp256k1_gej *a, int flag);
+
+/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time.  Both *r and *a must be initialized.*/
+static void secp256k1_ge_storage_cmov(secp256k1_ge_storage *r, const secp256k1_ge_storage *a, int flag);
+
+/** Rescale a jacobian point by b which must be non-zero. Constant-time. */
+static void secp256k1_gej_rescale(secp256k1_gej *r, const secp256k1_fe *b);
+
+/** Convert a group element that is not infinity to a 64-byte array. The output
+ *  array is platform-dependent. */
+static void secp256k1_ge_to_bytes(unsigned char *buf, const secp256k1_ge *a);
+
+/** Convert a 64-byte array into group element. This function assumes that the
+ *  provided buffer correctly encodes a group element. */
+static void secp256k1_ge_from_bytes(secp256k1_ge *r, const unsigned char *buf);
+
+/** Convert a group element (that is allowed to be infinity) to a 64-byte
+ *  array. The output array is platform-dependent. */
+static void secp256k1_ge_to_bytes_ext(unsigned char *data, const secp256k1_ge *ge);
+
+/** Convert a 64-byte array into a group element. This function assumes that the
+ *  provided buffer is the output of secp256k1_ge_to_bytes_ext. */
+static void secp256k1_ge_from_bytes_ext(secp256k1_ge *ge, const unsigned char *data);
+
+/** Determine if a point (which is assumed to be on the curve) is in the correct (sub)group of the curve.
+ *
+ * In normal mode, the used group is secp256k1, which has cofactor=1 meaning that every point on the curve is in the
+ * group, and this function returns always true.
+ *
+ * When compiling in exhaustive test mode, a slightly different curve equation is used, leading to a group with a
+ * (very) small subgroup, and that subgroup is what is used for all cryptographic operations. In that mode, this
+ * function checks whether a point that is on the curve is in fact also in that subgroup.
+ */
+static int secp256k1_ge_is_in_correct_subgroup(const secp256k1_ge* ge);
+
+/** Check invariants on an affine group element (no-op unless VERIFY is enabled). */
+static void secp256k1_ge_verify(const secp256k1_ge *a);
+#define SECP256K1_GE_VERIFY(a) secp256k1_ge_verify(a)
+
+/** Check invariants on a Jacobian group element (no-op unless VERIFY is enabled). */
+static void secp256k1_gej_verify(const secp256k1_gej *a);
+#define SECP256K1_GEJ_VERIFY(a) secp256k1_gej_verify(a)
+
+#endif /* SECP256K1_GROUP_H */
--- a/libsecp256k1/src/group_impl.h
+++ b/libsecp256k1/src/group_impl.h
@@ -0,0 +1,974 @@
+/***********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                              *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_GROUP_IMPL_H
+#define SECP256K1_GROUP_IMPL_H
+
+#include <string.h>
+
+#include "field.h"
+#include "group.h"
+#include "util.h"
+
+/* Begin of section generated by sage/gen_exhaustive_groups.sage. */
+#define SECP256K1_G_ORDER_7 SECP256K1_GE_CONST(\
+    0x66625d13, 0x317ffe44, 0x63d32cff, 0x1ca02b9b,\
+    0xe5c6d070, 0x50b4b05e, 0x81cc30db, 0xf5166f0a,\
+    0x1e60e897, 0xa7c00c7c, 0x2df53eb6, 0x98274ff4,\
+    0x64252f42, 0x8ca44e17, 0x3b25418c, 0xff4ab0cf\
+)
+#define SECP256K1_G_ORDER_13 SECP256K1_GE_CONST(\
+    0xa2482ff8, 0x4bf34edf, 0xa51262fd, 0xe57921db,\
+    0xe0dd2cb7, 0xa5914790, 0xbc71631f, 0xc09704fb,\
+    0x942536cb, 0xa3e49492, 0x3a701cc3, 0xee3e443f,\
+    0xdf182aa9, 0x15b8aa6a, 0x166d3b19, 0xba84b045\
+)
+#define SECP256K1_G_ORDER_199 SECP256K1_GE_CONST(\
+    0x7fb07b5c, 0xd07c3bda, 0x553902e2, 0x7a87ea2c,\
+    0x35108a7f, 0x051f41e5, 0xb76abad5, 0x1f2703ad,\
+    0x0a251539, 0x5b4c4438, 0x952a634f, 0xac10dd4d,\
+    0x6d6f4745, 0x98990c27, 0x3a4f3116, 0xd32ff969\
+)
+/** Generator for secp256k1, value 'g' defined in
+ *  "Standards for Efficient Cryptography" (SEC2) 2.7.1.
+ */
+#define SECP256K1_G SECP256K1_GE_CONST(\
+    0x79be667e, 0xf9dcbbac, 0x55a06295, 0xce870b07,\
+    0x029bfcdb, 0x2dce28d9, 0x59f2815b, 0x16f81798,\
+    0x483ada77, 0x26a3c465, 0x5da4fbfc, 0x0e1108a8,\
+    0xfd17b448, 0xa6855419, 0x9c47d08f, 0xfb10d4b8\
+)
+/* These exhaustive group test orders and generators are chosen such that:
+ * - The field size is equal to that of secp256k1, so field code is the same.
+ * - The curve equation is of the form y^2=x^3+B for some small constant B.
+ * - The subgroup has a generator 2*P, where P.x is as small as possible.
+ * - The subgroup has size less than 1000 to permit exhaustive testing.
+ * - The subgroup admits an endomorphism of the form lambda*(x,y) == (beta*x,y).
+ */
+#if defined(EXHAUSTIVE_TEST_ORDER)
+#  if EXHAUSTIVE_TEST_ORDER == 7
+
+static const secp256k1_ge secp256k1_ge_const_g = SECP256K1_G_ORDER_7;
+#define SECP256K1_B 6
+
+#  elif EXHAUSTIVE_TEST_ORDER == 13
+
+static const secp256k1_ge secp256k1_ge_const_g = SECP256K1_G_ORDER_13;
+#define SECP256K1_B 2
+
+#  elif EXHAUSTIVE_TEST_ORDER == 199
+
+static const secp256k1_ge secp256k1_ge_const_g = SECP256K1_G_ORDER_199;
+#define SECP256K1_B 4
+
+#  else
+#    error No known generator for the specified exhaustive test group order.
+#  endif
+#else
+
+static const secp256k1_ge secp256k1_ge_const_g = SECP256K1_G;
+#define SECP256K1_B 7
+
+#endif
+/* End of section generated by sage/gen_exhaustive_groups.sage. */
+
+/* Check the invariants of an affine group element: both coordinates pass the field
+ * element checks, their magnitudes stay within SECP256K1_GE_X_MAGNITUDE_MAX and
+ * SECP256K1_GE_Y_MAGNITUDE_MAX respectively, and the infinity flag is exactly 0 or 1. */
+static void secp256k1_ge_verify(const secp256k1_ge *a) {
+    SECP256K1_FE_VERIFY(&a->x);
+    SECP256K1_FE_VERIFY(&a->y);
+    SECP256K1_FE_VERIFY_MAGNITUDE(&a->x, SECP256K1_GE_X_MAGNITUDE_MAX);
+    SECP256K1_FE_VERIFY_MAGNITUDE(&a->y, SECP256K1_GE_Y_MAGNITUDE_MAX);
+    VERIFY_CHECK(a->infinity == 0 || a->infinity == 1);
+    (void)a; /* keeps the parameter referenced in builds where the checks above compile away */
+}
+
+/* Check the invariants of a Jacobian group element: all three coordinates pass the field
+ * element checks, their magnitudes stay within the SECP256K1_GEJ_{X,Y,Z}_MAGNITUDE_MAX
+ * bounds, and the infinity flag is exactly 0 or 1. */
+static void secp256k1_gej_verify(const secp256k1_gej *a) {
+    SECP256K1_FE_VERIFY(&a->x);
+    SECP256K1_FE_VERIFY(&a->y);
+    SECP256K1_FE_VERIFY(&a->z);
+    SECP256K1_FE_VERIFY_MAGNITUDE(&a->x, SECP256K1_GEJ_X_MAGNITUDE_MAX);
+    SECP256K1_FE_VERIFY_MAGNITUDE(&a->y, SECP256K1_GEJ_Y_MAGNITUDE_MAX);
+    SECP256K1_FE_VERIFY_MAGNITUDE(&a->z, SECP256K1_GEJ_Z_MAGNITUDE_MAX);
+    VERIFY_CHECK(a->infinity == 0 || a->infinity == 1);
+    (void)a; /* keeps the parameter referenced in builds where the checks above compile away */
+}
+
+/* Set r to the affine coordinates of Jacobian point (a.x, a.y, 1/zi).
+ * a must not be infinity. a->z is not read; zi supplies the inverse z to use.
+ * zi may point at r->x (secp256k1_ge_set_all_gej_var calls it that way), which is
+ * why zi^2 and zi^3 are computed into temporaries before r->x is written. */
+static void secp256k1_ge_set_gej_zinv(secp256k1_ge *r, const secp256k1_gej *a, const secp256k1_fe *zi) {
+    secp256k1_fe zi2;
+    secp256k1_fe zi3;
+    SECP256K1_GEJ_VERIFY(a);
+    SECP256K1_FE_VERIFY(zi);
+    VERIFY_CHECK(!a->infinity);
+
+    secp256k1_fe_sqr(&zi2, zi);            /* zi2 = zi^2 */
+    secp256k1_fe_mul(&zi3, &zi2, zi);      /* zi3 = zi^3 */
+    secp256k1_fe_mul(&r->x, &a->x, &zi2);  /* r.x = a.x * zi^2 */
+    secp256k1_fe_mul(&r->y, &a->y, &zi3);  /* r.y = a.y * zi^3 */
+    r->infinity = a->infinity;
+
+    SECP256K1_GE_VERIFY(r);
+}
+
+/* Set r to the affine coordinates of Jacobian point (a.x, a.y, 1/zi).
+ * Same computation as secp256k1_ge_set_gej_zinv, but the input coordinates come from a
+ * secp256k1_ge (which has no z field). a must not be infinity.
+ * r may be the same object as a: secp256k1_ge_table_set_globalz rescales its entries in place. */
+static void secp256k1_ge_set_ge_zinv(secp256k1_ge *r, const secp256k1_ge *a, const secp256k1_fe *zi) {
+    secp256k1_fe zi2;
+    secp256k1_fe zi3;
+    SECP256K1_GE_VERIFY(a);
+    SECP256K1_FE_VERIFY(zi);
+    VERIFY_CHECK(!a->infinity);
+
+    secp256k1_fe_sqr(&zi2, zi);            /* zi2 = zi^2 */
+    secp256k1_fe_mul(&zi3, &zi2, zi);      /* zi3 = zi^3 */
+    secp256k1_fe_mul(&r->x, &a->x, &zi2);  /* r.x = a.x * zi^2 */
+    secp256k1_fe_mul(&r->y, &a->y, &zi3);  /* r.y = a.y * zi^3 */
+    r->infinity = a->infinity;
+
+    SECP256K1_GE_VERIFY(r);
+}
+
+/* Build a finite affine point from the given coordinates. The coordinates are copied
+ * as-is; no check that (x, y) lies on the curve is made here. */
+static void secp256k1_ge_set_xy(secp256k1_ge *r, const secp256k1_fe *x, const secp256k1_fe *y) {
+    SECP256K1_FE_VERIFY(x);
+    SECP256K1_FE_VERIFY(y);
+
+    r->x = *x;
+    r->y = *y;
+    /* A point built from explicit coordinates is never the point at infinity. */
+    r->infinity = 0;
+
+    SECP256K1_GE_VERIFY(r);
+}
+
+/* Return the infinity flag of the affine element a (after checking its invariants). */
+static int secp256k1_ge_is_infinity(const secp256k1_ge *a) {
+    SECP256K1_GE_VERIFY(a);
+
+    return a->infinity;
+}
+
+/* Set r to the negation of a: same x and infinity flag, y replaced by -y. */
+static void secp256k1_ge_neg(secp256k1_ge *r, const secp256k1_ge *a) {
+    SECP256K1_GE_VERIFY(a);
+
+    r->infinity = a->infinity;
+    r->x = a->x;
+    r->y = a->y;
+    /* Weakly normalize y first so that the negation below can be given magnitude bound 1. */
+    secp256k1_fe_normalize_weak(&r->y);
+    secp256k1_fe_negate(&r->y, &r->y, 1);
+
+    SECP256K1_GE_VERIFY(r);
+}
+
+/* Convert the Jacobian point a to affine coordinates in r. There is no branch on
+ * a->infinity: the same sequence of field operations runs for every input (the
+ * declaration in group.h labels this variant constant time).
+ * Side effect: a is rewritten in place; its x and y are scaled by 1/z^2 and 1/z^3
+ * and its z is set to 1. */
+static void secp256k1_ge_set_gej(secp256k1_ge *r, secp256k1_gej *a) {
+    secp256k1_fe z2, z3;
+    SECP256K1_GEJ_VERIFY(a);
+
+    r->infinity = a->infinity;
+    secp256k1_fe_inv(&a->z, &a->z);      /* a.z = 1/z */
+    secp256k1_fe_sqr(&z2, &a->z);        /* z2 = 1/z^2 */
+    secp256k1_fe_mul(&z3, &a->z, &z2);   /* z3 = 1/z^3 */
+    secp256k1_fe_mul(&a->x, &a->x, &z2); /* a.x = x/z^2 */
+    secp256k1_fe_mul(&a->y, &a->y, &z3); /* a.y = y/z^3 */
+    secp256k1_fe_set_int(&a->z, 1);
+    r->x = a->x;
+    r->y = a->y;
+
+    SECP256K1_GEJ_VERIFY(a);
+    SECP256K1_GE_VERIFY(r);
+}
+
+/* Convert the Jacobian point a to affine coordinates in r, branching on whether a is
+ * infinity. For an infinity input r is set to infinity and a is left untouched.
+ * Otherwise a is rewritten in place (x and y scaled by 1/z^2 and 1/z^3, z set to 1)
+ * and r receives the resulting x and y. */
+static void secp256k1_ge_set_gej_var(secp256k1_ge *r, secp256k1_gej *a) {
+    secp256k1_fe z2, z3;
+    SECP256K1_GEJ_VERIFY(a);
+
+    if (secp256k1_gej_is_infinity(a)) {
+        secp256k1_ge_set_infinity(r);
+        return;
+    }
+    r->infinity = 0;
+    secp256k1_fe_inv_var(&a->z, &a->z);  /* a.z = 1/z */
+    secp256k1_fe_sqr(&z2, &a->z);        /* z2 = 1/z^2 */
+    secp256k1_fe_mul(&z3, &a->z, &z2);   /* z3 = 1/z^3 */
+    secp256k1_fe_mul(&a->x, &a->x, &z2); /* a.x = x/z^2 */
+    secp256k1_fe_mul(&a->y, &a->y, &z3); /* a.y = y/z^3 */
+    secp256k1_fe_set_int(&a->z, 1);
+    secp256k1_ge_set_xy(r, &a->x, &a->y);
+
+    SECP256K1_GEJ_VERIFY(a);
+    SECP256K1_GE_VERIFY(r);
+}
+
+/* Convert len Jacobian points a[0..len-1] to affine points r[0..len-1] using a single
+ * field inversion for the whole batch. Infinity inputs produce infinity outputs and do
+ * not take part in the inversion. The inputs are not modified. */
+static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len) {
+    secp256k1_fe u;
+    size_t i;
+    size_t last_i = SIZE_MAX; /* SIZE_MAX means: no non-infinity element seen so far */
+#ifdef VERIFY
+    for (i = 0; i < len; i++) {
+        SECP256K1_GEJ_VERIFY(&a[i]);
+    }
+#endif
+
+    /* Forward pass: for every non-infinity index i, r[i].x becomes the product of the
+     * z coordinates of all non-infinity inputs up to and including i. */
+    for (i = 0; i < len; i++) {
+        if (a[i].infinity) {
+            secp256k1_ge_set_infinity(&r[i]);
+        } else {
+            /* Use destination's x coordinates as scratch space */
+            if (last_i == SIZE_MAX) {
+                r[i].x = a[i].z;
+            } else {
+                secp256k1_fe_mul(&r[i].x, &r[last_i].x, &a[i].z);
+            }
+            last_i = i;
+        }
+    }
+    if (last_i == SIZE_MAX) {
+        /* Every input was infinity (or len == 0): nothing to invert. */
+        return;
+    }
+    /* u = inverse of the product of all non-infinity z coordinates. */
+    secp256k1_fe_inv_var(&u, &r[last_i].x);
+
+    /* Backward pass: on entry to each step u is the inverse of the product up to last_i.
+     * Multiplying by the product up to the previous non-infinity index i leaves
+     * 1/a[last_i].z, which is stored in r[last_i].x; u then drops the a[last_i].z factor. */
+    i = last_i;
+    while (i > 0) {
+        i--;
+        if (!a[i].infinity) {
+            secp256k1_fe_mul(&r[last_i].x, &r[i].x, &u);
+            secp256k1_fe_mul(&u, &u, &a[last_i].z);
+            last_i = i;
+        }
+    }
+    /* last_i is now the first non-infinity index and u is the inverse of its z. */
+    VERIFY_CHECK(!a[last_i].infinity);
+    r[last_i].x = u;
+
+    /* Each r[i].x now holds 1/a[i].z; it is passed as zi while also being the output x. */
+    for (i = 0; i < len; i++) {
+        if (!a[i].infinity) {
+            secp256k1_ge_set_gej_zinv(&r[i], &a[i], &r[i].x);
+        }
+    }
+
+#ifdef VERIFY
+    for (i = 0; i < len; i++) {
+        SECP256K1_GE_VERIFY(&r[i]);
+    }
+#endif
+}
+
+/* Rescale a[0..len-2] in place so that all entries share the (omitted) z coordinate of
+ * a[len-1]; see the declaration in group.h for the meaning of the zr ratios.
+ * Only the y coordinate of a[len-1] is touched (weak normalization). Does nothing for len == 0. */
+static void secp256k1_ge_table_set_globalz(size_t len, secp256k1_ge *a, const secp256k1_fe *zr) {
+    size_t i;
+    secp256k1_fe zs; /* running product of z ratios */
+#ifdef VERIFY
+    for (i = 0; i < len; i++) {
+        SECP256K1_GE_VERIFY(&a[i]);
+        SECP256K1_FE_VERIFY(&zr[i]);
+    }
+#endif
+
+    if (len > 0) {
+        i = len - 1;
+        /* Ensure all y values are in weak normal form for fast negation of points */
+        secp256k1_fe_normalize_weak(&a[i].y);
+        zs = zr[i];
+
+        /* Work our way backwards, using the z-ratios to scale the x/y values. */
+        while (i > 0) {
+            /* The first pass uses zr[len-1] as loaded above; later passes fold in zr[i]. */
+            if (i != len - 1) {
+                secp256k1_fe_mul(&zs, &zs, &zr[i]);
+            }
+            i--;
+            /* Here zs is the product zr[i+1] * ... * zr[len-1]. */
+            secp256k1_ge_set_ge_zinv(&a[i], &a[i], &zs);
+        }
+    }
+
+#ifdef VERIFY
+    for (i = 0; i < len; i++) {
+        SECP256K1_GE_VERIFY(&a[i]);
+    }
+#endif
+}
+
+/* Make r the Jacobian point at infinity: all three coordinates zero, infinity flag set. */
+static void secp256k1_gej_set_infinity(secp256k1_gej *r) {
+    secp256k1_fe_set_int(&r->z, 0);
+    secp256k1_fe_set_int(&r->y, 0);
+    secp256k1_fe_set_int(&r->x, 0);
+    r->infinity = 1;
+
+    SECP256K1_GEJ_VERIFY(r);
+}
+
+/* Make r the affine point at infinity: both coordinates zero, infinity flag set. */
+static void secp256k1_ge_set_infinity(secp256k1_ge *r) {
+    secp256k1_fe_set_int(&r->y, 0);
+    secp256k1_fe_set_int(&r->x, 0);
+    r->infinity = 1;
+
+    SECP256K1_GE_VERIFY(r);
+}
+
+/* Wipe the whole Jacobian element (coordinates and flag) via secp256k1_memclear. */
+static void secp256k1_gej_clear(secp256k1_gej *r) {
+    secp256k1_memclear(r, sizeof(*r));
+}
+
+/* Wipe the whole affine element (coordinates and flag) via secp256k1_memclear. */
+static void secp256k1_ge_clear(secp256k1_ge *r) {
+    secp256k1_memclear(r, sizeof(*r));
+}
+
+/* Set r to the affine point with X coordinate x whose Y coordinate has the requested
+ * oddness. Returns the result of secp256k1_fe_sqrt on x^3 + B; r is filled in and
+ * parity-adjusted regardless of that result, so callers must check the return value. */
+static int secp256k1_ge_set_xo_var(secp256k1_ge *r, const secp256k1_fe *x, int odd) {
+    secp256k1_fe xsq, rhs;
+    int sqrt_ok;
+    SECP256K1_FE_VERIFY(x);
+
+    r->infinity = 0;
+    r->x = *x;
+
+    /* rhs = x^3 + B, the right-hand side of the curve equation. */
+    secp256k1_fe_sqr(&xsq, x);
+    secp256k1_fe_mul(&rhs, x, &xsq);
+    secp256k1_fe_add_int(&rhs, SECP256K1_B);
+
+    sqrt_ok = secp256k1_fe_sqrt(&r->y, &rhs);
+
+    /* Normalize so the parity test is meaningful, then flip the sign if it is the wrong one. */
+    secp256k1_fe_normalize_var(&r->y);
+    if (secp256k1_fe_is_odd(&r->y) != odd) {
+        secp256k1_fe_negate(&r->y, &r->y, 1);
+    }
+
+    SECP256K1_GE_VERIFY(r);
+    return sqrt_ok;
+}
+
+/* Lift the affine element a to Jacobian form: same x, y and infinity flag, with z = 1. */
+static void secp256k1_gej_set_ge(secp256k1_gej *r, const secp256k1_ge *a) {
+    SECP256K1_GE_VERIFY(a);
+
+    secp256k1_fe_set_int(&r->z, 1);
+    r->x = a->x;
+    r->y = a->y;
+    r->infinity = a->infinity;
+
+    SECP256K1_GEJ_VERIFY(r);
+}
+
+/* Compare two Jacobian elements by computing (-a) + b and testing the sum for infinity. */
+static int secp256k1_gej_eq_var(const secp256k1_gej *a, const secp256k1_gej *b) {
+    secp256k1_gej diff;
+    SECP256K1_GEJ_VERIFY(b);
+    SECP256K1_GEJ_VERIFY(a);
+
+    secp256k1_gej_neg(&diff, a);                  /* diff = -a */
+    secp256k1_gej_add_var(&diff, &diff, b, NULL); /* diff = b - a */
+    return secp256k1_gej_is_infinity(&diff);
+}
+
+/* Compare a Jacobian element with an affine one by computing (-a) + b and testing
+ * the sum for infinity. */
+static int secp256k1_gej_eq_ge_var(const secp256k1_gej *a, const secp256k1_ge *b) {
+    secp256k1_gej diff;
+    SECP256K1_GEJ_VERIFY(a);
+    SECP256K1_GE_VERIFY(b);
+
+    secp256k1_gej_neg(&diff, a);                     /* diff = -a */
+    secp256k1_gej_add_ge_var(&diff, &diff, b, NULL); /* diff = b - a */
+    return secp256k1_gej_is_infinity(&diff);
+}
+
+/* Compare two affine elements. Two infinities are equal, an infinity never equals a
+ * finite point, and finite points are equal when both coordinates are equal.
+ * Returns 1 for equal, 0 otherwise. */
+static int secp256k1_ge_eq_var(const secp256k1_ge *a, const secp256k1_ge *b) {
+    secp256k1_fe coord;
+    SECP256K1_GE_VERIFY(a);
+    SECP256K1_GE_VERIFY(b);
+
+    if (a->infinity != b->infinity) {
+        return 0;
+    }
+    if (a->infinity) {
+        return 1;
+    }
+
+    /* A weakly normalized copy of a's coordinate is compared; a and b are not modified. */
+    coord = a->x;
+    secp256k1_fe_normalize_weak(&coord);
+    if (!secp256k1_fe_equal(&coord, &b->x)) {
+        return 0;
+    }
+
+    coord = a->y;
+    secp256k1_fe_normalize_weak(&coord);
+    if (!secp256k1_fe_equal(&coord, &b->y)) {
+        return 0;
+    }
+
+    return 1;
+}
+
+/* Test whether x equals the affine X coordinate of the non-infinity Jacobian point a.
+ * No inversion is needed: x * a.z^2 is compared against a.x instead. */
+static int secp256k1_gej_eq_x_var(const secp256k1_fe *x, const secp256k1_gej *a) {
+    secp256k1_fe scaled_x;
+    SECP256K1_FE_VERIFY(x);
+    SECP256K1_GEJ_VERIFY(a);
+    VERIFY_CHECK(!a->infinity);
+
+    secp256k1_fe_sqr(&scaled_x, &a->z);
+    secp256k1_fe_mul(&scaled_x, &scaled_x, x);
+    return secp256k1_fe_equal(&scaled_x, &a->x);
+}
+
+/* Set r to the negation of a: same x, z and infinity flag, y replaced by -y. */
+static void secp256k1_gej_neg(secp256k1_gej *r, const secp256k1_gej *a) {
+    SECP256K1_GEJ_VERIFY(a);
+
+    *r = *a;
+    /* Weakly normalize y first so that the negation below can be given magnitude bound 1. */
+    secp256k1_fe_normalize_weak(&r->y);
+    secp256k1_fe_negate(&r->y, &r->y, 1);
+
+    SECP256K1_GEJ_VERIFY(r);
+}
+
+/* Return the infinity flag of the Jacobian element a (after checking its invariants). */
+static int secp256k1_gej_is_infinity(const secp256k1_gej *a) {
+    SECP256K1_GEJ_VERIFY(a);
+
+    return a->infinity;
+}
+
+/* Check that the affine element a satisfies the curve equation. The point at
+ * infinity is reported as not valid. */
+static int secp256k1_ge_is_valid_var(const secp256k1_ge *a) {
+    secp256k1_fe lhs, rhs;
+    SECP256K1_GE_VERIFY(a);
+
+    if (a->infinity) {
+        return 0;
+    }
+
+    /* y^2 = x^3 + B, where B is SECP256K1_B (7 outside exhaustive test mode) */
+    secp256k1_fe_sqr(&lhs, &a->y);
+    secp256k1_fe_sqr(&rhs, &a->x);
+    secp256k1_fe_mul(&rhs, &rhs, &a->x);
+    secp256k1_fe_add_int(&rhs, SECP256K1_B);
+    return secp256k1_fe_equal(&lhs, &rhs);
+}
+
+/* Set r = 2*a. The infinity flag is copied without a branch and the same field
+ * operations run for every input (the declaration in group.h labels this constant time).
+ * Each coordinate of r is written only after the last read of the same coordinate of a,
+ * so the statement order below must be preserved if r and a are the same object.
+ * NOTE(review): the parenthesized number ending each step comment looks like the
+ * magnitude of the value just written; confirm against the field documentation. */
+static SECP256K1_INLINE void secp256k1_gej_double(secp256k1_gej *r, const secp256k1_gej *a) {
+    /* Operations: 3 mul, 4 sqr, 8 add/half/mul_int/negate */
+    secp256k1_fe l, s, t;
+    SECP256K1_GEJ_VERIFY(a);
+
+    r->infinity = a->infinity;
+
+    /* Formula used:
+     * L = (3/2) * X1^2
+     * S = Y1^2
+     * T = -X1*S
+     * X3 = L^2 + 2*T
+     * Y3 = -(L*(X3 + T) + S^2)
+     * Z3 = Y1*Z1
+     */
+
+    secp256k1_fe_mul(&r->z, &a->z, &a->y); /* Z3 = Y1*Z1 (1) */
+    secp256k1_fe_sqr(&s, &a->y);           /* S = Y1^2 (1) */
+    secp256k1_fe_sqr(&l, &a->x);           /* L = X1^2 (1) */
+    secp256k1_fe_mul_int(&l, 3);           /* L = 3*X1^2 (3) */
+    secp256k1_fe_half(&l);                 /* L = 3/2*X1^2 (2) */
+    secp256k1_fe_negate(&t, &s, 1);        /* T = -S (2) */
+    secp256k1_fe_mul(&t, &t, &a->x);       /* T = -X1*S (1) */
+    secp256k1_fe_sqr(&r->x, &l);           /* X3 = L^2 (1) */
+    secp256k1_fe_add(&r->x, &t);           /* X3 = L^2 + T (2) */
+    secp256k1_fe_add(&r->x, &t);           /* X3 = L^2 + 2*T (3) */
+    secp256k1_fe_sqr(&s, &s);              /* S' = S^2 (1) */
+    secp256k1_fe_add(&t, &r->x);           /* T' = X3 + T (4) */
+    secp256k1_fe_mul(&r->y, &t, &l);       /* Y3 = L*(X3 + T) (1) */
+    secp256k1_fe_add(&r->y, &s);           /* Y3 = L*(X3 + T) + S^2 (2) */
+    secp256k1_fe_negate(&r->y, &r->y, 2);  /* Y3 = -(L*(X3 + T) + S^2) (3) */
+
+    SECP256K1_GEJ_VERIFY(r);
+}
+
+/* Set r = 2*a, branching on whether a is infinity. If rzr is non-NULL it receives the
+ * factor relating the z coordinates (r->z == a->z * *rzr): 1 when a is infinity,
+ * otherwise a weakly normalized copy of a->y. */
+static void secp256k1_gej_double_var(secp256k1_gej *r, const secp256k1_gej *a, secp256k1_fe *rzr) {
+    SECP256K1_GEJ_VERIFY(a);
+
+    /** For secp256k1, 2Q is infinity if and only if Q is infinity. This is because if 2Q = infinity,
+     *  Q must equal -Q, or that Q.y == -(Q.y), or Q.y is 0. For a point on y^2 = x^3 + 7 to have
+     *  y=0, x^3 must be -7 mod p. However, -7 has no cube root mod p.
+     *
+     *  Having said this, if this function receives a point on a sextic twist, e.g. by
+     *  a fault attack, it is possible for y to be 0. This happens for y^2 = x^3 + 6,
+     *  since -6 does have a cube root mod p. For this point, this function will not set
+     *  the infinity flag even though the point doubles to infinity, and the result
+     *  point will be gibberish (z = 0 but infinity = 0).
+     */
+    if (a->infinity) {
+        secp256k1_gej_set_infinity(r);
+        if (rzr != NULL) {
+            secp256k1_fe_set_int(rzr, 1);
+        }
+        return;
+    }
+
+    /* secp256k1_gej_double computes Z3 = Y1*Z1, so the z ratio is Y1. It is captured
+     * before the doubling because r may be the same object as a. */
+    if (rzr != NULL) {
+        *rzr = a->y;
+        secp256k1_fe_normalize_weak(rzr);
+    }
+
+    secp256k1_gej_double(r, a);
+
+    SECP256K1_GEJ_VERIFY(r);
+}
+
+/* Set r = a + b for two Jacobian points, in variable time (branches on the inputs).
+ * If rzr is non-NULL it receives the factor with r->z == a->z * *rzr; in that case a
+ * must not be infinity. Special cases: an infinity operand yields a copy of the other
+ * operand, equal points are doubled, and opposite points yield infinity (rzr = 0). */
+static void secp256k1_gej_add_var(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_gej *b, secp256k1_fe *rzr) {
+    /* 12 mul, 4 sqr, 11 add/negate/normalizes_to_zero (ignoring special cases) */
+    secp256k1_fe z22, z12, u1, u2, s1, s2, h, i, h2, h3, t;
+    SECP256K1_GEJ_VERIFY(a);
+    SECP256K1_GEJ_VERIFY(b);
+
+    if (a->infinity) {
+        VERIFY_CHECK(rzr == NULL);
+        *r = *b;
+        return;
+    }
+    if (b->infinity) {
+        if (rzr != NULL) {
+            secp256k1_fe_set_int(rzr, 1);
+        }
+        *r = *a;
+        return;
+    }
+
+    /* Bring both points onto the common denominator a.z * b.z:
+     * u1, u2 are the scaled x coordinates and s1, s2 the scaled y coordinates. */
+    secp256k1_fe_sqr(&z22, &b->z);
+    secp256k1_fe_sqr(&z12, &a->z);
+    secp256k1_fe_mul(&u1, &a->x, &z22);
+    secp256k1_fe_mul(&u2, &b->x, &z12);
+    secp256k1_fe_mul(&s1, &a->y, &z22); secp256k1_fe_mul(&s1, &s1, &b->z);
+    secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z);
+    secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2); /* h = u2 - u1 */
+    secp256k1_fe_negate(&i, &s2, 1); secp256k1_fe_add(&i, &s1); /* i = s1 - s2 */
+    if (secp256k1_fe_normalizes_to_zero_var(&h)) {
+        /* Same affine x: the points are either equal (i == 0) or opposite. */
+        if (secp256k1_fe_normalizes_to_zero_var(&i)) {
+            secp256k1_gej_double_var(r, a, rzr);
+        } else {
+            if (rzr != NULL) {
+                secp256k1_fe_set_int(rzr, 0);
+            }
+            secp256k1_gej_set_infinity(r);
+        }
+        return;
+    }
+
+    r->infinity = 0;
+    secp256k1_fe_mul(&t, &h, &b->z);     /* t = h * b.z, the z ratio */
+    if (rzr != NULL) {
+        *rzr = t;
+    }
+    secp256k1_fe_mul(&r->z, &a->z, &t);  /* r.z = a.z * b.z * h */
+
+    secp256k1_fe_sqr(&h2, &h);
+    secp256k1_fe_negate(&h2, &h2, 1);    /* h2 = -h^2 */
+    secp256k1_fe_mul(&h3, &h2, &h);      /* h3 = -h^3 */
+    secp256k1_fe_mul(&t, &u1, &h2);      /* t = -u1*h^2 */
+
+    secp256k1_fe_sqr(&r->x, &i);
+    secp256k1_fe_add(&r->x, &h3);
+    secp256k1_fe_add(&r->x, &t);
+    secp256k1_fe_add(&r->x, &t);         /* r.x = i^2 - h^3 - 2*u1*h^2 */
+
+    secp256k1_fe_add(&t, &r->x);         /* t = r.x - u1*h^2 */
+    secp256k1_fe_mul(&r->y, &t, &i);
+    secp256k1_fe_mul(&h3, &h3, &s1);     /* h3 = -s1*h^3 */
+    secp256k1_fe_add(&r->y, &h3);        /* r.y = i*(r.x - u1*h^2) - s1*h^3 */
+
+    SECP256K1_GEJ_VERIFY(r);
+}
+
+/* Set r = a + b where b is affine, in variable time (branches on the inputs).
+ * Same structure as secp256k1_gej_add_var with b's z taken as 1, so a's coordinates
+ * are used unscaled. If rzr is non-NULL it receives the factor with
+ * r->z == a->z * *rzr; in that case a must not be infinity. */
+static void secp256k1_gej_add_ge_var(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_ge *b, secp256k1_fe *rzr) {
+    /* Operations: 8 mul, 3 sqr, 11 add/negate/normalizes_to_zero (ignoring special cases) */
+    secp256k1_fe z12, u1, u2, s1, s2, h, i, h2, h3, t;
+    SECP256K1_GEJ_VERIFY(a);
+    SECP256K1_GE_VERIFY(b);
+
+    if (a->infinity) {
+        VERIFY_CHECK(rzr == NULL);
+        secp256k1_gej_set_ge(r, b);
+        return;
+    }
+    if (b->infinity) {
+        if (rzr != NULL) {
+            secp256k1_fe_set_int(rzr, 1);
+        }
+        *r = *a;
+        return;
+    }
+
+    secp256k1_fe_sqr(&z12, &a->z);
+    u1 = a->x;
+    secp256k1_fe_mul(&u2, &b->x, &z12);
+    s1 = a->y;
+    secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z);
+    /* u1 is an unmodified copy of a->x, so its magnitude bound is SECP256K1_GEJ_X_MAGNITUDE_MAX. */
+    secp256k1_fe_negate(&h, &u1, SECP256K1_GEJ_X_MAGNITUDE_MAX); secp256k1_fe_add(&h, &u2); /* h = u2 - u1 */
+    secp256k1_fe_negate(&i, &s2, 1); secp256k1_fe_add(&i, &s1); /* i = s1 - s2 */
+    if (secp256k1_fe_normalizes_to_zero_var(&h)) {
+        /* Same affine x: the points are either equal (i == 0) or opposite. */
+        if (secp256k1_fe_normalizes_to_zero_var(&i)) {
+            secp256k1_gej_double_var(r, a, rzr);
+        } else {
+            if (rzr != NULL) {
+                secp256k1_fe_set_int(rzr, 0);
+            }
+            secp256k1_gej_set_infinity(r);
+        }
+        return;
+    }
+
+    r->infinity = 0;
+    if (rzr != NULL) {
+        *rzr = h;                        /* the z ratio is h itself */
+    }
+    secp256k1_fe_mul(&r->z, &a->z, &h);  /* r.z = a.z * h */
+
+    secp256k1_fe_sqr(&h2, &h);
+    secp256k1_fe_negate(&h2, &h2, 1);    /* h2 = -h^2 */
+    secp256k1_fe_mul(&h3, &h2, &h);      /* h3 = -h^3 */
+    secp256k1_fe_mul(&t, &u1, &h2);      /* t = -u1*h^2 */
+
+    secp256k1_fe_sqr(&r->x, &i);
+    secp256k1_fe_add(&r->x, &h3);
+    secp256k1_fe_add(&r->x, &t);
+    secp256k1_fe_add(&r->x, &t);         /* r.x = i^2 - h^3 - 2*u1*h^2 */
+
+    secp256k1_fe_add(&t, &r->x);         /* t = r.x - u1*h^2 */
+    secp256k1_fe_mul(&r->y, &t, &i);
+    secp256k1_fe_mul(&h3, &h3, &s1);     /* h3 = -s1*h^3 */
+    secp256k1_fe_add(&r->y, &h3);        /* r.y = i*(r.x - u1*h^2) - s1*h^3 */
+
+    SECP256K1_GEJ_VERIFY(r);
+    if (rzr != NULL) SECP256K1_FE_VERIFY(rzr);
+}
+
+/* Set r = a + b, where b stands for the Jacobian point (b.x, b.y, 1/bzinv): its
+ * coordinates are stored in a secp256k1_ge and the inverse of its z is passed
+ * separately. Variable time (branches on the inputs). */
+static void secp256k1_gej_add_zinv_var(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_ge *b, const secp256k1_fe *bzinv) {
+    /* Operations: 9 mul, 3 sqr, 11 add/negate/normalizes_to_zero (ignoring special cases) */
+    secp256k1_fe az, z12, u1, u2, s1, s2, h, i, h2, h3, t;
+    SECP256K1_GEJ_VERIFY(a);
+    SECP256K1_GE_VERIFY(b);
+    SECP256K1_FE_VERIFY(bzinv);
+
+    if (a->infinity) {
+        /* The result is b itself, converted to z = 1 by scaling with bzinv^2 and bzinv^3. */
+        secp256k1_fe bzinv2, bzinv3;
+        r->infinity = b->infinity;
+        secp256k1_fe_sqr(&bzinv2, bzinv);
+        secp256k1_fe_mul(&bzinv3, &bzinv2, bzinv);
+        secp256k1_fe_mul(&r->x, &b->x, &bzinv2);
+        secp256k1_fe_mul(&r->y, &b->y, &bzinv3);
+        secp256k1_fe_set_int(&r->z, 1);
+        SECP256K1_GEJ_VERIFY(r);
+        return;
+    }
+    if (b->infinity) {
+        *r = *a;
+        return;
+    }
+
+    /** We need to calculate (rx,ry,rz) = (ax,ay,az) + (bx,by,1/bzinv). Due to
+     *  secp256k1's isomorphism we can multiply the Z coordinates on both sides
+     *  by bzinv, and get: (rx,ry,rz*bzinv) = (ax,ay,az*bzinv) + (bx,by,1).
+     *  This means that (rx,ry,rz) can be calculated as
+     *  (ax,ay,az*bzinv) + (bx,by,1), when not applying the bzinv factor to rz.
+     *  The variable az below holds the modified Z coordinate for a, which is used
+     *  for the computation of rx and ry, but not for rz.
+     */
+    secp256k1_fe_mul(&az, &a->z, bzinv);
+
+    secp256k1_fe_sqr(&z12, &az);
+    u1 = a->x;
+    secp256k1_fe_mul(&u2, &b->x, &z12);
+    s1 = a->y;
+    secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &az);
+    /* u1 is an unmodified copy of a->x, so its magnitude bound is SECP256K1_GEJ_X_MAGNITUDE_MAX. */
+    secp256k1_fe_negate(&h, &u1, SECP256K1_GEJ_X_MAGNITUDE_MAX); secp256k1_fe_add(&h, &u2); /* h = u2 - u1 */
+    secp256k1_fe_negate(&i, &s2, 1); secp256k1_fe_add(&i, &s1); /* i = s1 - s2 */
+    if (secp256k1_fe_normalizes_to_zero_var(&h)) {
+        /* Same affine x: the points are either equal (i == 0) or opposite. */
+        if (secp256k1_fe_normalizes_to_zero_var(&i)) {
+            secp256k1_gej_double_var(r, a, NULL);
+        } else {
+            secp256k1_gej_set_infinity(r);
+        }
+        return;
+    }
+
+    r->infinity = 0;
+    secp256k1_fe_mul(&r->z, &a->z, &h);  /* uses the original a->z, not az (see above) */
+
+    secp256k1_fe_sqr(&h2, &h);
+    secp256k1_fe_negate(&h2, &h2, 1);    /* h2 = -h^2 */
+    secp256k1_fe_mul(&h3, &h2, &h);      /* h3 = -h^3 */
+    secp256k1_fe_mul(&t, &u1, &h2);      /* t = -u1*h^2 */
+
+    secp256k1_fe_sqr(&r->x, &i);
+    secp256k1_fe_add(&r->x, &h3);
+    secp256k1_fe_add(&r->x, &t);
+    secp256k1_fe_add(&r->x, &t);         /* r.x = i^2 - h^3 - 2*u1*h^2 */
+
+    secp256k1_fe_add(&t, &r->x);         /* t = r.x - u1*h^2 */
+    secp256k1_fe_mul(&r->y, &t, &i);
+    secp256k1_fe_mul(&h3, &h3, &s1);     /* h3 = -s1*h^3 */
+    secp256k1_fe_add(&r->y, &h3);        /* r.y = i*(r.x - u1*h^2) - s1*h^3 */
+
+    SECP256K1_GEJ_VERIFY(r);
+}
+
+
+static void secp256k1_gej_add_ge(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_ge *b) {
+    /* Sets r = a + b (b must not be infinity); this body contains no branches, only cmov selects. Operations: 7 mul, 5 sqr, 21 add/cmov/half/mul_int/negate/normalizes_to_zero */
+    secp256k1_fe zz, u1, u2, s1, s2, t, tt, m, n, q, rr;
+    secp256k1_fe m_alt, rr_alt;
+    int degenerate; /* result of normalizes_to_zero on m = S1+S2, i.e. set when y1 == -y2 */
+    SECP256K1_GEJ_VERIFY(a);
+    SECP256K1_GE_VERIFY(b);
+    VERIFY_CHECK(!b->infinity);
+
+    /*  In:
+     *    Eric Brier and Marc Joye, Weierstrass Elliptic Curves and Side-Channel Attacks.
+     *    In D. Naccache and P. Paillier, Eds., Public Key Cryptography, vol. 2274 of Lecture Notes in Computer Science, pages 335-345. Springer-Verlag, 2002.
+     *  we find as solution for a unified addition/doubling formula:
+     *    lambda = ((x1 + x2)^2 - x1 * x2 + a) / (y1 + y2), with a = 0 for secp256k1's curve equation.
+     *    x3 = lambda^2 - (x1 + x2)
+     *    2*y3 = lambda * (x1 + x2 - 2 * x3) - (y1 + y2).
+     *
+     *  Substituting x_i = Xi / Zi^2 and yi = Yi / Zi^3, for i=1,2,3, gives:
+     *    U1 = X1*Z2^2, U2 = X2*Z1^2
+     *    S1 = Y1*Z2^3, S2 = Y2*Z1^3
+     *    Z = Z1*Z2
+     *    T = U1+U2
+     *    M = S1+S2
+     *    Q = -T*M^2
+     *    R = T^2-U1*U2
+     *    X3 = R^2+Q
+     *    Y3 = -(R*(2*X3+Q)+M^4)/2
+     *    Z3 = M*Z
+     *  (Note that the paper uses xi = Xi / Zi and yi = Yi / Zi instead.)
+     *
+     *  This formula has the benefit of being the same for both addition
+     *  of distinct points and doubling. However, it breaks down in the
+     *  case that either point is infinity, or that y1 = -y2. We handle
+     *  these cases in the following ways:
+     *
+     *    - If b is infinity we simply bail by means of a VERIFY_CHECK.
+     *
+     *    - If a is infinity, we detect this, and at the end of the
+     *      computation replace the result (which will be meaningless,
+     *      but we compute to be constant-time) with b.x : b.y : 1.
+     *
+     *    - If a = -b, we have y1 = -y2, which is a degenerate case.
+     *      But here the answer is infinity, so we simply set the
+     *      infinity flag of the result, overriding the computed values
+     *      without even needing to cmov.
+     *
+     *    - If y1 = -y2 but x1 != x2, which does occur thanks to certain
+     *      properties of our curve (specifically, 1 has nontrivial cube
+     *      roots in our field, and the curve equation has no x coefficient)
+     *      then the answer is not infinity but also not given by the above
+     *      equation. In this case, we cmov in place an alternate expression
+     *      for lambda. Specifically (y1 - y2)/(x1 - x2). Where both these
+     *      expressions for lambda are defined, they are equal, and can be
+     *      obtained from each other by multiplication by (y1 + y2)/(y1 + y2)
+     *      then substitution of x^3 + 7 for y^2 (using the curve equation).
+     *      For all pairs of nonzero points (a, b) at least one is defined,
+     *      so this covers everything.
+     */
+
+    secp256k1_fe_sqr(&zz, &a->z);                       /* zz = Z1^2 */
+    u1 = a->x;                                          /* u1 = U1 = X1*Z2^2 (GEJ_X_M) */
+    secp256k1_fe_mul(&u2, &b->x, &zz);                  /* u2 = U2 = X2*Z1^2 (1) */
+    s1 = a->y;                                          /* s1 = S1 = Y1*Z2^3 (GEJ_Y_M) */
+    secp256k1_fe_mul(&s2, &b->y, &zz);                  /* s2 = Y2*Z1^2 (1) */
+    secp256k1_fe_mul(&s2, &s2, &a->z);                  /* s2 = S2 = Y2*Z1^3 (1) */
+    t = u1; secp256k1_fe_add(&t, &u2);                  /* t = T = U1+U2 (GEJ_X_M+1) */
+    m = s1; secp256k1_fe_add(&m, &s2);                  /* m = M = S1+S2 (GEJ_Y_M+1) */
+    secp256k1_fe_sqr(&rr, &t);                          /* rr = T^2 (1) */
+    secp256k1_fe_negate(&m_alt, &u2, 1);                /* Malt = -X2*Z1^2 (2) */
+    secp256k1_fe_mul(&tt, &u1, &m_alt);                 /* tt = -U1*U2 (1) */
+    secp256k1_fe_add(&rr, &tt);                         /* rr = R = T^2-U1*U2 (2) */
+    /* If lambda = R/M = R/0 we have a problem (except in the "trivial"
+     * case that Z = z1z2 = 0, and this is special-cased later on). */
+    degenerate = secp256k1_fe_normalizes_to_zero(&m);
+    /* This only occurs when y1 == -y2 and x1^3 == x2^3, but x1 != x2.
+     * This means either x1 == beta*x2 or beta*x1 == x2, where beta is
+     * a nontrivial cube root of one. In either case, an alternate
+     * non-indeterminate expression for lambda is (y1 - y2)/(x1 - x2),
+     * so we set R/M equal to this. */
+    rr_alt = s1;
+    secp256k1_fe_mul_int(&rr_alt, 2);       /* rr_alt = 2*S1, which equals Y1*Z2^3 - Y2*Z1^3 when degenerate (S2 == -S1) (GEJ_Y_M*2) */
+    secp256k1_fe_add(&m_alt, &u1);          /* Malt = X1*Z2^2 - X2*Z1^2 (GEJ_X_M+2) */
+
+    secp256k1_fe_cmov(&rr_alt, &rr, !degenerate);       /* rr_alt (GEJ_Y_M*2) */
+    secp256k1_fe_cmov(&m_alt, &m, !degenerate);         /* m_alt (GEJ_X_M+2) */
+    /* Now Ralt / Malt = lambda and is guaranteed not to be Ralt / 0.
+     * From here on out Ralt and Malt represent the numerator
+     * and denominator of lambda; R and M represent the explicit
+     * expressions x1^2 + x2^2 + x1x2 and y1 + y2. */
+    secp256k1_fe_sqr(&n, &m_alt);                       /* n = Malt^2 (1) */
+    secp256k1_fe_negate(&q, &t,
+        SECP256K1_GEJ_X_MAGNITUDE_MAX + 1);             /* q = -T (GEJ_X_M+2) */
+    secp256k1_fe_mul(&q, &q, &n);                       /* q = Q = -T*Malt^2 (1) */
+    /* These two lines use the observation that either M == Malt or M == 0,
+     * so M^3 * Malt is either Malt^4 (which is computed by squaring), or
+     * zero (which is "computed" by cmov). So the cost is one squaring
+     * versus two multiplications. */
+    secp256k1_fe_sqr(&n, &n);                           /* n = Malt^4 (1) */
+    secp256k1_fe_cmov(&n, &m, degenerate);              /* n = M^3 * Malt (GEJ_Y_M+1) */
+    secp256k1_fe_sqr(&t, &rr_alt);                      /* t = Ralt^2 (1) */
+    secp256k1_fe_mul(&r->z, &a->z, &m_alt);             /* r->z = Z3 = Malt*Z (1) */
+    secp256k1_fe_add(&t, &q);                           /* t = Ralt^2 + Q (2) */
+    r->x = t;                                           /* r->x = X3 = Ralt^2 + Q (2) */
+    secp256k1_fe_mul_int(&t, 2);                        /* t = 2*X3 (4) */
+    secp256k1_fe_add(&t, &q);                           /* t = 2*X3 + Q (5) */
+    secp256k1_fe_mul(&t, &t, &rr_alt);                  /* t = Ralt*(2*X3 + Q) (1) */
+    secp256k1_fe_add(&t, &n);                           /* t = Ralt*(2*X3 + Q) + M^3*Malt (GEJ_Y_M+2) */
+    secp256k1_fe_negate(&r->y, &t,
+        SECP256K1_GEJ_Y_MAGNITUDE_MAX + 2);             /* r->y = -(Ralt*(2*X3 + Q) + M^3*Malt) (GEJ_Y_M+3) */
+    secp256k1_fe_half(&r->y);                           /* r->y = Y3 = -(Ralt*(2*X3 + Q) + M^3*Malt)/2 ((GEJ_Y_M+3)/2 + 1) */
+
+    /* In case a->infinity == 1, replace r with (b->x, b->y, 1). */
+    secp256k1_fe_cmov(&r->x, &b->x, a->infinity);
+    secp256k1_fe_cmov(&r->y, &b->y, a->infinity);
+    secp256k1_fe_cmov(&r->z, &secp256k1_fe_one, a->infinity);
+
+    /* Set r->infinity if r->z is 0.
+     *
+     * If a->infinity is set, then r->infinity = (r->z == 0) = (1 == 0) = false,
+     * which is correct because the function assumes that b is not infinity.
+     *
+     * Now assume !a->infinity. This implies Z = Z1 != 0.
+     *
+     * Case y1 = -y2:
+     * In this case we could have a = -b, namely if x1 = x2.
+     * We have degenerate = true, r->z = (x1 - x2) * Z.
+     * Then r->infinity = ((x1 - x2)Z == 0) = (x1 == x2) = (a == -b).
+     *
+     * Case y1 != -y2:
+     * In this case, we can't have a = -b.
+     * We have degenerate = false, r->z = (y1 + y2) * Z.
+     * Then r->infinity = ((y1 + y2)Z == 0) = (y1 == -y2) = false. */
+    r->infinity = secp256k1_fe_normalizes_to_zero(&r->z);
+
+    SECP256K1_GEJ_VERIFY(r);
+}
+
+static void secp256k1_gej_rescale(secp256k1_gej *r, const secp256k1_fe *s) {
+    /* Multiplies the coordinates of r in place by (s^2, s^3, s); s must be nonzero. Operations: 4 mul, 1 sqr */
+    secp256k1_fe zz;
+    SECP256K1_GEJ_VERIFY(r);
+    SECP256K1_FE_VERIFY(s);
+    VERIFY_CHECK(!secp256k1_fe_normalizes_to_zero_var(s));
+
+    secp256k1_fe_sqr(&zz, s);                           /* zz = s^2 */
+    secp256k1_fe_mul(&r->x, &r->x, &zz);                /* r->x *= s^2 */
+    secp256k1_fe_mul(&r->y, &r->y, &zz);                /* first factor s^2 of the s^3 below */
+    secp256k1_fe_mul(&r->y, &r->y, s);                  /* r->y *= s^3 */
+    secp256k1_fe_mul(&r->z, &r->z, s);                  /* r->z *= s   */
+
+    SECP256K1_GEJ_VERIFY(r);
+}
+
+static void secp256k1_ge_to_storage(secp256k1_ge_storage *r, const secp256k1_ge *a) {
+    /* Packs the affine point a (must not be infinity) into r from normalized copies of its coordinates; a is not modified. */
+    secp256k1_fe nx, ny;
+    SECP256K1_GE_VERIFY(a);
+    VERIFY_CHECK(!a->infinity);
+    nx = a->x;
+    ny = a->y;
+    secp256k1_fe_normalize(&nx);
+    secp256k1_fe_to_storage(&r->x, &nx);
+    secp256k1_fe_normalize(&ny);
+    secp256k1_fe_to_storage(&r->y, &ny);
+}
+
+static void secp256k1_ge_from_storage(secp256k1_ge *r, const secp256k1_ge_storage *a) {
+    /* Rebuilds r from the stored x and y; the result is always flagged as not infinity. */
+    r->infinity = 0;
+    secp256k1_fe_from_storage(&r->y, &a->y);
+    secp256k1_fe_from_storage(&r->x, &a->x);
+
+    SECP256K1_GE_VERIFY(r);
+}
+
+static SECP256K1_INLINE void secp256k1_gej_cmov(secp256k1_gej *r, const secp256k1_gej *a, int flag) { /* Conditional move: r takes the value of a when flag is 1 and keeps its own value when flag is 0. */
+    SECP256K1_GEJ_VERIFY(r);
+    SECP256K1_GEJ_VERIFY(a);
+
+    secp256k1_fe_cmov(&r->x, &a->x, flag);
+    secp256k1_fe_cmov(&r->y, &a->y, flag);
+    secp256k1_fe_cmov(&r->z, &a->z, flag);
+    r->infinity ^= (r->infinity ^ a->infinity) & flag; /* branch-free select of the flag; NOTE(review): only selects correctly for flag in {0, 1} with 0/1 infinity values */
+
+    SECP256K1_GEJ_VERIFY(r);
+}
+
+static SECP256K1_INLINE void secp256k1_ge_storage_cmov(secp256k1_ge_storage *r, const secp256k1_ge_storage *a, int flag) { /* Conditional move of a stored point: forwards flag to the field-storage cmov for each coordinate. */
+    secp256k1_fe_storage_cmov(&r->x, &a->x, flag);
+    secp256k1_fe_storage_cmov(&r->y, &a->y, flag);
+}
+
+static void secp256k1_ge_mul_lambda(secp256k1_ge *r, const secp256k1_ge *a) { /* Sets r to a with its x coordinate multiplied by secp256k1_const_beta; y and the infinity flag are copied unchanged. */
+    SECP256K1_GE_VERIFY(a);
+
+    *r = *a;
+    secp256k1_fe_mul(&r->x, &r->x, &secp256k1_const_beta); /* NOTE(review): per the function name this should equal lambda*a on the curve - confirm against the declaration in group.h */
+
+    SECP256K1_GE_VERIFY(r);
+}
+
+static int secp256k1_ge_is_in_correct_subgroup(const secp256k1_ge* ge) {
+#ifdef EXHAUSTIVE_TEST_ORDER
+    /* With EXHAUSTIVE_TEST_ORDER defined: compute EXHAUSTIVE_TEST_ORDER * ge with a plain MSB-first
+     * double-and-add loop (avoiding a dependency on ecmult) and report whether the product is infinity. */
+    secp256k1_gej acc;
+    int bit;
+    SECP256K1_GE_VERIFY(ge);
+    secp256k1_gej_set_infinity(&acc);
+    for (bit = 31; bit >= 0; --bit) {
+        secp256k1_gej_double_var(&acc, &acc, NULL);
+        if ((((uint32_t)EXHAUSTIVE_TEST_ORDER) >> bit) & 1) {
+            secp256k1_gej_add_ge_var(&acc, &acc, ge, NULL);
+        }
+    }
+    return secp256k1_gej_is_infinity(&acc);
+#else
+    /* Otherwise: the real secp256k1 group has cofactor 1, so the subgroup is
+     * the entire curve and every point passes. */
+    SECP256K1_GE_VERIFY(ge);
+    (void)ge;
+    return 1;
+#endif
+}
+
+static int secp256k1_ge_x_on_curve_var(const secp256k1_fe *x) { /* Returns whether x^3 + SECP256K1_B is a square, as reported by secp256k1_fe_is_square_var. */
+    secp256k1_fe c;
+    secp256k1_fe_sqr(&c, x); /* c = x^2 */
+    secp256k1_fe_mul(&c, &c, x); /* c = x^3 */
+    secp256k1_fe_add_int(&c, SECP256K1_B); /* c = x^3 + B */
+    return secp256k1_fe_is_square_var(&c);
+}
+
+static int secp256k1_ge_x_frac_on_curve_var(const secp256k1_fe *xn, const secp256k1_fe *xd) {
+    /* We want to determine whether (xn/xd) is on the curve.
+     * That is, whether (xn/xd)^3 + 7 is a square; xd must be nonzero. No field division is performed:
+     * (xn/xd)^3 + 7 is square <=> xd*xn^3 + 7*xd^4 is square (multiplying by xd^4, a square).
+     */
+     secp256k1_fe r, t;
+     VERIFY_CHECK(!secp256k1_fe_normalizes_to_zero_var(xd));
+
+     secp256k1_fe_mul(&r, xd, xn); /* r = xd*xn */
+     secp256k1_fe_sqr(&t, xn); /* t = xn^2 */
+     secp256k1_fe_mul(&r, &r, &t); /* r = xd*xn^3 */
+     secp256k1_fe_sqr(&t, xd); /* t = xd^2 */
+     secp256k1_fe_sqr(&t, &t); /* t = xd^4 */
+     VERIFY_CHECK(SECP256K1_B <= 31); /* NOTE(review): presumably bounds the multiplier accepted by secp256k1_fe_mul_int below - confirm */
+     secp256k1_fe_mul_int(&t, SECP256K1_B); /* t = 7*xd^4 */
+     secp256k1_fe_add(&r, &t); /* r = xd*xn^3 + 7*xd^4 */
+     return secp256k1_fe_is_square_var(&r);
+}
+
+static void secp256k1_ge_to_bytes(unsigned char *buf, const secp256k1_ge *a) { /* Writes a (must not be infinity) to buf as the 64 raw bytes of its secp256k1_ge_storage form. */
+    secp256k1_ge_storage s;
+
+    /* We require that the secp256k1_ge_storage type is exactly 64 bytes.
+     * This is formally not guaranteed by the C standard, but should hold on any
+     * sane compiler in the real world. */
+    STATIC_ASSERT(sizeof(secp256k1_ge_storage) == 64);
+    VERIFY_CHECK(!secp256k1_ge_is_infinity(a));
+    secp256k1_ge_to_storage(&s, a);
+    memcpy(buf, &s, 64); /* copies the object representation of s; NOTE(review): presumably platform-dependent (field layout, endianness) - confirm before persisting these bytes */
+}
+
+static void secp256k1_ge_from_bytes(secp256k1_ge *r, const unsigned char *buf) { /* Reverse of secp256k1_ge_to_bytes: reads 64 bytes from buf as a secp256k1_ge_storage and unpacks it into r. */
+    secp256k1_ge_storage s;
+
+    STATIC_ASSERT(sizeof(secp256k1_ge_storage) == 64);
+    memcpy(&s, buf, 64); /* copy into a properly typed local rather than casting buf */
+    secp256k1_ge_from_storage(r, &s);
+}
+
+static void secp256k1_ge_to_bytes_ext(unsigned char *data, const secp256k1_ge *ge) {
+    /* Same as secp256k1_ge_to_bytes, except that infinity is accepted too and is written as 64 zero bytes. */
+    if (!secp256k1_ge_is_infinity(ge)) {
+        secp256k1_ge_to_bytes(data, ge);
+        return;
+    }
+    memset(data, 0, 64);
+}
+
+static void secp256k1_ge_from_bytes_ext(secp256k1_ge *ge, const unsigned char *data) {
+    /* Reverse of secp256k1_ge_to_bytes_ext: 64 zero bytes decode to infinity, any other input goes through secp256k1_ge_from_bytes. */
+    static const unsigned char zeros[64] = { 0 };
+    if (secp256k1_memcmp_var(data, zeros, sizeof(zeros)) != 0) {
+        secp256k1_ge_from_bytes(ge, data);
+        return;
+    }
+    secp256k1_ge_set_infinity(ge);
+}
+
+#endif /* SECP256K1_GROUP_IMPL_H */
--- a/libsecp256k1/src/hash.h
+++ b/libsecp256k1/src/hash.h
@@ -0,0 +1,44 @@
+/***********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                    *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_HASH_H
+#define SECP256K1_HASH_H
+
+#include <stdlib.h>
+#include <stdint.h>
+
+typedef struct { /* Streaming SHA-256 context. */
+    uint32_t s[8]; /* current hash state words */
+    unsigned char buf[64]; /* input bytes that do not yet fill a whole 64-byte block */
+    uint64_t bytes; /* total number of bytes written so far; its low 6 bits give the fill level of buf */
+} secp256k1_sha256;
+
+static void secp256k1_sha256_initialize(secp256k1_sha256 *hash); /* sets s to the initial values and bytes to 0 */
+static void secp256k1_sha256_write(secp256k1_sha256 *hash, const unsigned char *data, size_t size); /* absorbs size bytes of data */
+static void secp256k1_sha256_finalize(secp256k1_sha256 *hash, unsigned char *out32); /* pads, writes the 32-byte digest to out32 and zeroes s */
+static void secp256k1_sha256_clear(secp256k1_sha256 *hash); /* wipes the whole context with secp256k1_memclear */
+
+typedef struct { /* HMAC-SHA256 context, made of two SHA-256 contexts. */
+    secp256k1_sha256 inner, outer; /* inner absorbs the key block XOR 0x36 and then the message; outer absorbs the key block XOR 0x5c and then the inner digest */
+} secp256k1_hmac_sha256;
+
+static void secp256k1_hmac_sha256_initialize(secp256k1_hmac_sha256 *hash, const unsigned char *key, size_t size); /* keys both contexts; keys longer than 64 bytes are hashed first */
+static void secp256k1_hmac_sha256_write(secp256k1_hmac_sha256 *hash, const unsigned char *data, size_t size); /* feeds message bytes to the inner context */
+static void secp256k1_hmac_sha256_finalize(secp256k1_hmac_sha256 *hash, unsigned char *out32); /* writes the 32-byte MAC to out32 */
+static void secp256k1_hmac_sha256_clear(secp256k1_hmac_sha256 *hash); /* wipes the whole context with secp256k1_memclear */
+
+typedef struct { /* State of the RFC6979 HMAC-SHA256 generator. */
+    unsigned char v[32]; /* V value; every output block is copied from here */
+    unsigned char k[32]; /* K value: the HMAC key used whenever v is updated */
+    int retry; /* 0 after initialize; set to 1 by generate, which makes later generate calls update k and v first */
+} secp256k1_rfc6979_hmac_sha256;
+
+static void secp256k1_rfc6979_hmac_sha256_initialize(secp256k1_rfc6979_hmac_sha256 *rng, const unsigned char *key, size_t keylen); /* seeds k and v from keylen bytes of key */
+static void secp256k1_rfc6979_hmac_sha256_generate(secp256k1_rfc6979_hmac_sha256 *rng, unsigned char *out, size_t outlen); /* writes outlen output bytes to out */
+static void secp256k1_rfc6979_hmac_sha256_finalize(secp256k1_rfc6979_hmac_sha256 *rng); /* currently a no-op */
+static void secp256k1_rfc6979_hmac_sha256_clear(secp256k1_rfc6979_hmac_sha256 *rng); /* wipes the whole state with secp256k1_memclear */
+
+#endif /* SECP256K1_HASH_H */
--- a/libsecp256k1/src/hash_impl.h
+++ b/libsecp256k1/src/hash_impl.h
@@ -0,0 +1,299 @@
+/***********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                    *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_HASH_IMPL_H
+#define SECP256K1_HASH_IMPL_H
+
+#include "hash.h"
+#include "util.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#define Ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) /* bitwise select: takes the bit of y where x is 1 and the bit of z where x is 0 */
+#define Maj(x,y,z) (((x) & (y)) | ((z) & ((x) | (y)))) /* bitwise majority of x, y and z */
+#define Sigma0(x) (((x) >> 2 | (x) << 30) ^ ((x) >> 13 | (x) << 19) ^ ((x) >> 22 | (x) << 10)) /* for 32-bit x: rotr 2 ^ rotr 13 ^ rotr 22 */
+#define Sigma1(x) (((x) >> 6 | (x) << 26) ^ ((x) >> 11 | (x) << 21) ^ ((x) >> 25 | (x) << 7)) /* for 32-bit x: rotr 6 ^ rotr 11 ^ rotr 25 */
+#define sigma0(x) (((x) >> 7 | (x) << 25) ^ ((x) >> 18 | (x) << 14) ^ ((x) >> 3)) /* for 32-bit x: rotr 7 ^ rotr 18 ^ shr 3 */
+#define sigma1(x) (((x) >> 17 | (x) << 15) ^ ((x) >> 19 | (x) << 13) ^ ((x) >> 10)) /* for 32-bit x: rotr 17 ^ rotr 19 ^ shr 10 */
+
+#define Round(a,b,c,d,e,f,g,h,k,w) do { /* one round: only d and h are written; callers rotate the argument order instead of moving values */ \
+    uint32_t t1 = (h) + Sigma1(e) + Ch((e), (f), (g)) + (k) + (w); \
+    uint32_t t2 = Sigma0(a) + Maj((a), (b), (c)); \
+    (d) += t1; \
+    (h) = t1 + t2; \
+} while(0)
+
+static void secp256k1_sha256_initialize(secp256k1_sha256 *hash) {
+    /* Resets hash to the start-of-message state: the eight SHA-256 initial
+     * state words and a byte count of zero. hash->buf is left untouched. */
+    static const uint32_t initial_state[8] = { 0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul,
+                                               0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul };
+    int i;
+    for (i = 0; i < 8; i++) {
+        hash->s[i] = initial_state[i];
+    }
+    hash->bytes = 0;
+}
+
+/** Perform one SHA-256 transformation, processing 16 big endian 32-bit words. s holds the 8 state words and is updated in place; buf supplies the 64 input bytes. */
+static void secp256k1_sha256_transform(uint32_t* s, const unsigned char* buf) {
+    uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7]; /* working copies of the state */
+    uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; /* rolling 16-word window of the message schedule */
+
+    Round(a, b, c, d, e, f, g, h, 0x428a2f98,  w0 = secp256k1_read_be32(&buf[0])); /* rounds 0-15: message words are read straight from buf */
+    Round(h, a, b, c, d, e, f, g, 0x71374491,  w1 = secp256k1_read_be32(&buf[4]));
+    Round(g, h, a, b, c, d, e, f, 0xb5c0fbcf,  w2 = secp256k1_read_be32(&buf[8]));
+    Round(f, g, h, a, b, c, d, e, 0xe9b5dba5,  w3 = secp256k1_read_be32(&buf[12]));
+    Round(e, f, g, h, a, b, c, d, 0x3956c25b,  w4 = secp256k1_read_be32(&buf[16]));
+    Round(d, e, f, g, h, a, b, c, 0x59f111f1,  w5 = secp256k1_read_be32(&buf[20]));
+    Round(c, d, e, f, g, h, a, b, 0x923f82a4,  w6 = secp256k1_read_be32(&buf[24]));
+    Round(b, c, d, e, f, g, h, a, 0xab1c5ed5,  w7 = secp256k1_read_be32(&buf[28]));
+    Round(a, b, c, d, e, f, g, h, 0xd807aa98,  w8 = secp256k1_read_be32(&buf[32]));
+    Round(h, a, b, c, d, e, f, g, 0x12835b01,  w9 = secp256k1_read_be32(&buf[36]));
+    Round(g, h, a, b, c, d, e, f, 0x243185be, w10 = secp256k1_read_be32(&buf[40]));
+    Round(f, g, h, a, b, c, d, e, 0x550c7dc3, w11 = secp256k1_read_be32(&buf[44]));
+    Round(e, f, g, h, a, b, c, d, 0x72be5d74, w12 = secp256k1_read_be32(&buf[48]));
+    Round(d, e, f, g, h, a, b, c, 0x80deb1fe, w13 = secp256k1_read_be32(&buf[52]));
+    Round(c, d, e, f, g, h, a, b, 0x9bdc06a7, w14 = secp256k1_read_be32(&buf[56]));
+    Round(b, c, d, e, f, g, h, a, 0xc19bf174, w15 = secp256k1_read_be32(&buf[60]));
+
+    Round(a, b, c, d, e, f, g, h, 0xe49b69c1, w0 += sigma1(w14) + w9 + sigma0(w1)); /* from here on each word is replaced in place by the next schedule word */
+    Round(h, a, b, c, d, e, f, g, 0xefbe4786, w1 += sigma1(w15) + w10 + sigma0(w2));
+    Round(g, h, a, b, c, d, e, f, 0x0fc19dc6, w2 += sigma1(w0) + w11 + sigma0(w3));
+    Round(f, g, h, a, b, c, d, e, 0x240ca1cc, w3 += sigma1(w1) + w12 + sigma0(w4));
+    Round(e, f, g, h, a, b, c, d, 0x2de92c6f, w4 += sigma1(w2) + w13 + sigma0(w5));
+    Round(d, e, f, g, h, a, b, c, 0x4a7484aa, w5 += sigma1(w3) + w14 + sigma0(w6));
+    Round(c, d, e, f, g, h, a, b, 0x5cb0a9dc, w6 += sigma1(w4) + w15 + sigma0(w7));
+    Round(b, c, d, e, f, g, h, a, 0x76f988da, w7 += sigma1(w5) + w0 + sigma0(w8));
+    Round(a, b, c, d, e, f, g, h, 0x983e5152, w8 += sigma1(w6) + w1 + sigma0(w9));
+    Round(h, a, b, c, d, e, f, g, 0xa831c66d, w9 += sigma1(w7) + w2 + sigma0(w10));
+    Round(g, h, a, b, c, d, e, f, 0xb00327c8, w10 += sigma1(w8) + w3 + sigma0(w11));
+    Round(f, g, h, a, b, c, d, e, 0xbf597fc7, w11 += sigma1(w9) + w4 + sigma0(w12));
+    Round(e, f, g, h, a, b, c, d, 0xc6e00bf3, w12 += sigma1(w10) + w5 + sigma0(w13));
+    Round(d, e, f, g, h, a, b, c, 0xd5a79147, w13 += sigma1(w11) + w6 + sigma0(w14));
+    Round(c, d, e, f, g, h, a, b, 0x06ca6351, w14 += sigma1(w12) + w7 + sigma0(w15));
+    Round(b, c, d, e, f, g, h, a, 0x14292967, w15 += sigma1(w13) + w8 + sigma0(w0));
+
+    Round(a, b, c, d, e, f, g, h, 0x27b70a85, w0 += sigma1(w14) + w9 + sigma0(w1));
+    Round(h, a, b, c, d, e, f, g, 0x2e1b2138, w1 += sigma1(w15) + w10 + sigma0(w2));
+    Round(g, h, a, b, c, d, e, f, 0x4d2c6dfc, w2 += sigma1(w0) + w11 + sigma0(w3));
+    Round(f, g, h, a, b, c, d, e, 0x53380d13, w3 += sigma1(w1) + w12 + sigma0(w4));
+    Round(e, f, g, h, a, b, c, d, 0x650a7354, w4 += sigma1(w2) + w13 + sigma0(w5));
+    Round(d, e, f, g, h, a, b, c, 0x766a0abb, w5 += sigma1(w3) + w14 + sigma0(w6));
+    Round(c, d, e, f, g, h, a, b, 0x81c2c92e, w6 += sigma1(w4) + w15 + sigma0(w7));
+    Round(b, c, d, e, f, g, h, a, 0x92722c85, w7 += sigma1(w5) + w0 + sigma0(w8));
+    Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1, w8 += sigma1(w6) + w1 + sigma0(w9));
+    Round(h, a, b, c, d, e, f, g, 0xa81a664b, w9 += sigma1(w7) + w2 + sigma0(w10));
+    Round(g, h, a, b, c, d, e, f, 0xc24b8b70, w10 += sigma1(w8) + w3 + sigma0(w11));
+    Round(f, g, h, a, b, c, d, e, 0xc76c51a3, w11 += sigma1(w9) + w4 + sigma0(w12));
+    Round(e, f, g, h, a, b, c, d, 0xd192e819, w12 += sigma1(w10) + w5 + sigma0(w13));
+    Round(d, e, f, g, h, a, b, c, 0xd6990624, w13 += sigma1(w11) + w6 + sigma0(w14));
+    Round(c, d, e, f, g, h, a, b, 0xf40e3585, w14 += sigma1(w12) + w7 + sigma0(w15));
+    Round(b, c, d, e, f, g, h, a, 0x106aa070, w15 += sigma1(w13) + w8 + sigma0(w0));
+
+    Round(a, b, c, d, e, f, g, h, 0x19a4c116, w0 += sigma1(w14) + w9 + sigma0(w1));
+    Round(h, a, b, c, d, e, f, g, 0x1e376c08, w1 += sigma1(w15) + w10 + sigma0(w2));
+    Round(g, h, a, b, c, d, e, f, 0x2748774c, w2 += sigma1(w0) + w11 + sigma0(w3));
+    Round(f, g, h, a, b, c, d, e, 0x34b0bcb5, w3 += sigma1(w1) + w12 + sigma0(w4));
+    Round(e, f, g, h, a, b, c, d, 0x391c0cb3, w4 += sigma1(w2) + w13 + sigma0(w5));
+    Round(d, e, f, g, h, a, b, c, 0x4ed8aa4a, w5 += sigma1(w3) + w14 + sigma0(w6));
+    Round(c, d, e, f, g, h, a, b, 0x5b9cca4f, w6 += sigma1(w4) + w15 + sigma0(w7));
+    Round(b, c, d, e, f, g, h, a, 0x682e6ff3, w7 += sigma1(w5) + w0 + sigma0(w8));
+    Round(a, b, c, d, e, f, g, h, 0x748f82ee, w8 += sigma1(w6) + w1 + sigma0(w9));
+    Round(h, a, b, c, d, e, f, g, 0x78a5636f, w9 += sigma1(w7) + w2 + sigma0(w10));
+    Round(g, h, a, b, c, d, e, f, 0x84c87814, w10 += sigma1(w8) + w3 + sigma0(w11));
+    Round(f, g, h, a, b, c, d, e, 0x8cc70208, w11 += sigma1(w9) + w4 + sigma0(w12));
+    Round(e, f, g, h, a, b, c, d, 0x90befffa, w12 += sigma1(w10) + w5 + sigma0(w13));
+    Round(d, e, f, g, h, a, b, c, 0xa4506ceb, w13 += sigma1(w11) + w6 + sigma0(w14));
+    Round(c, d, e, f, g, h, a, b, 0xbef9a3f7, w14 + sigma1(w12) + w7 + sigma0(w15)); /* last two rounds: the schedule word is used but not stored, nothing reads it afterwards */
+    Round(b, c, d, e, f, g, h, a, 0xc67178f2, w15 + sigma1(w13) + w8 + sigma0(w0));
+
+    s[0] += a; /* add the working variables back into the state */
+    s[1] += b;
+    s[2] += c;
+    s[3] += d;
+    s[4] += e;
+    s[5] += f;
+    s[6] += g;
+    s[7] += h;
+}
+
+static void secp256k1_sha256_write(secp256k1_sha256 *hash, const unsigned char *data, size_t len) {
+    /* Absorbs len bytes of data: input collects in hash->buf, every completed 64-byte block is
+     * compressed into hash->s, and any remainder stays in hash->buf for the next call. */
+    size_t used = hash->bytes & 0x3F; /* bytes already waiting in hash->buf */
+    size_t room = 64 - used;          /* bytes needed to complete the current block */
+    hash->bytes += len;
+    VERIFY_CHECK(hash->bytes >= len); /* the byte counter must not wrap around */
+    for (; len >= room; room = 64) {
+        memcpy(hash->buf + used, data, room);
+        secp256k1_sha256_transform(hash->s, hash->buf);
+        data += room;
+        len -= room;
+        used = 0;
+    }
+    if (len != 0) {
+        memcpy(hash->buf + used, data, len);
+    }
+}
+
+static void secp256k1_sha256_finalize(secp256k1_sha256 *hash, unsigned char *out32) { /* Pads the message, writes the 32-byte digest to out32 and zeroes hash->s. */
+    static const unsigned char padding[64] = {0x80};
+    const uint64_t msglen = hash->bytes; /* taken before the padding writes below advance hash->bytes */
+    unsigned char bitlen[8];
+    int word;
+    VERIFY_CHECK(msglen < ((uint64_t)1 << 61)); /* The maximum message size of SHA256 is 2^64-1 bits. */
+    secp256k1_write_be32(&bitlen[0], msglen >> 29); /* upper 32 bits of the bit count msglen*8 */
+    secp256k1_write_be32(&bitlen[4], msglen << 3);  /* lower 32 bits of the bit count */
+    secp256k1_sha256_write(hash, padding, 1 + ((119 - (msglen % 64)) % 64)); /* 0x80, then zeros until the length is 56 mod 64 */
+    secp256k1_sha256_write(hash, bitlen, 8);
+    for (word = 0; word < 8; word++) {
+        secp256k1_write_be32(&out32[4*word], hash->s[word]);
+        hash->s[word] = 0;
+    }
+}
+
+/* Initializes a sha256 struct and writes the 64 byte string
+ * SHA256(tag)||SHA256(tag) into it. hash itself serves as scratch for computing SHA256(tag). */
+static void secp256k1_sha256_initialize_tagged(secp256k1_sha256 *hash, const unsigned char *tag, size_t taglen) {
+    unsigned char buf[32]; /* receives SHA256(tag) */
+    secp256k1_sha256_initialize(hash);
+    secp256k1_sha256_write(hash, tag, taglen);
+    secp256k1_sha256_finalize(hash, buf);
+
+    secp256k1_sha256_initialize(hash); /* start over: the finalize above zeroed the state */
+    secp256k1_sha256_write(hash, buf, 32);
+    secp256k1_sha256_write(hash, buf, 32); /* 64 bytes written in total, i.e. exactly one full block */
+}
+
+static void secp256k1_sha256_clear(secp256k1_sha256 *hash) { /* Wipes the entire context (state, buffer and byte count). */
+    secp256k1_memclear(hash, sizeof(*hash));
+}
+
+static void secp256k1_hmac_sha256_initialize(secp256k1_hmac_sha256 *hash, const unsigned char *key, size_t keylen) {
+    /* Builds the 64-byte key block (key zero-padded, or SHA256(key) zero-padded when key is longer than
+     * 64 bytes), then absorbs block^0x5c into hash->outer and block^0x36 into hash->inner. */
+    unsigned char block[64];
+    size_t pos;
+    if (keylen > sizeof(block)) {
+        secp256k1_sha256 keyhash;
+        secp256k1_sha256_initialize(&keyhash);
+        secp256k1_sha256_write(&keyhash, key, keylen);
+        secp256k1_sha256_finalize(&keyhash, block);
+        memset(block + 32, 0, 32);
+    } else {
+        memcpy(block, key, keylen);
+        memset(block + keylen, 0, sizeof(block) - keylen);
+    }
+    for (pos = 0; pos < sizeof(block); pos++) {
+        block[pos] ^= 0x5c;        /* block is now key ^ 0x5c */
+    }
+    secp256k1_sha256_initialize(&hash->outer);
+    secp256k1_sha256_write(&hash->outer, block, sizeof(block));
+    for (pos = 0; pos < sizeof(block); pos++) {
+        block[pos] ^= 0x5c ^ 0x36; /* cancels the 0x5c and applies 0x36 in one pass */
+    }
+    secp256k1_sha256_initialize(&hash->inner);
+    secp256k1_sha256_write(&hash->inner, block, sizeof(block));
+    secp256k1_memclear(block, sizeof(block));
+}
+
+static void secp256k1_hmac_sha256_write(secp256k1_hmac_sha256 *hash, const unsigned char *data, size_t size) { /* Message bytes only ever go into the inner hash. */
+    secp256k1_sha256_write(&hash->inner, data, size);
+}
+
+static void secp256k1_hmac_sha256_finalize(secp256k1_hmac_sha256 *hash, unsigned char *out32) { /* Writes the 32-byte MAC to out32: the outer hash over (outer key block || inner digest). */
+    unsigned char temp[32]; /* inner digest */
+    secp256k1_sha256_finalize(&hash->inner, temp);
+    secp256k1_sha256_write(&hash->outer, temp, 32);
+    secp256k1_memclear(temp, sizeof(temp)); /* the intermediate digest is wiped before returning */
+    secp256k1_sha256_finalize(&hash->outer, out32);
+}
+
+static void secp256k1_hmac_sha256_clear(secp256k1_hmac_sha256 *hash) { /* Wipes both the inner and the outer SHA-256 context. */
+    secp256k1_memclear(hash, sizeof(*hash));
+}
+
+static void secp256k1_rfc6979_hmac_sha256_initialize(secp256k1_rfc6979_hmac_sha256 *rng, const unsigned char *key, size_t keylen) { /* Seeds rng->k and rng->v from keylen bytes of key and clears rng->retry. */
+    secp256k1_hmac_sha256 hmac;
+    static const unsigned char zero[1] = {0x00};
+    static const unsigned char one[1] = {0x01};
+
+    memset(rng->v, 0x01, 32); /* RFC6979 3.2.b. */
+    memset(rng->k, 0x00, 32); /* RFC6979 3.2.c. */
+
+    /* RFC6979 3.2.d. */
+    secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
+    secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
+    secp256k1_hmac_sha256_write(&hmac, zero, 1);
+    secp256k1_hmac_sha256_write(&hmac, key, keylen);
+    secp256k1_hmac_sha256_finalize(&hmac, rng->k); /* K = HMAC_K(V || 0x00 || key) */
+    secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
+    secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
+    secp256k1_hmac_sha256_finalize(&hmac, rng->v); /* V = HMAC_K(V) */
+
+    /* RFC6979 3.2.f. */
+    secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
+    secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
+    secp256k1_hmac_sha256_write(&hmac, one, 1);
+    secp256k1_hmac_sha256_write(&hmac, key, keylen);
+    secp256k1_hmac_sha256_finalize(&hmac, rng->k); /* K = HMAC_K(V || 0x01 || key) */
+    secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
+    secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
+    secp256k1_hmac_sha256_finalize(&hmac, rng->v); /* V = HMAC_K(V) */
+    rng->retry = 0; /* the first generate call skips its extra K/V update */
+}
+
+static void secp256k1_rfc6979_hmac_sha256_generate(secp256k1_rfc6979_hmac_sha256 *rng, unsigned char *out, size_t outlen) {
+    /* RFC6979 3.2.h. Writes outlen bytes to out. Output is produced in blocks of up to
+     * 32 bytes, each block being a fresh V = HMAC_K(V); a partial final block is
+     * truncated. Sets rng->retry so that the next call refreshes K and V first. */
+    static const unsigned char zero[1] = {0x00};
+    if (rng->retry) {
+        /* Not the first call on this rng: update K = HMAC_K(V || 0x00), then V = HMAC_K(V). */
+        secp256k1_hmac_sha256 hmac;
+        secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
+        secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
+        secp256k1_hmac_sha256_write(&hmac, zero, 1);
+        secp256k1_hmac_sha256_finalize(&hmac, rng->k);
+        secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
+        secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
+        secp256k1_hmac_sha256_finalize(&hmac, rng->v);
+    }
+
+    while (outlen > 0) {
+        secp256k1_hmac_sha256 hmac;
+        /* The chunk length stays a size_t: narrowing outlen to int could yield a negative value and a huge memcpy length. */
+        size_t now = outlen < 32 ? outlen : 32;
+        secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
+        secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
+        secp256k1_hmac_sha256_finalize(&hmac, rng->v);
+        memcpy(out, rng->v, now);
+        out += now;
+        outlen -= now;
+    }
+
+    rng->retry = 1;
+}
+
+static void secp256k1_rfc6979_hmac_sha256_finalize(secp256k1_rfc6979_hmac_sha256 *rng) { /* Intentionally does nothing; wiping the state is the job of secp256k1_rfc6979_hmac_sha256_clear. */
+    (void) rng; /* silences the unused-parameter warning */
+}
+
+static void secp256k1_rfc6979_hmac_sha256_clear(secp256k1_rfc6979_hmac_sha256 *rng) { /* Wipes v, k and retry. */
+    secp256k1_memclear(rng, sizeof(*rng));
+}
+
+#undef Round
+#undef sigma1
+#undef sigma0
+#undef Sigma1
+#undef Sigma0
+#undef Maj
+#undef Ch
+
+#endif /* SECP256K1_HASH_IMPL_H */
--- a/libsecp256k1/src/hsort.h
+++ b/libsecp256k1/src/hsort.h
@@ -0,0 +1,33 @@
+/***********************************************************************
+ * Copyright (c) 2021 Russell O'Connor, Jonas Nick                     *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_HSORT_H
+#define SECP256K1_HSORT_H
+
+#include <stddef.h>
+#include <string.h>
+
+/* In-place, iterative heapsort with an interface matching glibc's qsort_r. This
+ * is preferred over standard library implementations because they generally
+ * make no guarantee about being fast for malicious inputs.
+ * Remember that heapsort is unstable: elements that compare equal may end up in any relative order.
+ *
+ * In/Out: ptr: pointer to the array to sort. The contents of the array are
+ *              sorted in ascending order according to the comparison function.
+ * In:   count: number of elements in the array.
+ *        size: size in bytes of each element.
+ *         cmp: pointer to a comparison function that is called with two
+ *              arguments that point to the objects being compared. The cmp_data
+ *              argument of secp256k1_hsort is passed as third argument. The
+ *              function must return an integer less than, equal to, or greater
+ *              than zero if the first argument is considered to be respectively
+ *              less than, equal to, or greater than the second.
+ *    cmp_data: pointer passed as third argument to cmp.
+ */
+static void secp256k1_hsort(void *ptr, size_t count, size_t size,
+                            int (*cmp)(const void *, const void *, void *),
+                            void *cmp_data);
+#endif /* SECP256K1_HSORT_H */
--- a/libsecp256k1/src/hsort_impl.h
+++ b/libsecp256k1/src/hsort_impl.h
@@ -0,0 +1,125 @@
+/***********************************************************************
+ * Copyright (c) 2021 Russell O'Connor, Jonas Nick                     *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_HSORT_IMPL_H
+#define SECP256K1_HSORT_IMPL_H
+
+#include "hsort.h"
+
+/* An array is a heap when, for all non-zero indexes i, the element at index i
+ * compares as less than or equal to the element at index parent(i) = (i-1)/2.
+ */
+
+static SECP256K1_INLINE size_t secp256k1_heap_child1(size_t i) { /* Index of the first child of heap node i, i.e. 2*i + 1. */
+    VERIFY_CHECK(i <= (SIZE_MAX - 1)/2); /* ensures 2*i + 1 cannot wrap around */
+    return 2*i + 1;
+}
+
+static SECP256K1_INLINE size_t secp256k1_heap_child2(size_t i) { /* Index of the second child of heap node i, i.e. 2*i + 2. */
+    VERIFY_CHECK(i <= SIZE_MAX/2 - 1); /* ensures 2*i + 2 cannot wrap around */
+    return secp256k1_heap_child1(i)+1;
+}
+
+static SECP256K1_INLINE void secp256k1_heap_swap64(unsigned char *a, unsigned char *b, size_t len) { /* Exchange len (<= 64) bytes between a and b. */
+    unsigned char tmp[64]; /* bounce buffer; its size is what limits len to 64 */
+    VERIFY_CHECK(len <= 64);
+    memcpy(tmp, a, len);
+    memmove(a, b, len); /* memmove (unlike memcpy) stays well-defined even if a and b refer to the same bytes */
+    memcpy(b, tmp, len);
+}
+
+static SECP256K1_INLINE void secp256k1_heap_swap(unsigned char *arr, size_t i, size_t j, size_t stride) {
+    /* Exchange elements i and j of arr (stride bytes each) in chunks of at most 64 bytes, tail first. */
+    unsigned char *const first = arr + i*stride;
+    unsigned char *const second = arr + j*stride;
+    size_t remaining;
+    for (remaining = stride; 64 < remaining; remaining -= 64) {
+        secp256k1_heap_swap64(first + (remaining - 64), second + (remaining - 64), 64);
+    }
+    /* What is left is the head chunk of at most 64 bytes (zero bytes when stride is 0). */
+    secp256k1_heap_swap64(first, second, remaining);
+}
+
+/* This function accepts an array arr containing heap_size elements, each of
+ * size stride. The elements in the array at indices >i satisfy the max-heap
+ * property, i.e., for any element at index j (where j > i), none of its children
+ * compares greater than the element itself (as decided by cmp, which receives
+ * cmp_data as its third argument). The function sifts element i down so that
+ * all elements at indices >=i satisfy the max-heap property. */
+static SECP256K1_INLINE void secp256k1_heap_down(unsigned char *arr, size_t i, size_t heap_size, size_t stride,
+                            int (*cmp)(const void *, const void *, void *), void *cmp_data) {
+    while (i < heap_size/2) { /* i still has at least one child inside the heap */
+        VERIFY_CHECK(i <= SIZE_MAX/2 - 1);
+        /* Proof:
+         * i < heap_size/2
+         * i + 1 <= heap_size/2
+         * 2*i + 2 <= heap_size <= SIZE_MAX
+         * 2*i <= SIZE_MAX - 2
+         */
+
+        VERIFY_CHECK(secp256k1_heap_child1(i) < heap_size);
+        /* Proof:
+         * i < heap_size/2
+         * i + 1 <= heap_size/2
+         * 2*i + 2 <= heap_size
+         * 2*i + 1 < heap_size
+         * child1(i) < heap_size
+         */
+
+        /* Let [x] be notation for the contents at arr[x*stride].
+         *
+         * If [child1(i)] > [i] and [child2(i)] > [i],
+         *   swap [i] with the larger child to ensure the new parent is larger
+         *   than both children. When [child1(i)] == [child2(i)], swap [i] with
+         *   [child2(i)].
+         * Else if [child1(i)] > [i], swap [i] with [child1(i)].
+         * Else if [child2(i)] > [i], swap [i] with [child2(i)].
+         */
+        if (secp256k1_heap_child2(i) < heap_size
+                && 0 <= cmp(arr + secp256k1_heap_child2(i)*stride, arr + secp256k1_heap_child1(i)*stride, cmp_data)) {
+            if (0 < cmp(arr + secp256k1_heap_child2(i)*stride, arr + i*stride, cmp_data)) {
+                secp256k1_heap_swap(arr, i, secp256k1_heap_child2(i), stride);
+                i = secp256k1_heap_child2(i); /* continue sifting from the slot the element moved to */
+            } else {
+                /* At this point we have [child2(i)] >= [child1(i)] and we have
+                 * [child2(i)] <= [i], and thus [child1(i)] <= [i] which means
+                 * that the next comparison can be skipped. */
+                return;
+            }
+        } else if (0 < cmp(arr + secp256k1_heap_child1(i)*stride, arr +         i*stride, cmp_data)) {
+            secp256k1_heap_swap(arr, i, secp256k1_heap_child1(i), stride);
+            i = secp256k1_heap_child1(i); /* continue sifting from the slot the element moved to */
+        } else {
+            return; /* neither child compares greater than [i]: heap property already holds */
+        }
+    }
+    /* heap_size/2 <= i
+     * heap_size/2 < i + 1
+     * heap_size < 2*i + 2
+     * heap_size <= 2*i + 1
+     * heap_size <= child1(i)
+     * Thus child1(i) and child2(i) are now out of bounds and we are at a leaf.
+     */
+}
+
+/* In-place heap sort: orders the count elements (size bytes each) at ptr ascending by cmp. */
+static void secp256k1_hsort(void *ptr, size_t count, size_t size,
+                            int (*cmp)(const void *, const void *, void *),
+                            void *cmp_data) {
+    size_t node = count/2, end = count;
+    /* Build the max-heap by sifting down every node that has a child, last one first. */
+    while (0 < node) {
+        --node;
+        secp256k1_heap_down(ptr, node, count, size, cmp, cmp_data);
+    }
+    /* Move the current maximum behind the shrinking heap, then repair the heap condition. */
+    while (1 < end) {
+        --end;
+        secp256k1_heap_swap(ptr, 0, end, size);
+        secp256k1_heap_down(ptr, 0, end, size, cmp, cmp_data);
+    }
+}
+
+#endif
--- a/libsecp256k1/src/int128.h
+++ b/libsecp256k1/src/int128.h
@@ -0,0 +1,90 @@
+#ifndef SECP256K1_INT128_H
+#define SECP256K1_INT128_H
+
+#include "util.h"
+
+#if defined(SECP256K1_WIDEMUL_INT128)
+#  if defined(SECP256K1_INT128_NATIVE)
+#    include "int128_native.h"
+#  elif defined(SECP256K1_INT128_STRUCT)
+#    include "int128_struct.h"
+#  else
+#    error "Please select int128 implementation"
+#  endif
+
+/* Construct an unsigned 128-bit value from a high and a low 64-bit value. */
+static SECP256K1_INLINE void secp256k1_u128_load(secp256k1_uint128 *r, uint64_t hi, uint64_t lo);
+
+/* Multiply two unsigned 64-bit values a and b and write the result to r. */
+static SECP256K1_INLINE void secp256k1_u128_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b);
+
+/* Multiply two unsigned 64-bit values a and b and add the result to r.
+ * The final result is taken modulo 2^128.
+ */
+static SECP256K1_INLINE void secp256k1_u128_accum_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b);
+
+/* Add an unsigned 64-bit value a to r.
+ * The final result is taken modulo 2^128.
+ */
+static SECP256K1_INLINE void secp256k1_u128_accum_u64(secp256k1_uint128 *r, uint64_t a);
+
+/* Unsigned (logical) right shift.
+ * Non-constant time in n.
+ */
+static SECP256K1_INLINE void secp256k1_u128_rshift(secp256k1_uint128 *r, unsigned int n);
+
+/* Return the low 64-bits of a 128-bit value as an unsigned 64-bit value. */
+static SECP256K1_INLINE uint64_t secp256k1_u128_to_u64(const secp256k1_uint128 *a);
+
+/* Return the high 64-bits of a 128-bit value as an unsigned 64-bit value. */
+static SECP256K1_INLINE uint64_t secp256k1_u128_hi_u64(const secp256k1_uint128 *a);
+
+/* Write an unsigned 64-bit value to r. */
+static SECP256K1_INLINE void secp256k1_u128_from_u64(secp256k1_uint128 *r, uint64_t a);
+
+/* Tests if r is strictly less than 2^n.
+ * n must be strictly less than 128.
+ */
+static SECP256K1_INLINE int secp256k1_u128_check_bits(const secp256k1_uint128 *r, unsigned int n);
+
+/* Construct a signed 128-bit value from a high and a low 64-bit value. */
+static SECP256K1_INLINE void secp256k1_i128_load(secp256k1_int128 *r, int64_t hi, uint64_t lo);
+
+/* Multiply two signed 64-bit values a and b and write the result to r. */
+static SECP256K1_INLINE void secp256k1_i128_mul(secp256k1_int128 *r, int64_t a, int64_t b);
+
+/* Multiply two signed 64-bit values a and b and add the result to r.
+ * Overflow or underflow from the addition is undefined behaviour.
+ */
+static SECP256K1_INLINE void secp256k1_i128_accum_mul(secp256k1_int128 *r, int64_t a, int64_t b);
+
+/* Compute a*d - b*c from signed 64-bit values and write the result to r. */
+static SECP256K1_INLINE void secp256k1_i128_det(secp256k1_int128 *r, int64_t a, int64_t b, int64_t c, int64_t d);
+
+/* Signed (arithmetic) right shift.
+ * Non-constant time in b.
+ */
+static SECP256K1_INLINE void secp256k1_i128_rshift(secp256k1_int128 *r, unsigned int b);
+
+/* Return the input value modulo 2^64. */
+static SECP256K1_INLINE uint64_t secp256k1_i128_to_u64(const secp256k1_int128 *a);
+
+/* Return the value as a signed 64-bit value.
+ * Requires the input to be between INT64_MIN and INT64_MAX.
+ */
+static SECP256K1_INLINE int64_t secp256k1_i128_to_i64(const secp256k1_int128 *a);
+
+/* Write a signed 64-bit value to r. */
+static SECP256K1_INLINE void secp256k1_i128_from_i64(secp256k1_int128 *r, int64_t a);
+
+/* Compare two 128-bit values for equality. */
+static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, const secp256k1_int128 *b);
+
+/* Tests if r is equal to sign*2^n (sign must be 1 or -1).
+ * n must be strictly less than 127.
+ */
+static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n, int sign);
+
+#endif
+
+#endif
--- a/libsecp256k1/src/int128_impl.h
+++ b/libsecp256k1/src/int128_impl.h
@@ -0,0 +1,18 @@
+#ifndef SECP256K1_INT128_IMPL_H
+#define SECP256K1_INT128_IMPL_H
+
+#include "util.h"
+
+#include "int128.h"
+
+#if defined(SECP256K1_WIDEMUL_INT128)
+#  if defined(SECP256K1_INT128_NATIVE)
+#    include "int128_native_impl.h"
+#  elif defined(SECP256K1_INT128_STRUCT)
+#    include "int128_struct_impl.h"
+#  else
+#    error "Please select int128 implementation"
+#  endif
+#endif
+
+#endif
--- a/libsecp256k1/src/int128_native.h
+++ b/libsecp256k1/src/int128_native.h
@@ -0,0 +1,19 @@
+#ifndef SECP256K1_INT128_NATIVE_H
+#define SECP256K1_INT128_NATIVE_H
+
+#include <stdint.h>
+#include "util.h"
+
+#if !defined(UINT128_MAX) && defined(__SIZEOF_INT128__)
+SECP256K1_GNUC_EXT typedef unsigned __int128 uint128_t;
+SECP256K1_GNUC_EXT typedef __int128 int128_t;
+# define UINT128_MAX ((uint128_t)(-1))
+# define INT128_MAX ((int128_t)(UINT128_MAX >> 1))
+# define INT128_MIN (-INT128_MAX - 1)
+/* No (U)INT128_C macros because compilers providing __int128 do not support 128-bit literals.  */
+#endif
+
+typedef uint128_t secp256k1_uint128;
+typedef int128_t secp256k1_int128;
+
+#endif
--- a/libsecp256k1/src/int128_native_impl.h
+++ b/libsecp256k1/src/int128_native_impl.h
@@ -0,0 +1,94 @@
+#ifndef SECP256K1_INT128_NATIVE_IMPL_H
+#define SECP256K1_INT128_NATIVE_IMPL_H
+
+#include "int128.h"
+#include "util.h"
+
+static SECP256K1_INLINE void secp256k1_u128_load(secp256k1_uint128 *r, uint64_t hi, uint64_t lo) { /* Sets *r to hi*2^64 + lo. */
+    *r = (((uint128_t)hi) << 64) + lo;
+}
+
+static SECP256K1_INLINE void secp256k1_u128_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b) { /* Sets *r to the full 128-bit product a*b. */
+   *r = (uint128_t)a * b; /* widening a first makes the multiplication 128-bit */
+}
+
+static SECP256K1_INLINE void secp256k1_u128_accum_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b) { /* Adds the 128-bit product a*b to *r. */
+   *r += (uint128_t)a * b; /* unsigned arithmetic: the sum wraps modulo 2^128 */
+}
+
+static SECP256K1_INLINE void secp256k1_u128_accum_u64(secp256k1_uint128 *r, uint64_t a) { /* Adds the 64-bit value a to *r. */
+   *r += a; /* unsigned arithmetic: the sum wraps modulo 2^128 */
+}
+
+static SECP256K1_INLINE void secp256k1_u128_rshift(secp256k1_uint128 *r, unsigned int n) { /* Logical right shift of *r by n bits. */
+   VERIFY_CHECK(n < 128); /* a shift by the full width or more is not allowed */
+   *r >>= n;
+}
+
+static SECP256K1_INLINE uint64_t secp256k1_u128_to_u64(const secp256k1_uint128 *a) { /* Returns the low 64 bits of *a. */
+   return (uint64_t)(*a); /* the narrowing cast discards the high 64 bits */
+}
+
+static SECP256K1_INLINE uint64_t secp256k1_u128_hi_u64(const secp256k1_uint128 *a) { /* Returns the high 64 bits of *a. */
+   return (uint64_t)(*a >> 64);
+}
+
+static SECP256K1_INLINE void secp256k1_u128_from_u64(secp256k1_uint128 *r, uint64_t a) { /* Sets *r to a (zero-extended to 128 bits). */
+   *r = a;
+}
+
+static SECP256K1_INLINE int secp256k1_u128_check_bits(const secp256k1_uint128 *r, unsigned int n) { /* Returns 1 if *r < 2^n, else 0. */
+   VERIFY_CHECK(n < 128);
+   return (*r >> n == 0); /* nothing is left above bit n-1 exactly when *r < 2^n */
+}
+
+static SECP256K1_INLINE void secp256k1_i128_load(secp256k1_int128 *r, int64_t hi, uint64_t lo) { /* Sets *r from a signed high half and an unsigned low half. */
+    *r = (((uint128_t)(uint64_t)hi) << 64) + lo; /* assembled in unsigned arithmetic, converted to the signed type on assignment */
+}
+
+static SECP256K1_INLINE void secp256k1_i128_mul(secp256k1_int128 *r, int64_t a, int64_t b) { /* Sets *r to the full signed 128-bit product a*b. */
+   *r = (int128_t)a * b; /* widening a first makes the multiplication 128-bit */
+}
+
+static SECP256K1_INLINE void secp256k1_i128_accum_mul(secp256k1_int128 *r, int64_t a, int64_t b) { /* Adds the signed 128-bit product a*b to *r. */
+   int128_t ab = (int128_t)a * b;
+   VERIFY_CHECK(0 <= ab ? *r <= INT128_MAX - ab : INT128_MIN - ab <= *r); /* the sum below must stay within the signed 128-bit range */
+   *r += ab;
+}
+
+static SECP256K1_INLINE void secp256k1_i128_det(secp256k1_int128 *r, int64_t a, int64_t b, int64_t c, int64_t d) { /* Sets *r to a*d - b*c. */
+   int128_t ad = (int128_t)a * d;
+   int128_t bc = (int128_t)b * c;
+   VERIFY_CHECK(0 <= bc ? INT128_MIN + bc <= ad : ad <= INT128_MAX + bc); /* the difference below must stay within the signed 128-bit range */
+   *r = ad - bc;
+}
+
+static SECP256K1_INLINE void secp256k1_i128_rshift(secp256k1_int128 *r, unsigned int n) { /* Right shift of the signed value *r by n bits. */
+   VERIFY_CHECK(n < 128); /* a shift by the full width or more is not allowed */
+   *r >>= n;
+}
+
+static SECP256K1_INLINE uint64_t secp256k1_i128_to_u64(const secp256k1_int128 *a) { /* Returns *a reduced modulo 2^64. */
+   return (uint64_t)*a;
+}
+
+static SECP256K1_INLINE int64_t secp256k1_i128_to_i64(const secp256k1_int128 *a) { /* Returns *a as an int64_t; *a must fit. */
+   VERIFY_CHECK(INT64_MIN <= *a && *a <= INT64_MAX);
+   return *a; /* implicit narrowing; value-preserving given the range check above */
+}
+
+static SECP256K1_INLINE void secp256k1_i128_from_i64(secp256k1_int128 *r, int64_t a) { /* Sets *r to a (sign-extended to 128 bits). */
+   *r = a;
+}
+
+static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, const secp256k1_int128 *b) { /* Returns 1 if *a == *b, else 0. */
+   return *a == *b;
+}
+
+static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n, int sign) { /* Returns 1 if *r == sign*2^n, else 0. */
+   VERIFY_CHECK(n < 127);
+   VERIFY_CHECK(sign == 1 || sign == -1);
+   return (*r == (int128_t)((uint128_t)sign << n)); /* shifted as unsigned so that sign == -1 never left-shifts a negative value */
+}
+
+#endif
--- a/libsecp256k1/src/int128_struct.h
+++ b/libsecp256k1/src/int128_struct.h
@@ -0,0 +1,14 @@
+#ifndef SECP256K1_INT128_STRUCT_H
+#define SECP256K1_INT128_STRUCT_H
+
+#include <stdint.h>
+#include "util.h"
+
+typedef struct { /* 128-bit integer stored as two 64-bit halves. */
+  uint64_t lo; /* low 64 bits */
+  uint64_t hi; /* high 64 bits */
+} secp256k1_uint128;
+
+typedef secp256k1_uint128 secp256k1_int128; /* signed values share the layout; the secp256k1_i128_* functions interpret hi as two's complement */
+
+#endif
--- a/libsecp256k1/src/int128_struct_impl.h
+++ b/libsecp256k1/src/int128_struct_impl.h
@@ -0,0 +1,205 @@
+#ifndef SECP256K1_INT128_STRUCT_IMPL_H
+#define SECP256K1_INT128_STRUCT_IMPL_H
+
+#include "int128.h"
+#include "util.h"
+
+#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) /* MSVC */
+#    include <intrin.h>
+#    if defined(_M_ARM64) || defined(SECP256K1_MSVC_MULH_TEST_OVERRIDE)
+/* On ARM64 MSVC, use __(u)mulh for the upper half of 64x64 multiplications.
+   (Define SECP256K1_MSVC_MULH_TEST_OVERRIDE to test this code path on X64,
+   which supports both __(u)mulh and _umul128.) */
+#        if defined(SECP256K1_MSVC_MULH_TEST_OVERRIDE)
+#            pragma message(__FILE__ ": SECP256K1_MSVC_MULH_TEST_OVERRIDE is defined, forcing use of __(u)mulh.")
+#        endif
+static SECP256K1_INLINE uint64_t secp256k1_umul128(uint64_t a, uint64_t b, uint64_t* hi) { /* Returns the low 64 bits of a*b; stores the high 64 bits in *hi. */
+    *hi = __umulh(a, b); /* upper half of the unsigned 64x64 product */
+    return a * b; /* wrapping product, i.e. the lower half */
+}
+
+static SECP256K1_INLINE int64_t secp256k1_mul128(int64_t a, int64_t b, int64_t* hi) { /* Signed variant: returns the low 64 bits of a*b; stores the high 64 bits in *hi. */
+    *hi = __mulh(a, b); /* upper half of the signed 64x64 product */
+    return (uint64_t)a * (uint64_t)b; /* multiplied as unsigned so that the low half wraps instead of overflowing a signed type */
+}
+#    else
+/* On x86_64 MSVC, use native _(u)mul128 for 64x64->128 multiplications. */
+#        define secp256k1_umul128 _umul128
+#        define secp256k1_mul128 _mul128
+#    endif
+#else
+/* On other systems, emulate 64x64->128 multiplications using 32x32->64 multiplications. */
+static SECP256K1_INLINE uint64_t secp256k1_umul128(uint64_t a, uint64_t b, uint64_t* hi) { /* Returns the low 64 bits of a*b; stores the high 64 bits in *hi. */
+    uint64_t ll = (uint64_t)(uint32_t)a * (uint32_t)b; /* low(a)  * low(b)  */
+    uint64_t lh = (uint32_t)a * (b >> 32);             /* low(a)  * high(b) */
+    uint64_t hl = (a >> 32) * (uint32_t)b;             /* high(a) * low(b)  */
+    uint64_t hh = (a >> 32) * (b >> 32);               /* high(a) * high(b) */
+    uint64_t mid34 = (ll >> 32) + (uint32_t)lh + (uint32_t)hl; /* bits 32..63 of the product plus carries: three 32-bit terms, so at most 34 bits */
+    *hi = hh + (lh >> 32) + (hl >> 32) + (mid34 >> 32); /* upper halves of the cross terms plus the carry out of the middle column */
+    return (mid34 << 32) + (uint32_t)ll;
+}
+
+static SECP256K1_INLINE int64_t secp256k1_mul128(int64_t a, int64_t b, int64_t* hi) { /* Signed variant: returns the low 64 bits of a*b; stores the high 64 bits in *hi. */
+    uint64_t ll = (uint64_t)(uint32_t)a * (uint32_t)b; /* low(a) * low(b), both taken as unsigned 32-bit values */
+    int64_t lh = (uint32_t)a * (b >> 32);              /* unsigned low(a) * signed high(b) */
+    int64_t hl = (a >> 32) * (uint32_t)b;              /* signed high(a) * unsigned low(b) */
+    int64_t hh = (a >> 32) * (b >> 32);                /* signed high(a) * signed high(b) */
+    uint64_t mid34 = (ll >> 32) + (uint32_t)lh + (uint32_t)hl; /* bits 32..63 of the product plus carries: three 32-bit terms, so at most 34 bits */
+    *hi = hh + (lh >> 32) + (hl >> 32) + (mid34 >> 32); /* the shifts of lh and hl are on signed values, so they carry the sign along */
+    return (mid34 << 32) + (uint32_t)ll;
+}
+#endif
+
+static SECP256K1_INLINE void secp256k1_u128_load(secp256k1_uint128 *r, uint64_t hi, uint64_t lo) { /* Sets *r to hi*2^64 + lo. */
+    r->hi = hi;
+    r->lo = lo;
+}
+
+static SECP256K1_INLINE void secp256k1_u128_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b) { /* Sets *r to the full 128-bit product a*b. */
+   r->lo = secp256k1_umul128(a, b, &r->hi); /* the helper returns the low half and writes the high half through its pointer argument */
+}
+
+static SECP256K1_INLINE void secp256k1_u128_accum_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b) { /* Adds the 128-bit product a*b to *r, modulo 2^128. */
+   uint64_t lo, hi;
+   lo = secp256k1_umul128(a, b, &hi);
+   r->lo += lo;
+   r->hi += hi + (r->lo < lo); /* (r->lo < lo) is 1 exactly when the low-half addition wrapped, i.e. the carry */
+}
+
+static SECP256K1_INLINE void secp256k1_u128_accum_u64(secp256k1_uint128 *r, uint64_t a) { /* Adds the 64-bit value a to *r, modulo 2^128. */
+   r->lo += a;
+   r->hi += r->lo < a; /* carry: the low half wrapped exactly when it ended up smaller than a */
+}
+
+/* Unsigned (logical) right shift of *r by n bits (n < 128).
+ * Non-constant time in n.
+ */
+static SECP256K1_INLINE void secp256k1_u128_rshift(secp256k1_uint128 *r, unsigned int n) {
+   VERIFY_CHECK(n < 128);
+   if (n >= 64) { /* only bits of the high half survive */
+     r->lo = r->hi >> (n-64);
+     r->hi = 0;
+   } else if (n > 0) { /* n == 0 is left untouched; it would otherwise need a shift by 64 below */
+#if defined(_MSC_VER) && defined(_M_X64)
+     VERIFY_CHECK(n < 64);
+     r->lo = __shiftright128(r->lo, r->hi, n);
+#else
+     r->lo = ((1U * r->hi) << (64-n)) | r->lo >> n; /* low n bits of hi move into the top of lo */
+#endif
+     r->hi >>= n;
+   }
+}
+
+static SECP256K1_INLINE uint64_t secp256k1_u128_to_u64(const secp256k1_uint128 *a) { /* Returns the low 64 bits of *a. */
+   return a->lo;
+}
+
+static SECP256K1_INLINE uint64_t secp256k1_u128_hi_u64(const secp256k1_uint128 *a) { /* Returns the high 64 bits of *a. */
+   return a->hi;
+}
+
+static SECP256K1_INLINE void secp256k1_u128_from_u64(secp256k1_uint128 *r, uint64_t a) { /* Sets *r to a (zero-extended to 128 bits). */
+   r->hi = 0;
+   r->lo = a;
+}
+
+static SECP256K1_INLINE int secp256k1_u128_check_bits(const secp256k1_uint128 *r, unsigned int n) { /* Returns 1 if *r < 2^n, else 0. */
+   VERIFY_CHECK(n < 128);
+   if (n < 64) return r->hi == 0 && r->lo >> n == 0; /* the bound lies in the low half: the high half must be empty */
+   return r->hi >> (n - 64) == 0; /* the bound lies in the high half: the low half is unconstrained */
+}
+
+static SECP256K1_INLINE void secp256k1_i128_load(secp256k1_int128 *r, int64_t hi, uint64_t lo) { /* Sets *r from a signed high half and an unsigned low half. */
+    r->hi = hi; /* implicit conversion to the unsigned field type */
+    r->lo = lo;
+}
+
+static SECP256K1_INLINE void secp256k1_i128_mul(secp256k1_int128 *r, int64_t a, int64_t b) { /* Sets *r to the full signed 128-bit product a*b. */
+   int64_t hi;
+   r->lo = (uint64_t)secp256k1_mul128(a, b, &hi);
+   r->hi = (uint64_t)hi;
+}
+
+static SECP256K1_INLINE void secp256k1_i128_accum_mul(secp256k1_int128 *r, int64_t a, int64_t b) { /* Adds the signed 128-bit product a*b to *r. */
+   int64_t hi;
+   uint64_t lo = (uint64_t)secp256k1_mul128(a, b, &hi);
+   r->lo += lo;
+   hi += r->lo < lo; /* fold the carry out of the low-half addition into the high half of the addend */
+   /* Verify no overflow.
+    * If r represents a positive value (the sign bit is not set) and the value we are adding is a positive value (the sign bit is not set),
+    * then we require that the resulting value also be positive (the sign bit is not set).
+    * Note that (X <= Y) means (X implies Y) when X and Y are boolean values (i.e. 0 or 1).
+    */
+   VERIFY_CHECK((r->hi <= 0x7fffffffffffffffu && (uint64_t)hi <= 0x7fffffffffffffffu) <= (r->hi + (uint64_t)hi <= 0x7fffffffffffffffu));
+   /* Verify no underflow.
+    * If r represents a negative value (the sign bit is set) and the value we are adding is a negative value (the sign bit is set),
+    * then we require that the resulting value also be negative (the sign bit is set).
+    */
+   VERIFY_CHECK((r->hi > 0x7fffffffffffffffu && (uint64_t)hi > 0x7fffffffffffffffu) <= (r->hi + (uint64_t)hi > 0x7fffffffffffffffu));
+   r->hi += hi;
+}
+
+static SECP256K1_INLINE void secp256k1_i128_dissip_mul(secp256k1_int128 *r, int64_t a, int64_t b) { /* Subtracts the signed 128-bit product a*b from *r. */
+   int64_t hi;
+   uint64_t lo = (uint64_t)secp256k1_mul128(a, b, &hi);
+   hi += r->lo < lo; /* fold the borrow needed by the low-half subtraction below into the high half of the subtrahend */
+   /* Verify no overflow.
+    * If r represents a positive value (the sign bit is not set) and the value we are subtracting is a negative value (the sign bit is set),
+    * then we require that the resulting value also be positive (the sign bit is not set).
+    */
+   VERIFY_CHECK((r->hi <= 0x7fffffffffffffffu && (uint64_t)hi > 0x7fffffffffffffffu) <= (r->hi - (uint64_t)hi <= 0x7fffffffffffffffu));
+   /* Verify no underflow.
+    * If r represents a negative value (the sign bit is set) and the value we are subtracting is a positive value (the sign bit is not set),
+    * then we require that the resulting value also be negative (the sign bit is set).
+    */
+   VERIFY_CHECK((r->hi > 0x7fffffffffffffffu && (uint64_t)hi <= 0x7fffffffffffffffu) <= (r->hi - (uint64_t)hi > 0x7fffffffffffffffu));
+   r->hi -= hi;
+   r->lo -= lo;
+}
+
+static SECP256K1_INLINE void secp256k1_i128_det(secp256k1_int128 *r, int64_t a, int64_t b, int64_t c, int64_t d) { /* Sets *r to a*d - b*c. */
+   secp256k1_i128_mul(r, a, d); /* r = a*d */
+   secp256k1_i128_dissip_mul(r, b, c); /* r -= b*c */
+}
+
+/* Signed (arithmetic) right shift of *r by n bits (n < 128).
+ * Non-constant time in n.
+ */
+static SECP256K1_INLINE void secp256k1_i128_rshift(secp256k1_int128 *r, unsigned int n) {
+   VERIFY_CHECK(n < 128);
+   if (n >= 64) { /* only bits of the high half survive */
+     r->lo = (uint64_t)((int64_t)(r->hi) >> (n-64));
+     r->hi = (uint64_t)((int64_t)(r->hi) >> 63); /* fill the high half with copies of the sign bit */
+   } else if (n > 0) { /* n == 0 is left untouched; it would otherwise need a shift by 64 below */
+     r->lo = ((1U * r->hi) << (64-n)) | r->lo >> n; /* low n bits of hi move into the top of lo */
+     r->hi = (uint64_t)((int64_t)(r->hi) >> n); /* shifted as a signed value so the sign is preserved */
+   }
+}
+
+static SECP256K1_INLINE uint64_t secp256k1_i128_to_u64(const secp256k1_int128 *a) { /* Returns the low 64 bits of *a. */
+   return a->lo;
+}
+
+static SECP256K1_INLINE int64_t secp256k1_i128_to_i64(const secp256k1_int128 *a) { /* Returns *a as an int64_t; *a must fit. */
+   /* Verify that a represents a 64 bit signed value by checking that the high bits are a sign extension of the low bits. */
+   VERIFY_CHECK(a->hi == -(a->lo >> 63)); /* -(0) is 0; -(1) in uint64_t arithmetic is all ones */
+   return (int64_t)secp256k1_i128_to_u64(a);
+}
+
+static SECP256K1_INLINE void secp256k1_i128_from_i64(secp256k1_int128 *r, int64_t a) { /* Sets *r to a (sign-extended to 128 bits). */
+   r->hi = (uint64_t)(a >> 63); /* sign extension; relies on >> of a negative value being an arithmetic shift */
+   r->lo = (uint64_t)a;
+}
+
+static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, const secp256k1_int128 *b) { /* Returns 1 if *a == *b, else 0. */
+   return a->hi == b->hi && a->lo == b->lo; /* && short-circuits: the low halves are only compared when the high halves match */
+}
+
+static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n, int sign) { /* Returns 1 if *r == sign*2^n, else 0. */
+    VERIFY_CHECK(n < 127);
+    VERIFY_CHECK(sign == 1 || sign == -1);
+    return n >= 64 ? r->hi == (uint64_t)sign << (n - 64) && r->lo == 0 /* bit n lies in the high half */
+                   : r->hi == (uint64_t)(sign >> 1) && r->lo == (uint64_t)sign << n; /* bit n lies in the low half; hi must be its sign extension (sign >> 1) */
+}
+
+#endif
--- a/libsecp256k1/src/modinv32.h
+++ b/libsecp256k1/src/modinv32.h
@@ -0,0 +1,43 @@
+/***********************************************************************
+ * Copyright (c) 2020 Peter Dettman                                    *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef SECP256K1_MODINV32_H
+#define SECP256K1_MODINV32_H
+
+#include "util.h"
+
+/* A signed 30-bit limb representation of integers.
+ *
+ * Its value is sum(v[i] * 2^(30*i), i=0..8). */
+typedef struct {
+    int32_t v[9]; /* v[0] is the least significant limb, v[8] the most significant */
+} secp256k1_modinv32_signed30;
+
+typedef struct { /* A modulus together with a constant derived from it. */
+    /* The modulus in signed30 notation, must be odd and in [3, 2^256]. */
+    secp256k1_modinv32_signed30 modulus;
+
+    /* modulus^{-1} mod 2^30 (the inverse exists because the modulus is odd) */
+    uint32_t modulus_inv30;
+} secp256k1_modinv32_modinfo;
+
+/* Replace x with its modular inverse mod modinfo->modulus. x must be in range [0, modulus).
+ * If x is zero, the result will be zero as well. If not, the inverse must exist (i.e., the gcd of
+ * x and modulus must be 1). These rules are automatically satisfied if the modulus is prime.
+ *
+ * On output, all of x's limbs will be in [0, 2^30).
+ */
+static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo);
+
+/* Same as secp256k1_modinv32_var, but constant time in x (not in the modulus). */
+static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo);
+
+/* Compute the Jacobi symbol for (x | modinfo->modulus). x must be coprime with modulus (and thus
+ * cannot be 0, as modulus >= 3). All limbs of x must be non-negative. Returns 0 if the result
+ * cannot be computed. */
+static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo);
+
+#endif /* SECP256K1_MODINV32_H */
--- a/libsecp256k1/src/modinv32_impl.h
+++ b/libsecp256k1/src/modinv32_impl.h
@@ -0,0 +1,725 @@
+/***********************************************************************
+ * Copyright (c) 2020 Peter Dettman                                    *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef SECP256K1_MODINV32_IMPL_H
+#define SECP256K1_MODINV32_IMPL_H
+
+#include "modinv32.h"
+
+#include "util.h"
+
+#include <stdlib.h>
+
+/* This file implements modular inversion based on the paper "Fast constant-time gcd computation and
+ * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
+ *
+ * For an explanation of the algorithm, see doc/safegcd_implementation.md. This file contains an
+ * implementation for N=30, using 30-bit signed limbs represented as int32_t.
+ */
+
+#ifdef VERIFY
+static const secp256k1_modinv32_signed30 SECP256K1_SIGNED30_ONE = {{1}};
+
+/* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^30). Only the first alen limbs of a are read. */
+static void secp256k1_modinv32_mul_30(secp256k1_modinv32_signed30 *r, const secp256k1_modinv32_signed30 *a, int alen, int32_t factor) {
+    const int32_t M30 = (int32_t)(UINT32_MAX >> 2); /* mask with the low 30 bits set */
+    int64_t c = 0; /* running carry between limbs */
+    int i;
+    for (i = 0; i < 8; ++i) {
+        if (i < alen) c += (int64_t)a->v[i] * factor; /* limbs at index >= alen count as zero */
+        r->v[i] = (int32_t)c & M30; c >>= 30;
+    }
+    if (8 < alen) c += (int64_t)a->v[8] * factor;
+    VERIFY_CHECK(c == (int32_t)c); /* the top limb must fit in an int32_t */
+    r->v[8] = (int32_t)c;
+}
+
+/* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A consists of alen limbs; b has 9. */
+static int secp256k1_modinv32_mul_cmp_30(const secp256k1_modinv32_signed30 *a, int alen, const secp256k1_modinv32_signed30 *b, int32_t factor) {
+    int i;
+    secp256k1_modinv32_signed30 am, bm; /* a and b*factor, both with the lower eight limbs normalized */
+    secp256k1_modinv32_mul_30(&am, a, alen, 1); /* Normalize all but the top limb of a. */
+    secp256k1_modinv32_mul_30(&bm, b, 9, factor);
+    for (i = 0; i < 8; ++i) {
+        /* Verify that all but the top limb of a and b are normalized. */
+        VERIFY_CHECK(am.v[i] >> 30 == 0);
+        VERIFY_CHECK(bm.v[i] >> 30 == 0);
+    }
+    for (i = 8; i >= 0; --i) { /* compare limb by limb, most significant first */
+        if (am.v[i] < bm.v[i]) return -1;
+        if (am.v[i] > bm.v[i]) return 1;
+    }
+    return 0;
+}
+#endif
+
+/* Take as input a signed30 number in range (-2*modulus,modulus), and add a multiple of the modulus
+ * to it to bring it to range [0,modulus). If sign < 0, the input will also be negated in the
+ * process. The input must have limbs in range (-2^30,2^30). The output will have limbs in range
+ * [0,2^30). */
+static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int32_t sign, const secp256k1_modinv32_modinfo *modinfo) {
+    const int32_t M30 = (int32_t)(UINT32_MAX >> 2); /* mask with the low 30 bits set */
+    int32_t r0 = r->v[0], r1 = r->v[1], r2 = r->v[2], r3 = r->v[3], r4 = r->v[4],
+            r5 = r->v[5], r6 = r->v[6], r7 = r->v[7], r8 = r->v[8];
+    volatile int32_t cond_add, cond_negate; /* NOTE(review): volatile presumably keeps the compiler from turning the masks into branches - confirm */
+
+#ifdef VERIFY
+    /* Verify that all limbs are in range (-2^30,2^30). */
+    int i;
+    for (i = 0; i < 9; ++i) {
+        VERIFY_CHECK(r->v[i] >= -M30);
+        VERIFY_CHECK(r->v[i] <= M30);
+    }
+    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, -2) > 0); /* r > -2*modulus */
+    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
+#endif
+
+    /* In a first step, add the modulus if the input is negative, and then negate if requested.
+     * This brings r from range (-2*modulus,modulus) to range (-modulus,modulus). As all input
+     * limbs are in range (-2^30,2^30), this cannot overflow an int32_t. Note that the right
+     * shifts below are signed sign-extending shifts (see assumptions.h for tests that that is
+     * indeed the behavior of the right shift operator). */
+    cond_add = r8 >> 31; /* all ones if the top limb is negative, else 0 */
+    r0 += modinfo->modulus.v[0] & cond_add;
+    r1 += modinfo->modulus.v[1] & cond_add;
+    r2 += modinfo->modulus.v[2] & cond_add;
+    r3 += modinfo->modulus.v[3] & cond_add;
+    r4 += modinfo->modulus.v[4] & cond_add;
+    r5 += modinfo->modulus.v[5] & cond_add;
+    r6 += modinfo->modulus.v[6] & cond_add;
+    r7 += modinfo->modulus.v[7] & cond_add;
+    r8 += modinfo->modulus.v[8] & cond_add;
+    cond_negate = sign >> 31; /* all ones if sign < 0, else 0 */
+    r0 = (r0 ^ cond_negate) - cond_negate; /* (x ^ m) - m is -x when m is all ones and x when m is 0 */
+    r1 = (r1 ^ cond_negate) - cond_negate;
+    r2 = (r2 ^ cond_negate) - cond_negate;
+    r3 = (r3 ^ cond_negate) - cond_negate;
+    r4 = (r4 ^ cond_negate) - cond_negate;
+    r5 = (r5 ^ cond_negate) - cond_negate;
+    r6 = (r6 ^ cond_negate) - cond_negate;
+    r7 = (r7 ^ cond_negate) - cond_negate;
+    r8 = (r8 ^ cond_negate) - cond_negate;
+    /* Propagate the top bits, to bring limbs back to range (-2^30,2^30). */
+    r1 += r0 >> 30; r0 &= M30;
+    r2 += r1 >> 30; r1 &= M30;
+    r3 += r2 >> 30; r2 &= M30;
+    r4 += r3 >> 30; r3 &= M30;
+    r5 += r4 >> 30; r4 &= M30;
+    r6 += r5 >> 30; r5 &= M30;
+    r7 += r6 >> 30; r6 &= M30;
+    r8 += r7 >> 30; r7 &= M30;
+
+    /* In a second step add the modulus again if the result is still negative, bringing r to range
+     * [0,modulus). */
+    cond_add = r8 >> 31; /* all ones if the top limb is negative, else 0 */
+    r0 += modinfo->modulus.v[0] & cond_add;
+    r1 += modinfo->modulus.v[1] & cond_add;
+    r2 += modinfo->modulus.v[2] & cond_add;
+    r3 += modinfo->modulus.v[3] & cond_add;
+    r4 += modinfo->modulus.v[4] & cond_add;
+    r5 += modinfo->modulus.v[5] & cond_add;
+    r6 += modinfo->modulus.v[6] & cond_add;
+    r7 += modinfo->modulus.v[7] & cond_add;
+    r8 += modinfo->modulus.v[8] & cond_add;
+    /* And propagate again. */
+    r1 += r0 >> 30; r0 &= M30;
+    r2 += r1 >> 30; r1 &= M30;
+    r3 += r2 >> 30; r2 &= M30;
+    r4 += r3 >> 30; r3 &= M30;
+    r5 += r4 >> 30; r4 &= M30;
+    r6 += r5 >> 30; r5 &= M30;
+    r7 += r6 >> 30; r6 &= M30;
+    r8 += r7 >> 30; r7 &= M30;
+
+    r->v[0] = r0;
+    r->v[1] = r1;
+    r->v[2] = r2;
+    r->v[3] = r3;
+    r->v[4] = r4;
+    r->v[5] = r5;
+    r->v[6] = r6;
+    r->v[7] = r7;
+    r->v[8] = r8;
+
+    VERIFY_CHECK(r0 >> 30 == 0); /* every output limb must lie in [0,2^30) */
+    VERIFY_CHECK(r1 >> 30 == 0);
+    VERIFY_CHECK(r2 >> 30 == 0);
+    VERIFY_CHECK(r3 >> 30 == 0);
+    VERIFY_CHECK(r4 >> 30 == 0);
+    VERIFY_CHECK(r5 >> 30 == 0);
+    VERIFY_CHECK(r6 >> 30 == 0);
+    VERIFY_CHECK(r7 >> 30 == 0);
+    VERIFY_CHECK(r8 >> 30 == 0);
+    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 0) >= 0); /* r >= 0 */
+    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
+}
+
+/* Data type for transition matrices (see section 3 of explanation).
+ *
+ * t = [ u  v ]
+ *     [ q  r ]
+ */
+typedef struct {
+    int32_t u, v, q, r; /* matrix entries: top row (u, v), bottom row (q, r) */
+} secp256k1_modinv32_trans2x2;
+
+/* Compute the transition matrix and zeta for 30 divsteps.
+ *
+ * Input:  zeta: initial zeta
+ *         f0:   bottom limb of initial f
+ *         g0:   bottom limb of initial g
+ * Output: t: transition matrix
+ * Return: final zeta
+ *
+ * Implements the divsteps_n_matrix function from the explanation.
+ */
+static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
+    /* u,v,q,r are the elements of the transformation matrix being built up,
+     * starting with the identity matrix. Semantically they are signed integers
+     * in range [-2^30,2^30], but here represented as unsigned mod 2^32. This
+     * permits left shifting (which is UB for negative numbers). The range
+     * being inside [-2^31,2^31) means that casting to signed works correctly.
+     */
+    uint32_t u = 1, v = 0, q = 0, r = 1;
+    volatile uint32_t c1, c2; /* NOTE(review): volatile presumably keeps the compiler from turning the masks into branches - confirm */
+    uint32_t mask1, mask2, f = f0, g = g0, x, y, z;
+    int i;
+
+    for (i = 0; i < 30; ++i) {
+        VERIFY_CHECK((f & 1) == 1); /* f must always be odd */
+        VERIFY_CHECK((u * f0 + v * g0) == f << i); /* invariant: top matrix row maps (f0, g0) to f*2^i (mod 2^32) */
+        VERIFY_CHECK((q * f0 + r * g0) == g << i); /* invariant: bottom matrix row maps (f0, g0) to g*2^i (mod 2^32) */
+        /* Compute conditional masks for (zeta < 0) and for (g & 1). */
+        c1 = zeta >> 31;
+        mask1 = c1; /* all ones if zeta < 0, else 0 */
+        c2 = g & 1;
+        mask2 = -c2; /* all ones if g is odd, else 0 */
+        /* Compute x,y,z, conditionally negated versions of f,u,v. */
+        x = (f ^ mask1) - mask1;
+        y = (u ^ mask1) - mask1;
+        z = (v ^ mask1) - mask1;
+        /* Conditionally add x,y,z to g,q,r. */
+        g += x & mask2;
+        q += y & mask2;
+        r += z & mask2;
+        /* In what follows, mask1 is a condition mask for (zeta < 0) and (g & 1). */
+        mask1 &= mask2;
+        /* Conditionally change zeta into -zeta-2 or zeta-1. */
+        zeta = (zeta ^ mask1) - 1;
+        /* Conditionally add g,q,r to f,u,v. */
+        f += g & mask1;
+        u += q & mask1;
+        v += r & mask1;
+        /* Shifts */
+        g >>= 1;
+        u <<= 1;
+        v <<= 1;
+        /* Bounds on zeta that follow from the bounds on iteration count (max 20*30 divsteps). */
+        VERIFY_CHECK(zeta >= -601 && zeta <= 601);
+    }
+    /* Return data in t and return value. */
+    t->u = (int32_t)u;
+    t->v = (int32_t)v;
+    t->q = (int32_t)q;
+    t->r = (int32_t)r;
+    /* The determinant of t must be a power of two. This guarantees that multiplication with t
+     * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
+     * will be divided out again). As each divstep's individual matrix has determinant 2, the
+     * aggregate of 30 of them will have determinant 2^30. */
+    VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
+    return zeta;
+}
+
+/* secp256k1_modinv32_inv256[i] = -(2*i+1)^-1 (mod 256)
+ *
+ * In other words, entry i is the negated inverse modulo 256 of the odd number 2*i+1 (for
+ * example entry 0 is -1 = 0xFF, and entry 1 is -(3^-1) = -0xAB = 0x55). The variable-time
+ * divsteps routines index it with (f >> 1) & 127 for odd f, to find the multiple of f that
+ * cancels the bottom bits of g. */
+static const uint8_t secp256k1_modinv32_inv256[128] = {
+    0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
+    0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
+    0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
+    0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
+    0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
+    0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
+    0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
+    0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
+    0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
+    0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
+    0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
+};
+
+/* Compute the transition matrix and eta for 30 divsteps (variable time).
+ *
+ * Input:  eta: initial eta
+ *         f0:  bottom limb of initial f
+ *         g0:  bottom limb of initial g
+ * Output: t: transition matrix
+ * Return: final eta
+ *
+ * Implements the divsteps_n_matrix_var function from the explanation.
+ *
+ * Unlike secp256k1_modinv32_divsteps_30, this version branches on the data: it handles runs
+ * of zero bits of g in one go and cancels up to 8 bottom bits of g per loop iteration.
+ */
+static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
+    /* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */
+    uint32_t u = 1, v = 0, q = 0, r = 1;
+    uint32_t f = f0, g = g0, m;
+    uint16_t w;
+    /* i counts the divsteps that remain to be done. */
+    int i = 30, limit, zeros;
+
+    for (;;) {
+        /* Use a sentinel bit to count zeros only up to i. */
+        zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
+        /* Perform zeros divsteps at once; they all just divide g by two. */
+        g >>= zeros;
+        u <<= zeros;
+        v <<= zeros;
+        eta -= zeros;
+        i -= zeros;
+         /* We're done once we've done 30 divsteps. */
+        if (i == 0) break;
+        VERIFY_CHECK((f & 1) == 1);
+        VERIFY_CHECK((g & 1) == 1);
+        /* Invariant: the matrix built so far maps (f0, g0) to (f, g) scaled by 2^(30-i) (mod 2^32). */
+        VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
+        VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
+        /* Bounds on eta that follow from the bounds on iteration count (max 25*30 divsteps). */
+        VERIFY_CHECK(eta >= -751 && eta <= 751);
+        /* If eta is negative, negate it and replace f,g with g,-f. */
+        if (eta < 0) {
+            uint32_t tmp;
+            eta = -eta;
+            /* The same swap-and-negate is applied to the matrix rows (u,v) and (q,r). */
+            tmp = f; f = g; g = -tmp;
+            tmp = u; u = q; q = -tmp;
+            tmp = v; v = r; r = -tmp;
+        }
+        /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
+         * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
+         * can be done as its sign will flip once that happens. */
+        limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
+        /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
+        VERIFY_CHECK(limit > 0 && limit <= 30);
+        m = (UINT32_MAX >> (32 - limit)) & 255U;
+        /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
+        w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
+        /* Do so. */
+        g += f * w;
+        q += u * w;
+        r += v * w;
+        VERIFY_CHECK((g & m) == 0);
+    }
+    /* Return data in t and return value. */
+    t->u = (int32_t)u;
+    t->v = (int32_t)v;
+    t->q = (int32_t)q;
+    t->r = (int32_t)r;
+    /* The determinant of t must be a power of two. This guarantees that multiplication with t
+     * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
+     * will be divided out again). As each divstep's individual matrix has determinant 2, the
+     * aggregate of 30 of them will have determinant 2^30. */
+    VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
+    return eta;
+}
+
+/* Compute the transition matrix and eta for 30 posdivsteps (variable time, eta=-delta), and keeps track
+ * of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^32 rather than 2^30, because
+ * Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2).
+ *
+ * Input:        eta: initial eta
+ *               f0:  bottom limb of initial f
+ *               g0:  bottom limb of initial g
+ * Output:       t: transition matrix
+ * Input/Output: (*jacp & 1) is bitflipped if and only if the Jacobi symbol of (f | g) changes sign
+ *               by applying the returned transformation matrix to it. The other bits of *jacp may
+ *               change, but are meaningless.
+ * Return: final eta
+ *
+ * The structure mirrors secp256k1_modinv32_divsteps_30_var, except that the swap step does not
+ * negate (f,g become g,f), so the determinant of t may be -2^30 as well as 2^30.
+ */
+static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp) {
+    /* Transformation matrix. */
+    uint32_t u = 1, v = 0, q = 0, r = 1;
+    uint32_t f = f0, g = g0, m;
+    uint16_t w;
+    /* i counts the posdivsteps that remain to be done. */
+    int i = 30, limit, zeros;
+    /* Work on a local copy of the Jacobi tracking bits; written back to *jacp at the end. */
+    int jac = *jacp;
+
+    for (;;) {
+        /* Use a sentinel bit to count zeros only up to i. */
+        zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
+        /* Perform zeros divsteps at once; they all just divide g by two. */
+        g >>= zeros;
+        u <<= zeros;
+        v <<= zeros;
+        eta -= zeros;
+        i -= zeros;
+        /* Update the bottom bit of jac: when dividing g by an odd power of 2,
+         * if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */
+        jac ^= (zeros & ((f >> 1) ^ (f >> 2)));
+        /* We're done once we've done 30 posdivsteps. */
+        if (i == 0) break;
+        VERIFY_CHECK((f & 1) == 1);
+        VERIFY_CHECK((g & 1) == 1);
+        /* Invariant: the matrix built so far maps (f0, g0) to (f, g) scaled by 2^(30-i) (mod 2^32). */
+        VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
+        VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
+        /* If eta is negative, negate it and replace f,g with g,f. */
+        if (eta < 0) {
+            uint32_t tmp;
+            eta = -eta;
+            /* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign
+             * if both f and g are 3 mod 4. */
+            jac ^= ((f & g) >> 1);
+            /* Plain swaps (no negation) of f,g and of the matrix rows (u,v) and (q,r). */
+            tmp = f; f = g; g = tmp;
+            tmp = u; u = q; q = tmp;
+            tmp = v; v = r; r = tmp;
+        }
+        /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
+         * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
+         * can be done as its sign will flip once that happens. */
+        limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
+        /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
+        VERIFY_CHECK(limit > 0 && limit <= 30);
+        m = (UINT32_MAX >> (32 - limit)) & 255U;
+        /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
+        w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
+        /* Do so. */
+        g += f * w;
+        q += u * w;
+        r += v * w;
+        VERIFY_CHECK((g & m) == 0);
+    }
+    /* Return data in t and return value. */
+    t->u = (int32_t)u;
+    t->v = (int32_t)v;
+    t->q = (int32_t)q;
+    t->r = (int32_t)r;
+    /* The determinant of t must be a power of two. This guarantees that multiplication with t
+     * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
+     * will be divided out again). As each divstep's individual matrix has determinant 2 or -2,
+     * the aggregate of 30 of them will have determinant 2^30 or -2^30. */
+    VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30 ||
+                 (int64_t)t->u * t->r - (int64_t)t->v * t->q == -(((int64_t)1) << 30));
+    *jacp = jac;
+    return eta;
+}
+
+/* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps.
+ *
+ * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
+ * (-2^30,2^30).
+ *
+ * The division by 2^30 is made exact by first adding multiples md and me of the modulus to
+ * t*[d,e], chosen so that the bottom 30 bits of both sums are zero.
+ *
+ * This implements the update_de function from the explanation.
+ */
+static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp256k1_modinv32_signed30 *e, const secp256k1_modinv32_trans2x2 *t, const secp256k1_modinv32_modinfo* modinfo) {
+    /* Mask for the low 30 bits of a limb. */
+    const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
+    const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
+    int32_t di, ei, md, me, sd, se;
+    /* 64-bit accumulators carrying between limbs. */
+    int64_t cd, ce;
+    int i;
+    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
+    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0);  /* d <    modulus */
+    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
+    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0);  /* e <    modulus */
+    VERIFY_CHECK(labs(u) <= (M30 + 1 - labs(v))); /* |u|+|v| <= 2^30 */
+    VERIFY_CHECK(labs(q) <= (M30 + 1 - labs(r))); /* |q|+|r| <= 2^30 */
+
+    /* [md,me] start as zero; plus [u,q] if d is negative; plus [v,r] if e is negative. */
+    /* sd and se are sign masks taken from the top limbs: all ones if negative, zero otherwise. */
+    sd = d->v[8] >> 31;
+    se = e->v[8] >> 31;
+    md = (u & sd) + (v & se);
+    me = (q & sd) + (r & se);
+    /* Begin computing t*[d,e]. */
+    di = d->v[0];
+    ei = e->v[0];
+    cd = (int64_t)u * di + (int64_t)v * ei;
+    ce = (int64_t)q * di + (int64_t)r * ei;
+    /* Correct md,me so that t*[d,e]+modulus*[md,me] has 30 zero bottom bits. */
+    md -= (modinfo->modulus_inv30 * (uint32_t)cd + md) & M30;
+    me -= (modinfo->modulus_inv30 * (uint32_t)ce + me) & M30;
+    /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
+    cd += (int64_t)modinfo->modulus.v[0] * md;
+    ce += (int64_t)modinfo->modulus.v[0] * me;
+    /* Verify that the low 30 bits of the computation are indeed zero, and then throw them away. */
+    VERIFY_CHECK(((int32_t)cd & M30) == 0); cd >>= 30;
+    VERIFY_CHECK(((int32_t)ce & M30) == 0); ce >>= 30;
+    /* Now iteratively compute limb i=1..8 of t*[d,e]+modulus*[md,me], and store them in output
+     * limb i-1 (shifting down by 30 bits). */
+    for (i = 1; i < 9; ++i) {
+        di = d->v[i];
+        ei = e->v[i];
+        cd += (int64_t)u * di + (int64_t)v * ei;
+        ce += (int64_t)q * di + (int64_t)r * ei;
+        cd += (int64_t)modinfo->modulus.v[i] * md;
+        ce += (int64_t)modinfo->modulus.v[i] * me;
+        d->v[i - 1] = (int32_t)cd & M30; cd >>= 30;
+        e->v[i - 1] = (int32_t)ce & M30; ce >>= 30;
+    }
+    /* What remains is limb 9 of t*[d,e]+modulus*[md,me]; store it as output limb 8. */
+    d->v[8] = (int32_t)cd;
+    e->v[8] = (int32_t)ce;
+
+    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
+    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0);  /* d <    modulus */
+    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
+    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0);  /* e <    modulus */
+}
+
+/* Replace [f, g] with (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
+ *
+ * The product t*[f,g] is accumulated limb by limb in 64-bit carries. Its bottom 30 bits are
+ * expected to be zero and are dropped, which performs the division by 2^30.
+ *
+ * This implements the update_fg function from the explanation.
+ */
+static void secp256k1_modinv32_update_fg_30(secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t) {
+    const int32_t low30 = (int32_t)(UINT32_MAX >> 2);
+    const int32_t tu = t->u, tv = t->v, tq = t->q, tr = t->r;
+    int32_t flimb, glimb;
+    int64_t fcarry, gcarry;
+    int k;
+    /* Limb 0 of t*[f,g]. */
+    flimb = f->v[0];
+    glimb = g->v[0];
+    fcarry = (int64_t)tu * flimb + (int64_t)tv * glimb;
+    gcarry = (int64_t)tq * flimb + (int64_t)tr * glimb;
+    /* Its bottom 30 bits must be zero; check that, then discard them. */
+    VERIFY_CHECK(((int32_t)fcarry & low30) == 0);
+    VERIFY_CHECK(((int32_t)gcarry & low30) == 0);
+    fcarry >>= 30;
+    gcarry >>= 30;
+    /* Output limb k (for k=0..7) is limb k+1 of t*[f,g], i.e. the result is shifted down
+     * by 30 bits. */
+    for (k = 0; k < 8; ++k) {
+        flimb = f->v[k + 1];
+        glimb = g->v[k + 1];
+        fcarry += (int64_t)tu * flimb + (int64_t)tv * glimb;
+        gcarry += (int64_t)tq * flimb + (int64_t)tr * glimb;
+        f->v[k] = (int32_t)fcarry & low30;
+        g->v[k] = (int32_t)gcarry & low30;
+        fcarry >>= 30;
+        gcarry >>= 30;
+    }
+    /* Whatever is left in the carries is limb 9 of t*[f,g]; it becomes output limb 8 (unmasked). */
+    f->v[8] = (int32_t)fcarry;
+    g->v[8] = (int32_t)gcarry;
+}
+
+/* Replace [f, g] with (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
+ *
+ * Version that operates on a variable number of limbs in f and g: only limbs 0..len-1 are
+ * read and written. len must be positive.
+ *
+ * This implements the update_fg function from the explanation.
+ */
+static void secp256k1_modinv32_update_fg_30_var(int len, secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t) {
+    const int32_t low30 = (int32_t)(UINT32_MAX >> 2);
+    const int32_t tu = t->u, tv = t->v, tq = t->q, tr = t->r;
+    int32_t flimb, glimb;
+    int64_t fcarry, gcarry;
+    int k;
+    VERIFY_CHECK(len > 0);
+    /* Limb 0 of t*[f,g]. */
+    flimb = f->v[0];
+    glimb = g->v[0];
+    fcarry = (int64_t)tu * flimb + (int64_t)tv * glimb;
+    gcarry = (int64_t)tq * flimb + (int64_t)tr * glimb;
+    /* Its bottom 30 bits must be zero; check that, then discard them. */
+    VERIFY_CHECK(((int32_t)fcarry & low30) == 0);
+    VERIFY_CHECK(((int32_t)gcarry & low30) == 0);
+    fcarry >>= 30;
+    gcarry >>= 30;
+    /* Output limb k (for k=0..len-2) is limb k+1 of t*[f,g], i.e. the result is shifted down
+     * by 30 bits. */
+    for (k = 0; k + 1 < len; ++k) {
+        flimb = f->v[k + 1];
+        glimb = g->v[k + 1];
+        fcarry += (int64_t)tu * flimb + (int64_t)tv * glimb;
+        gcarry += (int64_t)tq * flimb + (int64_t)tr * glimb;
+        f->v[k] = (int32_t)fcarry & low30;
+        g->v[k] = (int32_t)gcarry & low30;
+        fcarry >>= 30;
+        gcarry >>= 30;
+    }
+    /* Whatever is left in the carries is limb (len) of t*[f,g]; it becomes output limb (len-1),
+     * stored unmasked. */
+    f->v[len - 1] = (int32_t)fcarry;
+    g->v[len - 1] = (int32_t)gcarry;
+}
+
+/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x).
+ *
+ * Always performs the same fixed number of divstep batches (20 batches of 30), regardless of
+ * the value of x. */
+static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
+    /* Start with d=0, e=1, f=modulus, g=x, zeta=-1. */
+    secp256k1_modinv32_signed30 d = {{0}};
+    secp256k1_modinv32_signed30 e = {{1}};
+    secp256k1_modinv32_signed30 f = modinfo->modulus;
+    secp256k1_modinv32_signed30 g = *x;
+    int i;
+    int32_t zeta = -1; /* zeta = -(delta+1/2); delta is initially 1/2. */
+
+    /* Do 20 iterations of 30 divsteps each = 600 divsteps. 590 suffices for 256-bit inputs. */
+    for (i = 0; i < 20; ++i) {
+        /* Compute transition matrix and new zeta after 30 divsteps. */
+        secp256k1_modinv32_trans2x2 t;
+        /* Only the bottom limbs of f and g are needed to determine the next 30 divsteps. */
+        zeta = secp256k1_modinv32_divsteps_30(zeta, f.v[0], g.v[0], &t);
+        /* Update d,e using that transition matrix. */
+        secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
+        /* Update f,g using that transition matrix. */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0);  /* g <  modulus */
+
+        secp256k1_modinv32_update_fg_30(&f, &g, &t);
+
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0);  /* g <  modulus */
+    }
+
+    /* At this point sufficient iterations have been performed that g must have reached 0
+     * and (if g was not originally 0) f must now equal +/- GCD of the initial f, g
+     * values i.e. +/- 1, and d now contains +/- the modular inverse. */
+
+    /* g == 0 */
+    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &SECP256K1_SIGNED30_ONE, 0) == 0);
+    /* |f| == 1, or (x == 0 and d == 0 and f == modulus) */
+    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
+                 secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
+                 (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
+                  secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
+                  secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) == 0));
+
+    /* Optionally negate d, normalize to [0,modulus), and return it. */
+    /* The top limb of f is passed along; it carries the sign of f that decides the negation. */
+    secp256k1_modinv32_normalize_30(&d, f.v[8], modinfo);
+    *x = d;
+}
+
+/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (variable time).
+ *
+ * Runs batches of 30 divsteps until g becomes 0, and shrinks the number of limbs of f and g
+ * that are processed (len) whenever their top limbs carry no information anymore. */
+static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
+    /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */
+    secp256k1_modinv32_signed30 d = {{0, 0, 0, 0, 0, 0, 0, 0, 0}};
+    secp256k1_modinv32_signed30 e = {{1, 0, 0, 0, 0, 0, 0, 0, 0}};
+    secp256k1_modinv32_signed30 f = modinfo->modulus;
+    secp256k1_modinv32_signed30 g = *x;
+#ifdef VERIFY
+    /* Iteration counter, only used to check the bound on the number of batches. */
+    int i = 0;
+#endif
+    /* len is the number of limbs of f and g currently in use. */
+    int j, len = 9;
+    int32_t eta = -1; /* eta = -delta; delta is initially 1 (faster for the variable-time code) */
+    int32_t cond, fn, gn;
+
+    /* Do iterations of 30 divsteps each until g=0. */
+    while (1) {
+        /* Compute transition matrix and new eta after 30 divsteps. */
+        secp256k1_modinv32_trans2x2 t;
+        eta = secp256k1_modinv32_divsteps_30_var(eta, f.v[0], g.v[0], &t);
+        /* Update d,e using that transition matrix. */
+        secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
+        /* Update f,g using that transition matrix. */
+
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0);  /* g <  modulus */
+
+        secp256k1_modinv32_update_fg_30_var(len, &f, &g, &t);
+        /* If the bottom limb of g is 0, there is a chance g=0. */
+        if (g.v[0] == 0) {
+            cond = 0;
+            /* Check if all other limbs are also 0. */
+            for (j = 1; j < len; ++j) {
+                cond |= g.v[j];
+            }
+            /* If so, we're done. */
+            if (cond == 0) break;
+        }
+
+        /* Determine if len>1 and limb (len-1) of both f and g is 0 or -1. */
+        fn = f.v[len - 1];
+        gn = g.v[len - 1];
+        /* Nonzero (all ones) when len < 2, so that len is never reduced below 1. */
+        cond = ((int32_t)len - 2) >> 31;
+        /* a ^ (a >> 31) is zero exactly when a is 0 or -1. */
+        cond |= fn ^ (fn >> 31);
+        cond |= gn ^ (gn >> 31);
+        /* If so, reduce length, propagating the sign of f and g's top limb into the one below. */
+        if (cond == 0) {
+            f.v[len - 2] |= (uint32_t)fn << 30;
+            g.v[len - 2] |= (uint32_t)gn << 30;
+            --len;
+        }
+
+        VERIFY_CHECK(++i < 25); /* We should never need more than 25*30 = 750 divsteps */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0);  /* g <  modulus */
+    }
+
+    /* At this point g is 0 and (if g was not originally 0) f must now equal +/- GCD of
+     * the initial f, g values i.e. +/- 1, and d now contains +/- the modular inverse. */
+
+    /* g == 0 */
+    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &SECP256K1_SIGNED30_ONE, 0) == 0);
+    /* |f| == 1, or (x == 0 and d == 0 and f == modulus) */
+    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
+                 secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
+                 (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
+                  secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
+                  secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) == 0));
+
+    /* Optionally negate d, normalize to [0,modulus), and return it. */
+    /* The current top limb of f (index len-1) is passed along; it carries the sign of f. */
+    secp256k1_modinv32_normalize_30(&d, f.v[len - 1], modinfo);
+    *x = d;
+}
+
+/* Maximum number of batches of 30 posdivsteps that secp256k1_jacobi32_maybe_var runs while
+ * waiting for f=1. Normally up to 50 batches (1500 posdivsteps) are done; needing more is
+ * extremely rare. In VERIFY mode a lower number of batches is used (25, i.e. 750 posdivsteps,
+ * close to the median 756), so failure actually occurs. */
+#ifdef VERIFY
+#define JACOBI32_ITERATIONS 25
+#else
+#define JACOBI32_ITERATIONS 50
+#endif
+
+/* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1.
+ *
+ * Returns 1 or -1 when the symbol was determined, or 0 if the computation did not finish
+ * within JACOBI32_ITERATIONS batches of 30 posdivsteps (result unknown). */
+static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
+    /* Start with f=modulus, g=x, eta=-1. */
+    secp256k1_modinv32_signed30 f = modinfo->modulus;
+    secp256k1_modinv32_signed30 g = *x;
+    /* len is the number of limbs of f and g currently in use. */
+    int j, len = 9;
+    int32_t eta = -1; /* eta = -delta; delta is initially 1 */
+    int32_t cond, fn, gn;
+    /* Only the bottom bit of jac is meaningful: it records whether the symbol's sign flipped. */
+    int jac = 0;
+    int count;
+
+    /* The input limbs must all be non-negative. */
+    VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0 && g.v[5] >= 0 && g.v[6] >= 0 && g.v[7] >= 0 && g.v[8] >= 0);
+
+    /* If x > 0, then if the loop below converges, it converges to f=g=gcd(x,modulus). Since we
+     * require that gcd(x,modulus)=1 and modulus>=3, x cannot be 0. Thus, we must reach f=1 (or
+     * time out). */
+    VERIFY_CHECK((g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4] | g.v[5] | g.v[6] | g.v[7] | g.v[8]) != 0);
+
+    for (count = 0; count < JACOBI32_ITERATIONS; ++count) {
+        /* Compute transition matrix and new eta after 30 posdivsteps. */
+        secp256k1_modinv32_trans2x2 t;
+        /* The bottom two limbs are combined so that the low 32 bits of f and g are passed
+         * (posdivsteps needs f and g mod 2^32, not just mod 2^30). */
+        eta = secp256k1_modinv32_posdivsteps_30_var(eta, f.v[0] | ((uint32_t)f.v[1] << 30), g.v[0] | ((uint32_t)g.v[1] << 30), &t, &jac);
+        /* Update f,g using that transition matrix. */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0);  /* g < modulus */
+
+        secp256k1_modinv32_update_fg_30_var(len, &f, &g, &t);
+        /* If the bottom limb of f is 1, there is a chance that f=1. */
+        if (f.v[0] == 1) {
+            cond = 0;
+            /* Check if the other limbs are also 0. */
+            for (j = 1; j < len; ++j) {
+                cond |= f.v[j];
+            }
+            /* If so, we're done. If f=1, the Jacobi symbol (g | f)=1. */
+            /* The result is then 1 if the bottom bit of jac is clear, and -1 if it is set. */
+            if (cond == 0) return 1 - 2*(jac & 1);
+        }
+
+        /* Determine if len>1 and limb (len-1) of both f and g is 0. */
+        fn = f.v[len - 1];
+        gn = g.v[len - 1];
+        /* Nonzero (all ones) when len < 2, so that len is never reduced below 1. */
+        cond = ((int32_t)len - 2) >> 31;
+        cond |= fn;
+        cond |= gn;
+        /* If so, reduce length. */
+        if (cond == 0) --len;
+
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0);  /* g < modulus */
+    }
+
+    /* The loop did not reach f=1 within JACOBI32_ITERATIONS iterations of 30 posdivsteps each
+     * (1500 posdivsteps outside VERIFY mode). Return 0, indicating unknown result. */
+    return 0;
+}
+
+#endif /* SECP256K1_MODINV32_IMPL_H */
--- a/libsecp256k1/src/modinv64.h
+++ b/libsecp256k1/src/modinv64.h
@@ -0,0 +1,47 @@
+/***********************************************************************
+ * Copyright (c) 2020 Peter Dettman                                    *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef SECP256K1_MODINV64_H
+#define SECP256K1_MODINV64_H
+
+#include "util.h"
+
+#ifndef SECP256K1_WIDEMUL_INT128
+#error "modinv64 requires 128-bit wide multiplication support"
+#endif
+
+/* A signed 62-bit limb representation of integers.
+ *
+ * Its value is sum(v[i] * 2^(62*i), i=0..4).
+ *
+ * Each of the five limbs is stored in an int64_t. */
+typedef struct {
+    int64_t v[5];
+} secp256k1_modinv64_signed62;
+
+/* Per-modulus data that is passed to the modular inversion and Jacobi symbol functions
+ * declared in this header. */
+typedef struct {
+    /* The modulus in signed62 notation, must be odd and in [3, 2^256]. */
+    secp256k1_modinv64_signed62 modulus;
+
+    /* modulus^{-1} mod 2^62 */
+    uint64_t modulus_inv62;
+} secp256k1_modinv64_modinfo;
+
+/* Replace x with its modular inverse mod modinfo->modulus. x must be in range [0, modulus).
+ * If x is zero, the result will be zero as well. If not, the inverse must exist (i.e., the gcd of
+ * x and modulus must be 1). These rules are automatically satisfied if the modulus is prime.
+ *
+ * On output, all of x's limbs will be in [0, 2^62).
+ *
+ * This is the variable-time version; secp256k1_modinv64 is constant time in x.
+ */
+static void secp256k1_modinv64_var(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo);
+
+/* Same as secp256k1_modinv64_var, but constant time in x (not in the modulus). The same
+ * requirements on x and the same guarantees on the output limbs apply. */
+static void secp256k1_modinv64(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo);
+
+/* Compute the Jacobi symbol for (x | modinfo->modulus). x must be coprime with modulus (and thus
+ * cannot be 0, as modulus >= 3). All limbs of x must be non-negative. Returns 0 if the result
+ * cannot be computed. x itself is not modified (it is taken as a const pointer). */
+static int secp256k1_jacobi64_maybe_var(const secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo);
+
+#endif /* SECP256K1_MODINV64_H */
--- a/libsecp256k1/src/modinv64_impl.h
+++ b/libsecp256k1/src/modinv64_impl.h
@@ -0,0 +1,780 @@
+/***********************************************************************
+ * Copyright (c) 2020 Peter Dettman                                    *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef SECP256K1_MODINV64_IMPL_H
+#define SECP256K1_MODINV64_IMPL_H
+
+#include "int128.h"
+#include "modinv64.h"
+
+/* This file implements modular inversion based on the paper "Fast constant-time gcd computation and
+ * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
+ *
+ * For an explanation of the algorithm, see doc/safegcd_implementation.md. This file contains an
+ * implementation for N=62, using 62-bit signed limbs represented as int64_t.
+ */
+
+/* Data type for transition matrices (see section 3 of doc/safegcd_implementation.md).
+ * The update_de_62/update_fg_62 functions treat the entries as scaled by 2^62. Layout:
+ * t = [ u  v ]
+ *     [ q  r ]
+ */
+typedef struct {
+    int64_t u, v, q, r;
+} secp256k1_modinv64_trans2x2;
+
+#ifdef VERIFY
+/* Return |v| for an int64_t v; v must not be INT64_MIN, whose negation is not representable.
+ * (abs/labs/llabs are not used here because which of them fits int64_t depends on the
+ * platform's int sizes.) */
+static int64_t secp256k1_modinv64_abs(int64_t v) {
+    VERIFY_CHECK(v > INT64_MIN);
+    return (v < 0) ? -v : v;
+}
+
+static const secp256k1_modinv64_signed62 SECP256K1_SIGNED62_ONE = {{1}}; /* The value 1: v[0] is 1, the other limbs are zero-initialized. */
+
+/* Set r = a*factor using the low alen limbs of a. Limbs 0..3 of r end up in [0,2^62). */
+static void secp256k1_modinv64_mul_62(secp256k1_modinv64_signed62 *r, const secp256k1_modinv64_signed62 *a, int alen, int64_t factor) {
+    const uint64_t low62 = UINT64_MAX >> 2;
+    secp256k1_int128 c, d;
+    int limb;
+    secp256k1_i128_from_i64(&c, 0);
+    for (limb = 0; limb <= 4; ++limb) {
+        if (limb < alen) secp256k1_i128_accum_mul(&c, a->v[limb], factor);
+        if (limb == 4) break; /* The top limb is stored signed, after the loop. */
+        r->v[limb] = secp256k1_i128_to_u64(&c) & low62; secp256k1_i128_rshift(&c, 62);
+    }
+    secp256k1_i128_from_i64(&d, secp256k1_i128_to_i64(&c));
+    VERIFY_CHECK(secp256k1_i128_eq_var(&c, &d)); /* What is left must fit in an int64_t. */
+    r->v[4] = secp256k1_i128_to_i64(&c);
+}
+
+/* Three-way compare of a with b*factor: -1 if a is smaller, 0 if equal, 1 if larger. a has alen limbs; b has 5. */
+static int secp256k1_modinv64_mul_cmp_62(const secp256k1_modinv64_signed62 *a, int alen, const secp256k1_modinv64_signed62 *b, int64_t factor) {
+    secp256k1_modinv64_signed62 am, bm;
+    int i = 0;
+    secp256k1_modinv64_mul_62(&am, a, alen, 1); /* Multiplying by 1 normalizes all but the top limb of a. */
+    secp256k1_modinv64_mul_62(&bm, b, 5, factor);
+    while (i < 4) {
+        /* Only the signed top limb of am and bm may lie outside [0,2^62). */
+        VERIFY_CHECK(am.v[i] >> 62 == 0);
+        VERIFY_CHECK(bm.v[i] >> 62 == 0);
+        ++i;
+    }
+    for (; i >= 0; --i) { /* i == 4 here: scan from the top limb down to the first difference. */
+        if (am.v[i] != bm.v[i]) return (am.v[i] < bm.v[i]) ? -1 : 1;
+    }
+    return 0;
+}
+
+/* Return 1 if det(t) == 1 << n, or if abs is nonzero and det(t) == -(1 << n); return 0 otherwise. */
+static int secp256k1_modinv64_det_check_pow2(const secp256k1_modinv64_trans2x2 *t, unsigned int n, int abs) {
+    secp256k1_int128 det;
+    int ok;
+    secp256k1_i128_det(&det, t->u, t->v, t->q, t->r);
+    ok = secp256k1_i128_check_pow2(&det, n, 1) || (abs && secp256k1_i128_check_pow2(&det, n, -1));
+    return ok;
+}
+#endif
+
+/* Take as input a signed62 number r in range (-2*modulus,modulus), and add a multiple of the
+ * modulus to it to bring it to range [0,modulus), in place. If sign < 0, the input will also be
+ * negated in the process (only the sign bit of sign is used). The input must have limbs in range
+ * (-2^62,2^62). The output will have limbs in range [0,2^62). */
+static void secp256k1_modinv64_normalize_62(secp256k1_modinv64_signed62 *r, int64_t sign, const secp256k1_modinv64_modinfo *modinfo) {
+    const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
+    int64_t r0 = r->v[0], r1 = r->v[1], r2 = r->v[2], r3 = r->v[3], r4 = r->v[4];
+    volatile int64_t cond_add, cond_negate; /* NOTE(review): volatile presumably keeps the masks from being compiled into branches - confirm. */
+
+#ifdef VERIFY
+    /* Verify that all limbs are in range (-2^62,2^62). */
+    int i;
+    for (i = 0; i < 5; ++i) {
+        VERIFY_CHECK(r->v[i] >= -M62);
+        VERIFY_CHECK(r->v[i] <= M62);
+    }
+    VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(r, 5, &modinfo->modulus, -2) > 0); /* r > -2*modulus */
+    VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(r, 5, &modinfo->modulus, 1) < 0); /* r < modulus */
+#endif
+
+    /* In a first step, add the modulus if the input is negative, and then negate if requested.
+     * This brings r from range (-2*modulus,modulus) to range (-modulus,modulus). As all input
+     * limbs are in range (-2^62,2^62), this cannot overflow an int64_t. Note that the right
+     * shifts below are signed sign-extending shifts (see assumptions.h for tests that that is
+     * indeed the behavior of the right shift operator). */
+    cond_add = r4 >> 63;
+    r0 += modinfo->modulus.v[0] & cond_add;
+    r1 += modinfo->modulus.v[1] & cond_add;
+    r2 += modinfo->modulus.v[2] & cond_add;
+    r3 += modinfo->modulus.v[3] & cond_add;
+    r4 += modinfo->modulus.v[4] & cond_add;
+    cond_negate = sign >> 63;
+    r0 = (r0 ^ cond_negate) - cond_negate;
+    r1 = (r1 ^ cond_negate) - cond_negate;
+    r2 = (r2 ^ cond_negate) - cond_negate;
+    r3 = (r3 ^ cond_negate) - cond_negate;
+    r4 = (r4 ^ cond_negate) - cond_negate;
+    /* Propagate the top bits, to bring limbs back to range (-2^62,2^62). */
+    r1 += r0 >> 62; r0 &= M62;
+    r2 += r1 >> 62; r1 &= M62;
+    r3 += r2 >> 62; r2 &= M62;
+    r4 += r3 >> 62; r3 &= M62;
+
+    /* In a second step add the modulus again if the result is still negative, bringing
+     * r to range [0,modulus). */
+    cond_add = r4 >> 63;
+    r0 += modinfo->modulus.v[0] & cond_add;
+    r1 += modinfo->modulus.v[1] & cond_add;
+    r2 += modinfo->modulus.v[2] & cond_add;
+    r3 += modinfo->modulus.v[3] & cond_add;
+    r4 += modinfo->modulus.v[4] & cond_add;
+    /* And propagate again. */
+    r1 += r0 >> 62; r0 &= M62;
+    r2 += r1 >> 62; r1 &= M62;
+    r3 += r2 >> 62; r2 &= M62;
+    r4 += r3 >> 62; r3 &= M62;
+
+    r->v[0] = r0;
+    r->v[1] = r1;
+    r->v[2] = r2;
+    r->v[3] = r3;
+    r->v[4] = r4;
+
+    VERIFY_CHECK(r0 >> 62 == 0);
+    VERIFY_CHECK(r1 >> 62 == 0);
+    VERIFY_CHECK(r2 >> 62 == 0);
+    VERIFY_CHECK(r3 >> 62 == 0);
+    VERIFY_CHECK(r4 >> 62 == 0);
+    VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(r, 5, &modinfo->modulus, 0) >= 0); /* r >= 0 */
+    VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(r, 5, &modinfo->modulus, 1) < 0); /* r < modulus */
+}
+
+/* Compute the transition matrix and zeta for 59 divsteps (where zeta=-(delta+1/2)).
+ * Note that the transformation matrix is scaled by 2^62 and not 2^59.
+ *
+ * Input:  zeta: initial zeta
+ *         f0:   bottom limb of initial f
+ *         g0:   bottom limb of initial g
+ * Output: t: transition matrix
+ * Return: final zeta
+ *
+ * Implements the divsteps_n_matrix function from the explanation.
+ */
+static int64_t secp256k1_modinv64_divsteps_59(int64_t zeta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t) {
+    /* u,v,q,r are the elements of the transformation matrix being built up,
+     * starting with the identity matrix times 8 (because the caller expects
+     * a result scaled by 2^62). Semantically they are signed integers
+     * in range [-2^62,2^62], but here represented as unsigned mod 2^64. This
+     * permits left shifting (which is UB for negative numbers). The range
+     * being inside [-2^63,2^63) means that casting to signed works correctly.
+     */
+    uint64_t u = 8, v = 0, q = 0, r = 8;
+    volatile uint64_t c1, c2;
+    uint64_t mask1, mask2, f = f0, g = g0, x, y, z;
+    int i;
+
+    for (i = 3; i < 62; ++i) {
+        VERIFY_CHECK((f & 1) == 1); /* f must always be odd */
+        VERIFY_CHECK((u * f0 + v * g0) == f << i);
+        VERIFY_CHECK((q * f0 + r * g0) == g << i);
+        /* Compute conditional masks for (zeta < 0) and for (g & 1). */
+        c1 = zeta >> 63;
+        mask1 = c1;
+        c2 = g & 1;
+        mask2 = -c2;
+        /* Compute x,y,z, conditionally negated versions of f,u,v. */
+        x = (f ^ mask1) - mask1;
+        y = (u ^ mask1) - mask1;
+        z = (v ^ mask1) - mask1;
+        /* Conditionally add x,y,z to g,q,r. */
+        g += x & mask2;
+        q += y & mask2;
+        r += z & mask2;
+        /* In what follows, mask1 is a condition mask for (zeta < 0) and (g & 1). */
+        mask1 &= mask2;
+        /* Conditionally change zeta into -zeta-2 or zeta-1. */
+        zeta = (zeta ^ mask1) - 1;
+        /* Conditionally add g,q,r to f,u,v. */
+        f += g & mask1;
+        u += q & mask1;
+        v += r & mask1;
+        /* Shifts */
+        g >>= 1;
+        u <<= 1;
+        v <<= 1;
+        /* Bounds on zeta that follow from the bounds on iteration count (max 10*59 divsteps). */
+        VERIFY_CHECK(zeta >= -591 && zeta <= 591);
+    }
+    /* Return data in t and return value. */
+    t->u = (int64_t)u;
+    t->v = (int64_t)v;
+    t->q = (int64_t)q;
+    t->r = (int64_t)r;
+
+    /* The determinant of t must be a power of two. This guarantees that multiplication with t
+     * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
+     * will be divided out again). As each divstep's individual matrix has determinant 2, the
+     * aggregate of 59 of them will have determinant 2^59. Multiplying with the initial
+     * 8*identity (which has determinant 2^6) means the overall output has determinant
+     * 2^65. */
+    VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 65, 0));
+
+    return zeta;
+}
+
+/* Compute the transition matrix and eta for 62 divsteps (variable time, eta=-delta).
+ *
+ * Input:  eta: initial eta
+ *         f0:  bottom limb of initial f
+ *         g0:  bottom limb of initial g
+ * Output: t: transition matrix
+ * Return: final eta
+ *
+ * Implements the divsteps_n_matrix_var function from the explanation.
+ */
+static int64_t secp256k1_modinv64_divsteps_62_var(int64_t eta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t) {
+    /* Transformation matrix; see comments in secp256k1_modinv64_divsteps_59 (here it starts as the plain identity). */
+    uint64_t u = 1, v = 0, q = 0, r = 1;
+    uint64_t f = f0, g = g0, m;
+    uint32_t w;
+    int i = 62, limit, zeros;
+
+    for (;;) {
+        /* Use a sentinel bit to count zeros only up to i. */
+        zeros = secp256k1_ctz64_var(g | (UINT64_MAX << i));
+        /* Perform zeros divsteps at once; they all just divide g by two. */
+        g >>= zeros;
+        u <<= zeros;
+        v <<= zeros;
+        eta -= zeros;
+        i -= zeros;
+        /* We're done once we've done 62 divsteps. */
+        if (i == 0) break;
+        VERIFY_CHECK((f & 1) == 1);
+        VERIFY_CHECK((g & 1) == 1);
+        VERIFY_CHECK((u * f0 + v * g0) == f << (62 - i));
+        VERIFY_CHECK((q * f0 + r * g0) == g << (62 - i));
+        /* Bounds on eta that follow from the bounds on iteration count (max 12*62 divsteps). */
+        VERIFY_CHECK(eta >= -745 && eta <= 745);
+        /* If eta is negative, negate it and replace f,g with g,-f. */
+        if (eta < 0) {
+            uint64_t tmp;
+            eta = -eta;
+            tmp = f; f = g; g = -tmp;
+            tmp = u; u = q; q = -tmp;
+            tmp = v; v = r; r = -tmp;
+            /* Use a formula to cancel out up to 6 bits of g. Also, no more than i can be cancelled
+             * out (as we'd be done before that point), and no more than eta+1 can be done as its
+             * sign will flip again once that happens. */
+            limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
+            VERIFY_CHECK(limit > 0 && limit <= 62);
+            /* m is a mask for the bottom min(limit, 6) bits. */
+            m = (UINT64_MAX >> (64 - limit)) & 63U;
+            /* Find what multiple of f must be added to g to cancel its bottom min(limit, 6)
+             * bits. */
+            w = (f * g * (f * f - 2)) & m;
+        } else {
+            /* In this branch, use a simpler formula that only lets us cancel up to 4 bits of g, as
+             * eta tends to be smaller here. */
+            limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
+            VERIFY_CHECK(limit > 0 && limit <= 62);
+            /* m is a mask for the bottom min(limit, 4) bits. */
+            m = (UINT64_MAX >> (64 - limit)) & 15U;
+            /* Find what multiple of f must be added to g to cancel its bottom min(limit, 4)
+             * bits. */
+            w = f + (((f + 1) & 4) << 1);
+            w = (-w * g) & m;
+        }
+        g += f * w;
+        q += u * w;
+        r += v * w;
+        VERIFY_CHECK((g & m) == 0);
+    }
+    /* Return data in t and return value. */
+    t->u = (int64_t)u;
+    t->v = (int64_t)v;
+    t->q = (int64_t)q;
+    t->r = (int64_t)r;
+
+    /* The determinant of t must be a power of two. This guarantees that multiplication with t
+     * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
+     * will be divided out again). As each divstep's individual matrix has determinant 2, the
+     * aggregate of 62 of them will have determinant 2^62. */
+    VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 62, 0));
+
+    return eta;
+}
+
+/* Compute the transition matrix and eta for 62 posdivsteps (variable time, eta=-delta), and keeps track
+ * of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^64 rather than 2^62, because
+ * Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2).
+ *
+ * Input:        eta: initial eta
+ *               f0:  bottom limb of initial f
+ *               g0:  bottom limb of initial g
+ * Output:       t: transition matrix
+ * Input/Output: (*jacp & 1) is bitflipped if and only if the Jacobi symbol of (f | g) changes sign
+ *               by applying the returned transformation matrix to it. The other bits of *jacp may
+ *               change, but are meaningless.
+ * Return:       final eta
+ */
+static int64_t secp256k1_modinv64_posdivsteps_62_var(int64_t eta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t, int *jacp) {
+    /* Transformation matrix; see comments in secp256k1_modinv64_divsteps_59 (here it starts as the plain identity). */
+    uint64_t u = 1, v = 0, q = 0, r = 1;
+    uint64_t f = f0, g = g0, m;
+    uint32_t w;
+    int i = 62, limit, zeros;
+    int jac = *jacp;
+
+    for (;;) {
+        /* Use a sentinel bit to count zeros only up to i. */
+        zeros = secp256k1_ctz64_var(g | (UINT64_MAX << i));
+        /* Perform zeros divsteps at once; they all just divide g by two. */
+        g >>= zeros;
+        u <<= zeros;
+        v <<= zeros;
+        eta -= zeros;
+        i -= zeros;
+        /* Update the bottom bit of jac: when dividing g by an odd power of 2,
+         * if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */
+        jac ^= (zeros & ((f >> 1) ^ (f >> 2)));
+        /* We're done once we've done 62 posdivsteps. */
+        if (i == 0) break;
+        VERIFY_CHECK((f & 1) == 1);
+        VERIFY_CHECK((g & 1) == 1);
+        VERIFY_CHECK((u * f0 + v * g0) == f << (62 - i));
+        VERIFY_CHECK((q * f0 + r * g0) == g << (62 - i));
+        /* If eta is negative, negate it and replace f,g with g,f. */
+        if (eta < 0) {
+            uint64_t tmp;
+            eta = -eta;
+            tmp = f; f = g; g = tmp;
+            tmp = u; u = q; q = tmp;
+            tmp = v; v = r; r = tmp;
+            /* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign
+             * if both f and g are 3 mod 4. */
+            jac ^= ((f & g) >> 1);
+            /* Use a formula to cancel out up to 6 bits of g. Also, no more than i can be cancelled
+             * out (as we'd be done before that point), and no more than eta+1 can be done as its
+             * sign will flip again once that happens. */
+            limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
+            VERIFY_CHECK(limit > 0 && limit <= 62);
+            /* m is a mask for the bottom min(limit, 6) bits. */
+            m = (UINT64_MAX >> (64 - limit)) & 63U;
+            /* Find what multiple of f must be added to g to cancel its bottom min(limit, 6)
+             * bits. */
+            w = (f * g * (f * f - 2)) & m;
+        } else {
+            /* In this branch, use a simpler formula that only lets us cancel up to 4 bits of g, as
+             * eta tends to be smaller here. */
+            limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
+            VERIFY_CHECK(limit > 0 && limit <= 62);
+            /* m is a mask for the bottom min(limit, 4) bits. */
+            m = (UINT64_MAX >> (64 - limit)) & 15U;
+            /* Find what multiple of f must be added to g to cancel its bottom min(limit, 4)
+             * bits. */
+            w = f + (((f + 1) & 4) << 1);
+            w = (-w * g) & m;
+        }
+        g += f * w;
+        q += u * w;
+        r += v * w;
+        VERIFY_CHECK((g & m) == 0);
+    }
+    /* Return data in t and return value. */
+    t->u = (int64_t)u;
+    t->v = (int64_t)v;
+    t->q = (int64_t)q;
+    t->r = (int64_t)r;
+
+    /* The determinant of t must be a power of two. This guarantees that multiplication with t
+     * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
+     * will be divided out again). As each divstep's individual matrix has determinant 2 or -2,
+     * the aggregate of 62 of them will have determinant 2^62 or -2^62. */
+    VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 62, 1));
+
+    *jacp = jac;
+    return eta;
+}
+
+/* Compute (t/2^62) * [d, e] mod modulus, where t is a transition matrix scaled by 2^62.
+ * d and e are updated in place.
+ * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
+ * (-2^62,2^62).
+ *
+ * This implements the update_de function from the explanation.
+ */
+static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp256k1_modinv64_signed62 *e, const secp256k1_modinv64_trans2x2 *t, const secp256k1_modinv64_modinfo* modinfo) {
+    const uint64_t M62 = UINT64_MAX >> 2;
+    const int64_t d0 = d->v[0], d1 = d->v[1], d2 = d->v[2], d3 = d->v[3], d4 = d->v[4];
+    const int64_t e0 = e->v[0], e1 = e->v[1], e2 = e->v[2], e3 = e->v[3], e4 = e->v[4];
+    const int64_t u = t->u, v = t->v, q = t->q, r = t->r;
+    int64_t md, me, sd, se;
+    secp256k1_int128 cd, ce;
+    VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
+    VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, 1) < 0);  /* d <    modulus */
+    VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(e, 5, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
+    VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(e, 5, &modinfo->modulus, 1) < 0);  /* e <    modulus */
+    VERIFY_CHECK(secp256k1_modinv64_abs(u) <= (((int64_t)1 << 62) - secp256k1_modinv64_abs(v))); /* |u|+|v| <= 2^62 */
+    VERIFY_CHECK(secp256k1_modinv64_abs(q) <= (((int64_t)1 << 62) - secp256k1_modinv64_abs(r))); /* |q|+|r| <= 2^62 */
+
+    /* [md,me] start as zero; plus [u,q] if d is negative; plus [v,r] if e is negative. */
+    sd = d4 >> 63;
+    se = e4 >> 63;
+    md = (u & sd) + (v & se);
+    me = (q & sd) + (r & se);
+    /* Begin computing t*[d,e]. */
+    secp256k1_i128_mul(&cd, u, d0);
+    secp256k1_i128_accum_mul(&cd, v, e0);
+    secp256k1_i128_mul(&ce, q, d0);
+    secp256k1_i128_accum_mul(&ce, r, e0);
+    /* Correct md,me so that t*[d,e]+modulus*[md,me] has 62 zero bottom bits. */
+    md -= (modinfo->modulus_inv62 * secp256k1_i128_to_u64(&cd) + md) & M62;
+    me -= (modinfo->modulus_inv62 * secp256k1_i128_to_u64(&ce) + me) & M62;
+    /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
+    secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[0], md);
+    secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[0], me);
+    /* Verify that the low 62 bits of the computation are indeed zero, and then throw them away. */
+    VERIFY_CHECK((secp256k1_i128_to_u64(&cd) & M62) == 0); secp256k1_i128_rshift(&cd, 62);
+    VERIFY_CHECK((secp256k1_i128_to_u64(&ce) & M62) == 0); secp256k1_i128_rshift(&ce, 62);
+    /* Compute limb 1 of t*[d,e]+modulus*[md,me], and store it as output limb 0 (= down shift). */
+    secp256k1_i128_accum_mul(&cd, u, d1);
+    secp256k1_i128_accum_mul(&cd, v, e1);
+    secp256k1_i128_accum_mul(&ce, q, d1);
+    secp256k1_i128_accum_mul(&ce, r, e1);
+    if (modinfo->modulus.v[1]) { /* Skip the multiplications when limb 1 of the modulus is zero (branches on the modulus only). */
+        secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[1], md);
+        secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[1], me);
+    }
+    d->v[0] = secp256k1_i128_to_u64(&cd) & M62; secp256k1_i128_rshift(&cd, 62);
+    e->v[0] = secp256k1_i128_to_u64(&ce) & M62; secp256k1_i128_rshift(&ce, 62);
+    /* Compute limb 2 of t*[d,e]+modulus*[md,me], and store it as output limb 1. */
+    secp256k1_i128_accum_mul(&cd, u, d2);
+    secp256k1_i128_accum_mul(&cd, v, e2);
+    secp256k1_i128_accum_mul(&ce, q, d2);
+    secp256k1_i128_accum_mul(&ce, r, e2);
+    if (modinfo->modulus.v[2]) { /* Skip the multiplications when limb 2 of the modulus is zero (branches on the modulus only). */
+        secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[2], md);
+        secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[2], me);
+    }
+    d->v[1] = secp256k1_i128_to_u64(&cd) & M62; secp256k1_i128_rshift(&cd, 62);
+    e->v[1] = secp256k1_i128_to_u64(&ce) & M62; secp256k1_i128_rshift(&ce, 62);
+    /* Compute limb 3 of t*[d,e]+modulus*[md,me], and store it as output limb 2. */
+    secp256k1_i128_accum_mul(&cd, u, d3);
+    secp256k1_i128_accum_mul(&cd, v, e3);
+    secp256k1_i128_accum_mul(&ce, q, d3);
+    secp256k1_i128_accum_mul(&ce, r, e3);
+    if (modinfo->modulus.v[3]) { /* Skip the multiplications when limb 3 of the modulus is zero (branches on the modulus only). */
+        secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[3], md);
+        secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[3], me);
+    }
+    d->v[2] = secp256k1_i128_to_u64(&cd) & M62; secp256k1_i128_rshift(&cd, 62);
+    e->v[2] = secp256k1_i128_to_u64(&ce) & M62; secp256k1_i128_rshift(&ce, 62);
+    /* Compute limb 4 of t*[d,e]+modulus*[md,me], and store it as output limb 3. */
+    secp256k1_i128_accum_mul(&cd, u, d4);
+    secp256k1_i128_accum_mul(&cd, v, e4);
+    secp256k1_i128_accum_mul(&ce, q, d4);
+    secp256k1_i128_accum_mul(&ce, r, e4);
+    secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[4], md);
+    secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[4], me);
+    d->v[3] = secp256k1_i128_to_u64(&cd) & M62; secp256k1_i128_rshift(&cd, 62);
+    e->v[3] = secp256k1_i128_to_u64(&ce) & M62; secp256k1_i128_rshift(&ce, 62);
+    /* What remains is limb 5 of t*[d,e]+modulus*[md,me]; store it as output limb 4. */
+    d->v[4] = secp256k1_i128_to_i64(&cd);
+    e->v[4] = secp256k1_i128_to_i64(&ce);
+
+    VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
+    VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, 1) < 0);  /* d <    modulus */
+    VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(e, 5, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
+    VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(e, 5, &modinfo->modulus, 1) < 0);  /* e <    modulus */
+}
+
+/* Compute (t/2^62) * [f, g], where t is a transition matrix scaled by 2^62.
+ * f and g are updated in place; output limbs 0..3 are masked to 62 bits, limb 4 is stored signed.
+ * This implements the update_fg function from the explanation.
+ */
+static void secp256k1_modinv64_update_fg_62(secp256k1_modinv64_signed62 *f, secp256k1_modinv64_signed62 *g, const secp256k1_modinv64_trans2x2 *t) {
+    const uint64_t M62 = UINT64_MAX >> 2;
+    const int64_t f0 = f->v[0], f1 = f->v[1], f2 = f->v[2], f3 = f->v[3], f4 = f->v[4];
+    const int64_t g0 = g->v[0], g1 = g->v[1], g2 = g->v[2], g3 = g->v[3], g4 = g->v[4];
+    const int64_t u = t->u, v = t->v, q = t->q, r = t->r;
+    secp256k1_int128 cf, cg;
+    /* Start computing t*[f,g]. */
+    secp256k1_i128_mul(&cf, u, f0);
+    secp256k1_i128_accum_mul(&cf, v, g0);
+    secp256k1_i128_mul(&cg, q, f0);
+    secp256k1_i128_accum_mul(&cg, r, g0);
+    /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
+    VERIFY_CHECK((secp256k1_i128_to_u64(&cf) & M62) == 0); secp256k1_i128_rshift(&cf, 62);
+    VERIFY_CHECK((secp256k1_i128_to_u64(&cg) & M62) == 0); secp256k1_i128_rshift(&cg, 62);
+    /* Compute limb 1 of t*[f,g], and store it as output limb 0 (= down shift). */
+    secp256k1_i128_accum_mul(&cf, u, f1);
+    secp256k1_i128_accum_mul(&cf, v, g1);
+    secp256k1_i128_accum_mul(&cg, q, f1);
+    secp256k1_i128_accum_mul(&cg, r, g1);
+    f->v[0] = secp256k1_i128_to_u64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
+    g->v[0] = secp256k1_i128_to_u64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
+    /* Compute limb 2 of t*[f,g], and store it as output limb 1. */
+    secp256k1_i128_accum_mul(&cf, u, f2);
+    secp256k1_i128_accum_mul(&cf, v, g2);
+    secp256k1_i128_accum_mul(&cg, q, f2);
+    secp256k1_i128_accum_mul(&cg, r, g2);
+    f->v[1] = secp256k1_i128_to_u64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
+    g->v[1] = secp256k1_i128_to_u64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
+    /* Compute limb 3 of t*[f,g], and store it as output limb 2. */
+    secp256k1_i128_accum_mul(&cf, u, f3);
+    secp256k1_i128_accum_mul(&cf, v, g3);
+    secp256k1_i128_accum_mul(&cg, q, f3);
+    secp256k1_i128_accum_mul(&cg, r, g3);
+    f->v[2] = secp256k1_i128_to_u64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
+    g->v[2] = secp256k1_i128_to_u64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
+    /* Compute limb 4 of t*[f,g], and store it as output limb 3. */
+    secp256k1_i128_accum_mul(&cf, u, f4);
+    secp256k1_i128_accum_mul(&cf, v, g4);
+    secp256k1_i128_accum_mul(&cg, q, f4);
+    secp256k1_i128_accum_mul(&cg, r, g4);
+    f->v[3] = secp256k1_i128_to_u64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
+    g->v[3] = secp256k1_i128_to_u64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
+    /* What remains is limb 5 of t*[f,g]; store it as output limb 4. */
+    f->v[4] = secp256k1_i128_to_i64(&cf);
+    g->v[4] = secp256k1_i128_to_i64(&cg);
+}
+
+/* Compute (t/2^62) * [f, g], where t is a transition matrix for 62 divsteps.
+ *
+ * Version that operates on a variable number of limbs in f and g.
+ * Only limbs 0..len-1 of f and g are read and written (in place); len must be positive.
+ * This implements the update_fg function from the explanation.
+ */
+static void secp256k1_modinv64_update_fg_62_var(int len, secp256k1_modinv64_signed62 *f, secp256k1_modinv64_signed62 *g, const secp256k1_modinv64_trans2x2 *t) {
+    const uint64_t M62 = UINT64_MAX >> 2;
+    const int64_t u = t->u, v = t->v, q = t->q, r = t->r;
+    int64_t fi, gi;
+    secp256k1_int128 cf, cg;
+    int i;
+    VERIFY_CHECK(len > 0);
+    /* Start computing t*[f,g]. */
+    fi = f->v[0];
+    gi = g->v[0];
+    secp256k1_i128_mul(&cf, u, fi);
+    secp256k1_i128_accum_mul(&cf, v, gi);
+    secp256k1_i128_mul(&cg, q, fi);
+    secp256k1_i128_accum_mul(&cg, r, gi);
+    /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
+    VERIFY_CHECK((secp256k1_i128_to_u64(&cf) & M62) == 0); secp256k1_i128_rshift(&cf, 62);
+    VERIFY_CHECK((secp256k1_i128_to_u64(&cg) & M62) == 0); secp256k1_i128_rshift(&cg, 62);
+    /* Now iteratively compute limb i=1..len-1 of t*[f,g], and store them in output limb i-1
+     * (shifting down by 62 bits). */
+    for (i = 1; i < len; ++i) {
+        fi = f->v[i];
+        gi = g->v[i];
+        secp256k1_i128_accum_mul(&cf, u, fi);
+        secp256k1_i128_accum_mul(&cf, v, gi);
+        secp256k1_i128_accum_mul(&cg, q, fi);
+        secp256k1_i128_accum_mul(&cg, r, gi);
+        f->v[i - 1] = secp256k1_i128_to_u64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
+        g->v[i - 1] = secp256k1_i128_to_u64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
+    }
+    /* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */
+    f->v[len - 1] = secp256k1_i128_to_i64(&cf);
+    g->v[len - 1] = secp256k1_i128_to_i64(&cg);
+}
+
+/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
+static void secp256k1_modinv64(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo) {
+    /* Start with d=0, e=1, f=modulus, g=x, zeta=-1. */
+    secp256k1_modinv64_signed62 d = {{0, 0, 0, 0, 0}};
+    secp256k1_modinv64_signed62 e = {{1, 0, 0, 0, 0}};
+    secp256k1_modinv64_signed62 f = modinfo->modulus;
+    secp256k1_modinv64_signed62 g = *x;
+    int i;
+    int64_t zeta = -1; /* zeta = -(delta+1/2); delta starts at 1/2. */
+
+    /* Do 10 iterations of 59 divsteps each = 590 divsteps. This suffices for 256-bit inputs. */
+    for (i = 0; i < 10; ++i) {
+        /* Compute transition matrix and new zeta after 59 divsteps. */
+        secp256k1_modinv64_trans2x2 t;
+        zeta = secp256k1_modinv64_divsteps_59(zeta, f.v[0], g.v[0], &t);
+        /* Update d,e using that transition matrix. */
+        secp256k1_modinv64_update_de_62(&d, &e, &t, modinfo);
+        /* Update f,g using that transition matrix. */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, 5, &modinfo->modulus, -1) > 0); /* f > -modulus */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, 5, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, 5, &modinfo->modulus, -1) > 0); /* g > -modulus */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, 5, &modinfo->modulus, 1) < 0);  /* g <  modulus */
+
+        secp256k1_modinv64_update_fg_62(&f, &g, &t);
+
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, 5, &modinfo->modulus, -1) > 0); /* f > -modulus */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, 5, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, 5, &modinfo->modulus, -1) > 0); /* g > -modulus */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, 5, &modinfo->modulus, 1) < 0);  /* g <  modulus */
+    }
+
+    /* At this point sufficient iterations have been performed that g must have reached 0
+     * and (if g was not originally 0) f must now equal +/- GCD of the initial f, g
+     * values i.e. +/- 1, and d now contains +/- the modular inverse. */
+
+    /* g == 0 */
+    VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, 5, &SECP256K1_SIGNED62_ONE, 0) == 0);
+    /* |f| == 1, or (x == 0 and d == 0 and f == modulus) */
+    VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, 5, &SECP256K1_SIGNED62_ONE, -1) == 0 ||
+                 secp256k1_modinv64_mul_cmp_62(&f, 5, &SECP256K1_SIGNED62_ONE, 1) == 0 ||
+                 (secp256k1_modinv64_mul_cmp_62(x, 5, &SECP256K1_SIGNED62_ONE, 0) == 0 &&
+                  secp256k1_modinv64_mul_cmp_62(&d, 5, &SECP256K1_SIGNED62_ONE, 0) == 0 &&
+                  secp256k1_modinv64_mul_cmp_62(&f, 5, &modinfo->modulus, 1) == 0));
+
+    /* Negate d if f is negative (sign taken from f's top limb), normalize to [0,modulus), and return it. */
+    secp256k1_modinv64_normalize_62(&d, f.v[4], modinfo);
+    *x = d;
+}
+
+/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (variable time).
+ *
+ * Repeats batches of 62 divsteps on (f, g), starting from (modulus, x), until every active
+ * limb of g is zero. Each batch's transition matrix is applied to (d, e) as well as to (f, g).
+ * len is the number of limbs of f and g still being processed; it starts at 5 and is reduced
+ * whenever the top limb of both f and g is 0 or -1. On exit d is handed to
+ * secp256k1_modinv64_normalize_62 together with f's top active limb as the sign argument, and
+ * the result is stored in *x. */
+static void secp256k1_modinv64_var(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo) {
+    /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */
+    secp256k1_modinv64_signed62 d = {{0, 0, 0, 0, 0}};
+    secp256k1_modinv64_signed62 e = {{1, 0, 0, 0, 0}};
+    secp256k1_modinv64_signed62 f = modinfo->modulus;
+    secp256k1_modinv64_signed62 g = *x;
+#ifdef VERIFY
+    int i = 0; /* Batch counter; only declared (and only checked) in VERIFY builds. */
+#endif
+    int j, len = 5; /* len: number of limbs of f and g still in use. */
+    int64_t eta = -1; /* eta = -delta; delta is initially 1 */
+    int64_t cond, fn, gn;
+
+    /* Do iterations of 62 divsteps each until g=0. */
+    while (1) {
+        /* Compute transition matrix and new eta after 62 divsteps. Only the bottom limbs of
+         * f and g are passed in. */
+        secp256k1_modinv64_trans2x2 t;
+        eta = secp256k1_modinv64_divsteps_62_var(eta, f.v[0], g.v[0], &t);
+        /* Update d,e using that transition matrix. */
+        secp256k1_modinv64_update_de_62(&d, &e, &t, modinfo);
+        /* Update f,g using that transition matrix. */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 1) < 0);  /* g <  modulus */
+
+        secp256k1_modinv64_update_fg_62_var(len, &f, &g, &t);
+        /* If the bottom limb of g is zero, there is a chance that g=0. */
+        if (g.v[0] == 0) {
+            cond = 0;
+            /* Check if the other limbs are also 0. */
+            for (j = 1; j < len; ++j) {
+                cond |= g.v[j];
+            }
+            /* If so, we're done. */
+            if (cond == 0) break;
+        }
+
+        /* Determine if len>1 and limb (len-1) of both f and g is 0 or -1.
+         * cond ends up 0 only if all three terms below are 0. */
+        fn = f.v[len - 1];
+        gn = g.v[len - 1];
+        cond = ((int64_t)len - 2) >> 63; /* Nonzero when len < 2 (sign bit of len-2 shifted down). */
+        cond |= fn ^ (fn >> 63); /* Zero exactly when fn is 0 or -1. */
+        cond |= gn ^ (gn >> 63); /* Zero exactly when gn is 0 or -1. */
+        /* If so, reduce length, propagating the sign of f and g's top limb into the one below. */
+        if (cond == 0) {
+            f.v[len - 2] |= (uint64_t)fn << 62;
+            g.v[len - 2] |= (uint64_t)gn << 62;
+            --len;
+        }
+
+        VERIFY_CHECK(++i < 12); /* We should never need more than 12*62 = 744 divsteps */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 1) < 0);  /* g <  modulus */
+    }
+
+    /* At this point g is 0 and (if g was not originally 0) f must now equal +/- GCD of
+     * the initial f, g values i.e. +/- 1, and d now contains +/- the modular inverse. */
+
+    /* g == 0 */
+    VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &SECP256K1_SIGNED62_ONE, 0) == 0);
+    /* |f| == 1, or (x == 0 and d == 0 and f == modulus) */
+    VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &SECP256K1_SIGNED62_ONE, -1) == 0 ||
+                 secp256k1_modinv64_mul_cmp_62(&f, len, &SECP256K1_SIGNED62_ONE, 1) == 0 ||
+                 (secp256k1_modinv64_mul_cmp_62(x, 5, &SECP256K1_SIGNED62_ONE, 0) == 0 &&
+                  secp256k1_modinv64_mul_cmp_62(&d, 5, &SECP256K1_SIGNED62_ONE, 0) == 0 &&
+                  secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 1) == 0));
+
+    /* Optionally negate d, normalize to [0,modulus), and return it. The sign is taken from
+     * the top limb of f that is still active, which is limb len-1 rather than limb 4. */
+    secp256k1_modinv64_normalize_62(&d, f.v[len - 1], modinfo);
+    *x = d;
+}
+
+/* Do up to 25 iterations of 62 posdivsteps (up to 1550 steps; more is extremely rare) each until f=1.
+ * In VERIFY mode use a lower number of iterations (12, i.e. 744 steps, close to the median 756), so
+ * failure actually occurs. */
+#ifdef VERIFY
+#define JACOBI64_ITERATIONS 12
+#else
+#define JACOBI64_ITERATIONS 25
+#endif
+
+/* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1.
+ *
+ * Returns 1 or -1 once f has been reduced to 1, or 0 (result unknown) if that does not happen
+ * within JACOBI64_ITERATIONS batches of 62 posdivsteps. x itself is not modified. */
+static int secp256k1_jacobi64_maybe_var(const secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo) {
+    /* Start with f=modulus, g=x, eta=-1. */
+    secp256k1_modinv64_signed62 f = modinfo->modulus;
+    secp256k1_modinv64_signed62 g = *x;
+    int j, len = 5; /* len: number of limbs of f and g still in use. */
+    int64_t eta = -1; /* eta = -delta; delta is initially 1 */
+    int64_t cond, fn, gn;
+    int jac = 0; /* Updated by the posdivsteps helper; only its lowest bit is used for the result. */
+    int count;
+
+    /* The input limbs must all be non-negative. */
+    VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0);
+
+    /* If x > 0, then if the loop below converges, it converges to f=g=gcd(x,modulus). Since we
+     * require that gcd(x,modulus)=1 and modulus>=3, x cannot be 0. Thus, we must reach f=1 (or
+     * time out). */
+    VERIFY_CHECK((g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4]) != 0);
+
+    for (count = 0; count < JACOBI64_ITERATIONS; ++count) {
+        /* Compute transition matrix and new eta after 62 posdivsteps. Limb 1 shifted left by 62
+         * is OR-ed into limb 0, so bits 62-63 of the values passed in come from limb 1. */
+        secp256k1_modinv64_trans2x2 t;
+        eta = secp256k1_modinv64_posdivsteps_62_var(eta, f.v[0] | ((uint64_t)f.v[1] << 62), g.v[0] | ((uint64_t)g.v[1] << 62), &t, &jac);
+        /* Update f,g using that transition matrix. */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 1) < 0);  /* g < modulus */
+
+        secp256k1_modinv64_update_fg_62_var(len, &f, &g, &t);
+        /* If the bottom limb of f is 1, there is a chance that f=1. */
+        if (f.v[0] == 1) {
+            cond = 0;
+            /* Check if the other limbs are also 0. */
+            for (j = 1; j < len; ++j) {
+                cond |= f.v[j];
+            }
+            /* If so, we're done. When f=1, the Jacobi symbol (g | f)=1.
+             * The result is +1 when jac is even and -1 when jac is odd. */
+            if (cond == 0) return 1 - 2*(jac & 1);
+        }
+
+        /* Determine if len>1 and limb (len-1) of both f and g is 0. */
+        fn = f.v[len - 1];
+        gn = g.v[len - 1];
+        cond = ((int64_t)len - 2) >> 63; /* Nonzero when len < 2 (sign bit of len-2 shifted down). */
+        cond |= fn;
+        cond |= gn;
+        /* If so, reduce length. */
+        if (cond == 0) --len;
+
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 1) < 0);  /* g < modulus */
+    }
+
+    /* The loop failed to reach f=1 within JACOBI64_ITERATIONS iterations of 62 posdivsteps each
+     * (1550 posdivsteps outside VERIFY mode). Return 0, indicating unknown result. */
+    return 0;
+}
+
+#endif /* SECP256K1_MODINV64_IMPL_H */
--- a/libsecp256k1/src/modules/extrakeys/main_impl.h
+++ b/libsecp256k1/src/modules/extrakeys/main_impl.h
@@ -0,0 +1,285 @@
+/***********************************************************************
+ * Copyright (c) 2020 Jonas Nick                                       *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_MODULE_EXTRAKEYS_MAIN_H
+#define SECP256K1_MODULE_EXTRAKEYS_MAIN_H
+
+#include "../../../include/secp256k1.h"
+#include "../../../include/secp256k1_extrakeys.h"
+#include "../../util.h"
+
+/* Load an x-only public key into a group element by handing it, cast to
+ * secp256k1_pubkey, to secp256k1_pubkey_load. Returns that function's result. */
+static SECP256K1_INLINE int secp256k1_xonly_pubkey_load(const secp256k1_context* ctx, secp256k1_ge *ge, const secp256k1_xonly_pubkey *pubkey) {
+    const secp256k1_pubkey *full_pubkey = (const secp256k1_pubkey *) pubkey;
+
+    return secp256k1_pubkey_load(ctx, ge, full_pubkey);
+}
+
+/* Store a group element into an x-only public key by handing the destination,
+ * cast to secp256k1_pubkey, to secp256k1_pubkey_save. */
+static SECP256K1_INLINE void secp256k1_xonly_pubkey_save(secp256k1_xonly_pubkey *pubkey, secp256k1_ge *ge) {
+    secp256k1_pubkey *full_pubkey = (secp256k1_pubkey *) pubkey;
+
+    secp256k1_pubkey_save(full_pubkey, ge);
+}
+
+/* Parse a 32-byte serialized x coordinate into an x-only public key.
+ *
+ * *pubkey is zeroed before input32 is checked, so it stays zeroed on every
+ * failure that occurs after the pubkey NULL check. Returns 1 on success and 0
+ * if the bytes are rejected by secp256k1_fe_set_b32_limit, no point can be
+ * built from the x coordinate (secp256k1_ge_set_xo_var with odd=0), or the
+ * point fails secp256k1_ge_is_in_correct_subgroup. */
+int secp256k1_xonly_pubkey_parse(const secp256k1_context* ctx, secp256k1_xonly_pubkey *pubkey, const unsigned char *input32) {
+    secp256k1_fe x_coord;
+    secp256k1_ge point;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(pubkey != NULL);
+    memset(pubkey, 0, sizeof(*pubkey));
+    ARG_CHECK(input32 != NULL);
+
+    /* The checks run left to right and stop at the first failure. */
+    if (!secp256k1_fe_set_b32_limit(&x_coord, input32)
+        || !secp256k1_ge_set_xo_var(&point, &x_coord, 0)
+        || !secp256k1_ge_is_in_correct_subgroup(&point)) {
+        return 0;
+    }
+
+    secp256k1_xonly_pubkey_save(pubkey, &point);
+    return 1;
+}
+
+/* Serialize an x-only public key as the 32-byte encoding of its x coordinate.
+ *
+ * output32 is zeroed before pubkey is checked, so it holds 32 zero bytes
+ * whenever the function fails after the output32 NULL check. Returns 1 on
+ * success and 0 if the key cannot be loaded. */
+int secp256k1_xonly_pubkey_serialize(const secp256k1_context* ctx, unsigned char *output32, const secp256k1_xonly_pubkey *pubkey) {
+    secp256k1_ge point;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(output32 != NULL);
+    memset(output32, 0, 32);
+    ARG_CHECK(pubkey != NULL);
+
+    if (secp256k1_xonly_pubkey_load(ctx, &point, pubkey)) {
+        secp256k1_fe_get_b32(output32, &point.x);
+        return 1;
+    }
+    return 0;
+}
+
+/* Compare two x-only public keys by comparing their 32-byte serializations
+ * with secp256k1_memcmp_var, whose result is returned. */
+int secp256k1_xonly_pubkey_cmp(const secp256k1_context* ctx, const secp256k1_xonly_pubkey* pk0, const secp256k1_xonly_pubkey* pk1) {
+    unsigned char serialized[2][32];
+    const secp256k1_xonly_pubkey* keys[2];
+    int idx;
+
+    VERIFY_CHECK(ctx != NULL);
+    keys[0] = pk0;
+    keys[1] = pk1;
+    for (idx = 0; idx < 2; idx++) {
+        /* For a NULL or invalid key, xonly_pubkey_serialize calls the
+         * illegal_callback and returns 0. Such a key is treated as the
+         * all-zero serialization, which sorts below every valid key. That
+         * keeps comparisons consistent even with bad inputs, so sorting
+         * algorithms built on this function still terminate. */
+        if (secp256k1_xonly_pubkey_serialize(ctx, serialized[idx], keys[idx])) {
+            continue;
+        }
+        /* xonly_pubkey_serialize should already have zeroed the output on
+         * failure, but the API does not guarantee it and it cannot be
+         * tested; zeroing again is simpler than a VERIFY_CHECK. */
+        memset(serialized[idx], 0, sizeof(serialized[idx]));
+    }
+    return secp256k1_memcmp_var(serialized[0], serialized[1], sizeof(serialized[1]));
+}
+
+/** Ensures that a group element has an even Y coordinate.
+ *  If Y is already even, r is left alone and 0 is returned; if Y is odd, Y is
+ *  negated in place and 1 is returned.
+ *  Requires that the coordinates of r are normalized. */
+static int secp256k1_extrakeys_ge_even_y(secp256k1_ge *r) {
+    VERIFY_CHECK(!secp256k1_ge_is_infinity(r));
+
+    if (!secp256k1_fe_is_odd(&r->y)) {
+        return 0;
+    }
+    secp256k1_fe_negate(&r->y, &r->y, 1);
+    return 1;
+}
+
+/* Convert a full public key into an x-only public key.
+ *
+ * The loaded point is forced to even Y before it is saved. If pk_parity is
+ * non-NULL it receives 1 when the original point had odd Y and 0 otherwise.
+ * Returns 1 on success and 0 if the public key cannot be loaded. */
+int secp256k1_xonly_pubkey_from_pubkey(const secp256k1_context* ctx, secp256k1_xonly_pubkey *xonly_pubkey, int *pk_parity, const secp256k1_pubkey *pubkey) {
+    secp256k1_ge point;
+    int parity;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(xonly_pubkey != NULL);
+    ARG_CHECK(pubkey != NULL);
+
+    if (secp256k1_pubkey_load(ctx, &point, pubkey)) {
+        parity = secp256k1_extrakeys_ge_even_y(&point);
+        if (pk_parity != NULL) {
+            *pk_parity = parity;
+        }
+        secp256k1_xonly_pubkey_save(xonly_pubkey, &point);
+        return 1;
+    }
+    return 0;
+}
+
+/* Apply a 32-byte additive tweak to an x-only public key and store the result
+ * as a full public key.
+ *
+ * *output_pubkey is zeroed right after its NULL check, so it stays zeroed on
+ * every later failure. Returns 1 on success and 0 if the internal key cannot
+ * be loaded or the tweak helper fails. */
+int secp256k1_xonly_pubkey_tweak_add(const secp256k1_context* ctx, secp256k1_pubkey *output_pubkey, const secp256k1_xonly_pubkey *internal_pubkey, const unsigned char *tweak32) {
+    secp256k1_ge point;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(output_pubkey != NULL);
+    memset(output_pubkey, 0, sizeof(*output_pubkey));
+    ARG_CHECK(internal_pubkey != NULL);
+    ARG_CHECK(tweak32 != NULL);
+
+    if (!secp256k1_xonly_pubkey_load(ctx, &point, internal_pubkey)) {
+        return 0;
+    }
+    if (!secp256k1_ec_pubkey_tweak_add_helper(&point, tweak32)) {
+        return 0;
+    }
+    secp256k1_pubkey_save(output_pubkey, &point);
+    return 1;
+}
+
+/* Check that tweaking internal_pubkey with tweak32 yields the given result.
+ *
+ * Recomputes the tweaked point and returns 1 only if its serialized x
+ * coordinate equals tweaked_pubkey32 and the oddness of its Y coordinate
+ * equals tweaked_pk_parity. Returns 0 otherwise, including when the internal
+ * key cannot be loaded or the tweak helper fails. */
+int secp256k1_xonly_pubkey_tweak_add_check(const secp256k1_context* ctx, const unsigned char *tweaked_pubkey32, int tweaked_pk_parity, const secp256k1_xonly_pubkey *internal_pubkey, const unsigned char *tweak32) {
+    unsigned char computed_x32[32];
+    secp256k1_ge point;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(internal_pubkey != NULL);
+    ARG_CHECK(tweaked_pubkey32 != NULL);
+    ARG_CHECK(tweak32 != NULL);
+
+    if (!secp256k1_xonly_pubkey_load(ctx, &point, internal_pubkey)) {
+        return 0;
+    }
+    if (!secp256k1_ec_pubkey_tweak_add_helper(&point, tweak32)) {
+        return 0;
+    }
+
+    /* Both coordinates are normalized before they are serialized/inspected. */
+    secp256k1_fe_normalize_var(&point.x);
+    secp256k1_fe_normalize_var(&point.y);
+    secp256k1_fe_get_b32(computed_x32, &point.x);
+
+    if (secp256k1_memcmp_var(computed_x32, tweaked_pubkey32, 32) != 0) {
+        return 0;
+    }
+    return secp256k1_fe_is_odd(&point.y) == tweaked_pk_parity;
+}
+
+/* Write a keypair: the 32-byte encoding of sk goes to data[0..31] and the
+ * public key form of pk is saved starting at data[32]. */
+static void secp256k1_keypair_save(secp256k1_keypair *keypair, const secp256k1_scalar *sk, secp256k1_ge *pk) {
+    unsigned char *seckey_bytes = &keypair->data[0];
+    secp256k1_pubkey *pubkey_slot = (secp256k1_pubkey *)&keypair->data[32];
+
+    secp256k1_scalar_get_b32(seckey_bytes, sk);
+    secp256k1_pubkey_save(pubkey_slot, pk);
+}
+
+
+/* Load the secret key stored in the first 32 bytes of a keypair into sk.
+ * ARG_CHECKs that secp256k1_scalar_set_b32_seckey accepted the bytes and
+ * returns that function's result. */
+static int secp256k1_keypair_seckey_load(const secp256k1_context* ctx, secp256k1_scalar *sk, const secp256k1_keypair *keypair) {
+    int loaded = secp256k1_scalar_set_b32_seckey(sk, &keypair->data[0]);
+
+    /* Declassifying the result is fine: sk can only be zero if a keypair
+     * function failed (which zeroes the keypair) and the caller ignored
+     * its return value. */
+    secp256k1_declassify(ctx, &loaded, sizeof(loaded));
+    ARG_CHECK(loaded);
+    return loaded;
+}
+
+/* Load the public key of a keypair into pk and, when sk is non-NULL, its
+ * secret key into sk. The stored pubkey is declassified first, and an invalid
+ * keypair triggers an ARG_CHECK in the loaders. On failure pk and sk (if
+ * non-NULL) are set to dummy values (the generator and one), so they are
+ * always initialized on return. Returns nonzero on success and 0 on failure. */
+static int secp256k1_keypair_load(const secp256k1_context* ctx, secp256k1_scalar *sk, secp256k1_ge *pk, const secp256k1_keypair *keypair) {
+    const secp256k1_pubkey *stored_pubkey = (const secp256k1_pubkey *)&keypair->data[32];
+    int ok;
+
+    /* pubkey_load ARG_CHECKs on an invalid key, so the pubkey has to be
+     * declassified before it is inspected. */
+    secp256k1_declassify(ctx, stored_pubkey, sizeof(*stored_pubkey));
+    ok = secp256k1_pubkey_load(ctx, pk, stored_pubkey);
+    if (sk != NULL) {
+        /* The secret key is only loaded if the pubkey loaded successfully. */
+        ok = ok && secp256k1_keypair_seckey_load(ctx, sk, keypair);
+    }
+    if (ok) {
+        return ok;
+    }
+
+    *pk = secp256k1_ge_const_g;
+    if (sk != NULL) {
+        *sk = secp256k1_scalar_one;
+    }
+    return ok;
+}
+
+/* Create a keypair from a 32-byte secret key.
+ *
+ * *keypair is zeroed right after its NULL check. The derived key material is
+ * always written and then conditionally zeroed again with secp256k1_memczero,
+ * so there is no branch on the outcome of key derivation. Returns the result
+ * of secp256k1_ec_pubkey_create_helper; the keypair is all zeros when that
+ * result is 0. */
+int secp256k1_keypair_create(const secp256k1_context* ctx, secp256k1_keypair *keypair, const unsigned char *seckey32) {
+    secp256k1_scalar seckey_scalar;
+    secp256k1_ge pubkey_point;
+    int created;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(keypair != NULL);
+    memset(keypair, 0, sizeof(*keypair));
+    ARG_CHECK(secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx));
+    ARG_CHECK(seckey32 != NULL);
+
+    created = secp256k1_ec_pubkey_create_helper(&ctx->ecmult_gen_ctx, &seckey_scalar, &pubkey_point, seckey32);
+    secp256k1_keypair_save(keypair, &seckey_scalar, &pubkey_point);
+    secp256k1_memczero(keypair, sizeof(*keypair), !created);
+
+    /* Do not leave the secret scalar behind on the stack. */
+    secp256k1_scalar_clear(&seckey_scalar);
+    return created;
+}
+
+/* Copy the 32-byte secret key out of a keypair.
+ * seckey is zeroed before keypair is checked, so it holds 32 zero bytes if
+ * keypair is NULL. Returns 1 once the bytes have been copied. */
+int secp256k1_keypair_sec(const secp256k1_context* ctx, unsigned char *seckey, const secp256k1_keypair *keypair) {
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(seckey != NULL);
+    memset(seckey, 0, 32);
+    ARG_CHECK(keypair != NULL);
+
+    /* The secret key occupies the first 32 bytes of keypair->data. */
+    memcpy(seckey, keypair->data, 32);
+    return 1;
+}
+
+/* Copy the public key out of a keypair.
+ * *pubkey is zeroed before keypair is checked, so it is all zeros if keypair
+ * is NULL. Returns 1 once the bytes have been copied. */
+int secp256k1_keypair_pub(const secp256k1_context* ctx, secp256k1_pubkey *pubkey, const secp256k1_keypair *keypair) {
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(pubkey != NULL);
+    memset(pubkey, 0, sizeof(*pubkey));
+    ARG_CHECK(keypair != NULL);
+
+    /* The public key is stored in keypair->data starting at offset 32. */
+    memcpy(pubkey->data, keypair->data + 32, sizeof(secp256k1_pubkey));
+    return 1;
+}
+
+/* Extract the x-only public key of a keypair.
+ *
+ * *pubkey is zeroed right after its NULL check. The loaded point is forced to
+ * even Y before it is saved; if pk_parity is non-NULL it receives 1 when the
+ * stored point had odd Y and 0 otherwise. Returns 1 on success and 0 if the
+ * keypair's public key cannot be loaded. */
+int secp256k1_keypair_xonly_pub(const secp256k1_context* ctx, secp256k1_xonly_pubkey *pubkey, int *pk_parity, const secp256k1_keypair *keypair) {
+    secp256k1_ge point;
+    int parity;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(pubkey != NULL);
+    memset(pubkey, 0, sizeof(*pubkey));
+    ARG_CHECK(keypair != NULL);
+
+    /* Only the public key is needed, so no secret key scalar is requested. */
+    if (secp256k1_keypair_load(ctx, NULL, &point, keypair)) {
+        parity = secp256k1_extrakeys_ge_even_y(&point);
+        if (pk_parity != NULL) {
+            *pk_parity = parity;
+        }
+        secp256k1_xonly_pubkey_save(pubkey, &point);
+        return 1;
+    }
+    return 0;
+}
+/* Tweak a keypair in place by adding tweak32 to its x-only key.
+ *
+ * The secret and public key are loaded and the keypair is zeroed. If the loaded
+ * point has an odd Y it is flipped to even Y and the secret key is negated to
+ * match. Both tweak helpers are then applied, and the keypair is rewritten only
+ * if every step succeeded. Returns nonzero on success; on failure returns 0 and
+ * leaves the keypair zeroed. */
+int secp256k1_keypair_xonly_tweak_add(const secp256k1_context* ctx, secp256k1_keypair *keypair, const unsigned char *tweak32) {
+    secp256k1_ge pk;
+    secp256k1_scalar sk;
+    int y_parity;
+    int ret;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(keypair != NULL);
+    ARG_CHECK(tweak32 != NULL);
+
+    ret = secp256k1_keypair_load(ctx, &sk, &pk, keypair);
+    memset(keypair, 0, sizeof(*keypair)); /* From here on the keypair stays zeroed unless it is saved below. */
+
+    y_parity = secp256k1_extrakeys_ge_even_y(&pk); /* Also flips pk to even Y when it was odd. */
+    if (y_parity == 1) {
+        secp256k1_scalar_negate(&sk, &sk);
+    }
+
+    ret &= secp256k1_ec_seckey_tweak_add_helper(&sk, tweak32); /* &= (not &&): both helpers always run. */
+    ret &= secp256k1_ec_pubkey_tweak_add_helper(&pk, tweak32);
+
+    secp256k1_declassify(ctx, &ret, sizeof(ret)); /* ret is branched on next, so it is declassified first. */
+    if (ret) {
+        secp256k1_keypair_save(keypair, &sk, &pk);
+    }
+
+    secp256k1_scalar_clear(&sk);
+    return ret;
+}
+
+#endif
--- a/libsecp256k1/src/modules/schnorrsig/main_impl.h
+++ b/libsecp256k1/src/modules/schnorrsig/main_impl.h
@@ -0,0 +1,269 @@
+/***********************************************************************
+ * Copyright (c) 2018-2020 Andrew Poelstra, Jonas Nick                 *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_MODULE_SCHNORRSIG_MAIN_H
+#define SECP256K1_MODULE_SCHNORRSIG_MAIN_H
+
+#include "../../../include/secp256k1.h"
+#include "../../../include/secp256k1_schnorrsig.h"
+#include "../../hash.h"
+
+/* Puts sha into a fixed precomputed state: the eight state words are set to the
+ * midstate obtained by applying SHA256 to
+ * SHA256("BIP0340/nonce")||SHA256("BIP0340/nonce"), and the byte counter is
+ * set to 64. */
+static void secp256k1_nonce_function_bip340_sha256_tagged(secp256k1_sha256 *sha) {
+    static const uint32_t midstate[8] = {
+        0x46615b35ul, 0xf4bfbff7ul, 0x9f8dc671ul, 0x83627ab3ul,
+        0x60217180ul, 0x57358661ul, 0x21a29e54ul, 0x68b07b4cul
+    };
+    int word;
+
+    secp256k1_sha256_initialize(sha);
+    for (word = 0; word < 8; word++) {
+        sha->s[word] = midstate[word];
+    }
+
+    sha->bytes = 64;
+}
+
+/* Puts sha into a fixed precomputed state: the eight state words are set to the
+ * midstate obtained by applying SHA256 to
+ * SHA256("BIP0340/aux")||SHA256("BIP0340/aux"), and the byte counter is set
+ * to 64. */
+static void secp256k1_nonce_function_bip340_sha256_tagged_aux(secp256k1_sha256 *sha) {
+    static const uint32_t midstate[8] = {
+        0x24dd3219ul, 0x4eba7e70ul, 0xca0fabb9ul, 0x0fa3166dul,
+        0x3afbe4b1ul, 0x4c44df97ul, 0x4aac2739ul, 0x249e850aul
+    };
+    int word;
+
+    secp256k1_sha256_initialize(sha);
+    for (word = 0; word < 8; word++) {
+        sha->s[word] = midstate[word];
+    }
+
+    sha->bytes = 64;
+}
+
+/* algo argument for nonce_function_bip340 to derive the nonce exactly as stated in BIP-340
+ * by using the correct tagged hash function. The array holds the 13 bytes of "BIP0340/nonce"
+ * without a terminating NUL, so sizeof(bip340_algo) is the length of the tag itself. */
+static const unsigned char bip340_algo[] = {'B', 'I', 'P', '0', '3', '4', '0', '/', 'n', 'o', 'n', 'c', 'e'};
+/* Expected value of the magic field of secp256k1_schnorrsig_extraparams; it is compared
+ * against extraparams->magic in secp256k1_schnorrsig_sign_custom. */
+static const unsigned char schnorrsig_extraparams_magic[4] = SECP256K1_SCHNORRSIG_EXTRAPARAMS_MAGIC;
+
+/* Nonce derivation function following BIP-340.
+ *
+ * Writes a 32-byte nonce to nonce32, computed as a tagged SHA256 over
+ * masked_key || xonly_pk32 || msg, where masked_key is key32 XORed with a
+ * tagged "BIP0340/aux" hash of the 32 bytes at data, or with the precomputed
+ * hash of 32 zero bytes when data is NULL. The hash is tagged with algo; the
+ * BIP-340 tag itself takes a precomputed-midstate fast path.
+ *
+ * Returns 0 without touching nonce32 if algo is NULL, and 1 otherwise. */
+static int nonce_function_bip340(unsigned char *nonce32, const unsigned char *msg, size_t msglen, const unsigned char *key32, const unsigned char *xonly_pk32, const unsigned char *algo, size_t algolen, void *data) {
+    secp256k1_sha256 sha;
+    unsigned char masked_key[32];
+    int i;
+
+    if (algo == NULL) {
+        return 0;
+    }
+
+    if (data != NULL) {
+        secp256k1_nonce_function_bip340_sha256_tagged_aux(&sha);
+        secp256k1_sha256_write(&sha, data, 32);
+        secp256k1_sha256_finalize(&sha, masked_key);
+        for (i = 0; i < 32; i++) {
+            masked_key[i] ^= key32[i];
+        }
+    } else {
+        /* Precomputed TaggedHash("BIP0340/aux", 0x0000...00); */
+        static const unsigned char ZERO_MASK[32] = {
+              84, 241, 105, 207, 201, 226, 229, 114,
+             116, 128,  68,  31, 144, 186,  37, 196,
+             136, 244,  97, 199,  11,  94, 165, 220,
+             170, 247, 175, 105, 39,  10, 165,  20
+        };
+        for (i = 0; i < 32; i++) {
+            masked_key[i] = key32[i] ^ ZERO_MASK[i];
+        }
+    }
+
+    /* Tag the hash with algo which is important to avoid nonce reuse across
+     * algorithms. If this nonce function is used in BIP-340 signing as defined
+     * in the spec, an optimized tagging implementation is used. */
+    if (algolen == sizeof(bip340_algo)
+            && secp256k1_memcmp_var(algo, bip340_algo, algolen) == 0) {
+        secp256k1_nonce_function_bip340_sha256_tagged(&sha);
+    } else {
+        secp256k1_sha256_initialize_tagged(&sha, algo, algolen);
+    }
+
+    /* Hash masked-key||pk||msg using the tagged hash as per the spec */
+    secp256k1_sha256_write(&sha, masked_key, 32);
+    secp256k1_sha256_write(&sha, xonly_pk32, 32);
+    secp256k1_sha256_write(&sha, msg, msglen);
+    secp256k1_sha256_finalize(&sha, nonce32);
+    secp256k1_sha256_clear(&sha);
+    /* masked_key is the secret key XORed with a mask that is a fixed public
+     * constant when no auxiliary data is supplied, so it is as sensitive as
+     * the key itself and must not be left behind on the stack. */
+    secp256k1_memclear(masked_key, sizeof(masked_key));
+    return 1;
+}
+/* Exported function pointer through which nonce_function_bip340 (static in this file) is reachable. */
+const secp256k1_nonce_function_hardened secp256k1_nonce_function_bip340 = nonce_function_bip340;
+
+/* Puts sha into a fixed precomputed state: the eight state words are set to the
+ * midstate obtained by applying SHA256 to
+ * SHA256("BIP0340/challenge")||SHA256("BIP0340/challenge"), and the byte
+ * counter is set to 64. */
+static void secp256k1_schnorrsig_sha256_tagged(secp256k1_sha256 *sha) {
+    static const uint32_t midstate[8] = {
+        0x9cecba11ul, 0x23925381ul, 0x11679112ul, 0xd1627e0ful,
+        0x97c87550ul, 0x003cc765ul, 0x90f61164ul, 0x33e9b66aul
+    };
+    int word;
+
+    secp256k1_sha256_initialize(sha);
+    for (word = 0; word < 8; word++) {
+        sha->s[word] = midstate[word];
+    }
+    sha->bytes = 64;
+}
+
+/* Compute the challenge scalar e as the "BIP0340/challenge" tagged hash of
+ * r32 || pubkey32 || msg, with the 32-byte digest loaded into e through
+ * secp256k1_scalar_set_b32 (overflow is not reported to the caller). */
+static void secp256k1_schnorrsig_challenge(secp256k1_scalar* e, const unsigned char *r32, const unsigned char *msg, size_t msglen, const unsigned char *pubkey32)
+{
+    secp256k1_sha256 hasher;
+    unsigned char digest[32];
+
+    /* tagged hash(r.x, pk.x, msg) */
+    secp256k1_schnorrsig_sha256_tagged(&hasher);
+    secp256k1_sha256_write(&hasher, r32, 32);
+    secp256k1_sha256_write(&hasher, pubkey32, 32);
+    secp256k1_sha256_write(&hasher, msg, msglen);
+    secp256k1_sha256_finalize(&hasher, digest);
+
+    /* As per BIP340, e is the challenge hash taken modulo the curve order. */
+    secp256k1_scalar_set_b32(e, digest, NULL);
+}
+
+/* Create a 64-byte Schnorr signature over msg (msglen bytes) with keypair.
+ *
+ * noncefp selects the nonce function (secp256k1_nonce_function_bip340 when
+ * NULL) and ndata is passed through to it. sig64 receives the x coordinate of
+ * the nonce point followed by the scalar e*sk + k. Failures after the argument
+ * checks are accumulated in ret instead of returning early; in that case a
+ * dummy nonce is used and sig64 is zeroed at the end.
+ *
+ * Returns 1 on success and 0 on failure. All local copies of secret data,
+ * including the raw nonce bytes, are cleared before returning. */
+static int secp256k1_schnorrsig_sign_internal(const secp256k1_context* ctx, unsigned char *sig64, const unsigned char *msg, size_t msglen, const secp256k1_keypair *keypair, secp256k1_nonce_function_hardened noncefp, void *ndata) {
+    secp256k1_scalar sk;
+    secp256k1_scalar e;
+    secp256k1_scalar k;
+    secp256k1_gej rj;
+    secp256k1_ge pk;
+    secp256k1_ge r;
+    unsigned char buf[32] = { 0 };
+    unsigned char pk_buf[32];
+    unsigned char seckey[32];
+    int ret = 1;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx));
+    ARG_CHECK(sig64 != NULL);
+    ARG_CHECK(msg != NULL || msglen == 0);
+    ARG_CHECK(keypair != NULL);
+
+    if (noncefp == NULL) {
+        noncefp = secp256k1_nonce_function_bip340;
+    }
+
+    ret &= secp256k1_keypair_load(ctx, &sk, &pk, keypair);
+    /* Because we are signing for a x-only pubkey, the secret key is negated
+     * before signing if the point corresponding to the secret key does not
+     * have an even Y. */
+    if (secp256k1_fe_is_odd(&pk.y)) {
+        secp256k1_scalar_negate(&sk, &sk);
+    }
+
+    secp256k1_scalar_get_b32(seckey, &sk);
+    secp256k1_fe_get_b32(pk_buf, &pk.x);
+    ret &= !!noncefp(buf, msg, msglen, seckey, pk_buf, bip340_algo, sizeof(bip340_algo), ndata);
+    secp256k1_scalar_set_b32(&k, buf, NULL);
+    ret &= !secp256k1_scalar_is_zero(&k);
+    /* On failure, replace k by one so the remaining steps run on a dummy nonce. */
+    secp256k1_scalar_cmov(&k, &secp256k1_scalar_one, !ret);
+
+    secp256k1_ecmult_gen(&ctx->ecmult_gen_ctx, &rj, &k);
+    secp256k1_ge_set_gej(&r, &rj);
+
+    /* We declassify r to allow using it as a branch point. This is fine
+     * because r is not a secret. */
+    secp256k1_declassify(ctx, &r, sizeof(r));
+    secp256k1_fe_normalize_var(&r.y);
+    if (secp256k1_fe_is_odd(&r.y)) {
+        secp256k1_scalar_negate(&k, &k);
+    }
+    secp256k1_fe_normalize_var(&r.x);
+    secp256k1_fe_get_b32(&sig64[0], &r.x);
+
+    secp256k1_schnorrsig_challenge(&e, &sig64[0], msg, msglen, pk_buf);
+    secp256k1_scalar_mul(&e, &e, &sk);
+    secp256k1_scalar_add(&e, &e, &k);
+    secp256k1_scalar_get_b32(&sig64[32], &e);
+
+    secp256k1_memczero(sig64, 64, !ret);
+    secp256k1_scalar_clear(&k);
+    secp256k1_scalar_clear(&sk);
+    secp256k1_memclear(seckey, sizeof(seckey));
+    /* buf holds the raw output of the nonce function, i.e. the bytes k was
+     * derived from, so it is wiped along with k. */
+    secp256k1_memclear(buf, sizeof(buf));
+    secp256k1_gej_clear(&rj);
+
+    return ret;
+}
+
+/* Sign a 32-byte message with the default BIP-340 nonce function, passing
+ * aux_rand32 to it as auxiliary data. Returns the result of
+ * secp256k1_schnorrsig_sign_internal. */
+int secp256k1_schnorrsig_sign32(const secp256k1_context* ctx, unsigned char *sig64, const unsigned char *msg32, const secp256k1_keypair *keypair, const unsigned char *aux_rand32) {
+    /* Casting away const is safe here because the default nonce function is known not to modify the aux_rand32 data. */
+    void *aux_data = (unsigned char*)aux_rand32;
+
+    return secp256k1_schnorrsig_sign_internal(ctx, sig64, msg32, 32, keypair, secp256k1_nonce_function_bip340, aux_data);
+}
+
+/* Same as secp256k1_schnorrsig_sign32: all arguments are forwarded unchanged
+ * and its result is returned. */
+int secp256k1_schnorrsig_sign(const secp256k1_context* ctx, unsigned char *sig64, const unsigned char *msg32, const secp256k1_keypair *keypair, const unsigned char *aux_rand32) {
+    const int signed_ok = secp256k1_schnorrsig_sign32(ctx, sig64, msg32, keypair, aux_rand32);
+
+    return signed_ok;
+}
+
+/* Sign a message of arbitrary length, optionally with a caller-supplied nonce
+ * function.
+ *
+ * With extraparams == NULL the signing routine is called with a NULL nonce
+ * function and NULL nonce data. Otherwise the magic field of extraparams is
+ * ARG_CHECKed and its noncefp and ndata are passed on. Returns the result of
+ * secp256k1_schnorrsig_sign_internal. */
+int secp256k1_schnorrsig_sign_custom(const secp256k1_context* ctx, unsigned char *sig64, const unsigned char *msg, size_t msglen, const secp256k1_keypair *keypair, secp256k1_schnorrsig_extraparams *extraparams) {
+    VERIFY_CHECK(ctx != NULL);
+
+    if (extraparams == NULL) {
+        return secp256k1_schnorrsig_sign_internal(ctx, sig64, msg, msglen, keypair, NULL, NULL);
+    }
+
+    ARG_CHECK(secp256k1_memcmp_var(extraparams->magic,
+                                   schnorrsig_extraparams_magic,
+                                   sizeof(extraparams->magic)) == 0);
+    return secp256k1_schnorrsig_sign_internal(ctx, sig64, msg, msglen, keypair, extraparams->noncefp, extraparams->ndata);
+}
+
+/* Verify a 64-byte Schnorr signature over msg (msglen bytes) against an x-only
+ * public key.
+ *
+ * Returns 1 if the signature is valid. Returns 0 if the first 32 bytes are
+ * rejected as a field element, the last 32 bytes overflow as a scalar, the
+ * public key cannot be loaded, the recomputed point s*G - e*P is the point at
+ * infinity or has an odd Y, or its x coordinate differs from the one in the
+ * signature. */
+int secp256k1_schnorrsig_verify(const secp256k1_context* ctx, const unsigned char *sig64, const unsigned char *msg, size_t msglen, const secp256k1_xonly_pubkey *pubkey) {
+    secp256k1_fe sig_rx;
+    secp256k1_scalar sig_s;
+    secp256k1_scalar challenge;
+    secp256k1_ge pk_point;
+    secp256k1_gej pk_jacobian;
+    secp256k1_gej r_jacobian;
+    secp256k1_ge r_point;
+    unsigned char pk_x32[32];
+    int s_overflow;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(sig64 != NULL);
+    ARG_CHECK(msg != NULL || msglen == 0);
+    ARG_CHECK(pubkey != NULL);
+
+    /* Parse the signature as (r.x, s). */
+    if (!secp256k1_fe_set_b32_limit(&sig_rx, &sig64[0])) {
+        return 0;
+    }
+    secp256k1_scalar_set_b32(&sig_s, &sig64[32], &s_overflow);
+    if (s_overflow) {
+        return 0;
+    }
+
+    if (!secp256k1_xonly_pubkey_load(ctx, &pk_point, pubkey)) {
+        return 0;
+    }
+
+    /* Compute e. */
+    secp256k1_fe_get_b32(pk_x32, &pk_point.x);
+    secp256k1_schnorrsig_challenge(&challenge, &sig64[0], msg, msglen, pk_x32);
+
+    /* Compute r_jacobian = s*G + (-e)*pk_jacobian */
+    secp256k1_scalar_negate(&challenge, &challenge);
+    secp256k1_gej_set_ge(&pk_jacobian, &pk_point);
+    secp256k1_ecmult(&r_jacobian, &pk_jacobian, &challenge, &sig_s);
+
+    secp256k1_ge_set_gej_var(&r_point, &r_jacobian);
+    if (secp256k1_ge_is_infinity(&r_point)) {
+        return 0;
+    }
+
+    /* Accept only an even Y and a matching x coordinate. */
+    secp256k1_fe_normalize_var(&r_point.y);
+    if (secp256k1_fe_is_odd(&r_point.y)) {
+        return 0;
+    }
+    return secp256k1_fe_equal(&sig_rx, &r_point.x) != 0;
+}
+
+#endif
--- a/libsecp256k1/src/precompute_ecmult.c
+++ b/libsecp256k1/src/precompute_ecmult.c
@@ -0,0 +1,90 @@
+/*****************************************************************************************************
+ * Copyright (c) 2013, 2014, 2017, 2021 Pieter Wuille, Andrew Poelstra, Jonas Nick, Russell O'Connor *
+ * Distributed under the MIT software license, see the accompanying                                  *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.                              *
+ *****************************************************************************************************/
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "../include/secp256k1.h"
+
+#include "assumptions.h"
+#include "util.h"
+
+#include "field_impl.h"
+#include "group_impl.h"
+#include "int128_impl.h"
+#include "ecmult.h"
+#include "ecmult_compute_table_impl.h"
+
+/* Write the C definition of one precomputed table to fp.
+ *
+ * The array is emitted under the given name with entry 0 unconditional. For
+ * each window size from 3 up to window_g, the additional entries needed for
+ * that size are wrapped in "#if WINDOW_G > size-1", so the generated file
+ * only compiles the entries the configured window size requires. */
+static void print_table(FILE *fp, const char *name, int window_g, const secp256k1_ge_storage* table) {
+    int window;
+    int entry = 1;
+
+    fprintf(fp, "const secp256k1_ge_storage %s[ECMULT_TABLE_SIZE(WINDOW_G)] = {\n", name);
+    fprintf(fp, " S(%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32
+                  ",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32")\n",
+                SECP256K1_GE_STORAGE_CONST_GET(table[0]));
+
+    for (window = 3; window <= window_g; ++window) {
+        fprintf(fp, "#if WINDOW_G > %d\n", window - 1);
+        /* entry carries over between windows: each group continues where the previous one stopped. */
+        while (entry < ECMULT_TABLE_SIZE(window)) {
+            fprintf(fp, ",S(%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32
+                          ",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32")\n",
+                        SECP256K1_GE_STORAGE_CONST_GET(table[entry]));
+            ++entry;
+        }
+        fprintf(fp, "#endif\n");
+    }
+    fprintf(fp, "};\n");
+}
+
+/* Compute the two precomputed tables for window size window_g and write them
+ * to fp as secp256k1_pre_g and secp256k1_pre_g_128.
+ *
+ * Both buffers come from checked_malloc with the default error callback, the
+ * same way the ecmult_gen table generator allocates its table, so that an
+ * allocation failure is reported through the callback rather than showing up
+ * as a NULL dereference while the tables are being filled. */
+static void print_two_tables(FILE *fp, int window_g) {
+    const size_t table_bytes = ECMULT_TABLE_SIZE(window_g) * sizeof(secp256k1_ge_storage);
+    secp256k1_ge_storage* table = checked_malloc(&default_error_callback, table_bytes);
+    secp256k1_ge_storage* table_128 = checked_malloc(&default_error_callback, table_bytes);
+
+    secp256k1_ecmult_compute_two_tables(table, table_128, window_g, &secp256k1_ge_const_g);
+
+    print_table(fp, "secp256k1_pre_g", window_g, table);
+    print_table(fp, "secp256k1_pre_g_128", window_g, table_128);
+
+    free(table);
+    free(table_128);
+}
+
+/* Generate src/precomputed_ecmult.c: a fixed preamble followed by the two
+ * precomputed tables.
+ *
+ * Returns 0 on success and -1 if the output file cannot be opened, or if a
+ * write error was recorded on the stream or closing it failed (in which case
+ * the generated file may be incomplete). */
+int main(void) {
+    /* Always compute all tables for window sizes up to 15. */
+    int window_g = (ECMULT_WINDOW_SIZE < 15) ? 15 : ECMULT_WINDOW_SIZE;
+    const char outfile[] = "src/precomputed_ecmult.c";
+    FILE* fp;
+    int write_failed;
+
+    fp = fopen(outfile, "w");
+    if (fp == NULL) {
+        fprintf(stderr, "Could not open %s for writing!\n", outfile);
+        return -1;
+    }
+
+    fprintf(fp, "/* This file was automatically generated by precompute_ecmult. */\n");
+    fprintf(fp, "/* This file contains an array secp256k1_pre_g with odd multiples of the base point G and\n");
+    fprintf(fp, " * an array secp256k1_pre_g_128 with odd multiples of 2^128*G for accelerating the computation of a*P + b*G.\n");
+    fprintf(fp, " */\n");
+    fprintf(fp, "#include \"group.h\"\n");
+    fprintf(fp, "#include \"ecmult.h\"\n");
+    fprintf(fp, "#include \"precomputed_ecmult.h\"\n");
+    fprintf(fp, "#define S(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) SECP256K1_GE_STORAGE_CONST(0x##a##u,0x##b##u,0x##c##u,0x##d##u,0x##e##u,0x##f##u,0x##g##u,0x##h##u,0x##i##u,0x##j##u,0x##k##u,0x##l##u,0x##m##u,0x##n##u,0x##o##u,0x##p##u)\n");
+    fprintf(fp, "#if ECMULT_WINDOW_SIZE > %d\n", window_g);
+    fprintf(fp, "   #error configuration mismatch, invalid ECMULT_WINDOW_SIZE. Try deleting precomputed_ecmult.c before the build.\n");
+    fprintf(fp, "#endif\n");
+    fprintf(fp, "#ifdef EXHAUSTIVE_TEST_ORDER\n");
+    fprintf(fp, "#    error Cannot compile precomputed_ecmult.c in exhaustive test mode\n");
+    fprintf(fp, "#endif /* EXHAUSTIVE_TEST_ORDER */\n");
+    fprintf(fp, "#define WINDOW_G ECMULT_WINDOW_SIZE\n");
+
+    print_two_tables(fp, window_g);
+
+    fprintf(fp, "#undef S\n");
+
+    /* Output is buffered, so a failure may only show up in the stream's error
+     * indicator or when fclose flushes. Check both instead of reporting
+     * success for a possibly truncated source file. */
+    write_failed = ferror(fp);
+    if (fclose(fp) != 0) {
+        write_failed = 1;
+    }
+    if (write_failed) {
+        fprintf(stderr, "Could not write %s!\n", outfile);
+        return -1;
+    }
+
+    return 0;
+}
--- a/libsecp256k1/src/precompute_ecmult_gen.c
+++ b/libsecp256k1/src/precompute_ecmult_gen.c
@@ -0,0 +1,100 @@
+/*********************************************************************************
+ * Copyright (c) 2013, 2014, 2015, 2021 Thomas Daede, Cory Fields, Pieter Wuille *
+ * Distributed under the MIT software license, see the accompanying              *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.          *
+ *********************************************************************************/
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "../include/secp256k1.h"
+
+#include "assumptions.h"
+#include "util.h"
+
+#include "group.h"
+#include "int128_impl.h"
+#include "ecmult_gen.h"
+#include "ecmult_gen_compute_table_impl.h"
+
+/* Comb configurations for which main() always emits a table. Each entry is
+ * {blocks, teeth}, in the argument order taken by print_table(). */
+static const int CONFIGS[][2] = {
+    {2, 5},
+    {11, 6},
+    {43, 6}
+};
+
+/* Compute the table for one (blocks, teeth) configuration and write it to fp
+ * as an "#elif" branch: one brace-enclosed row per block, each holding
+ * 2^(teeth-1) S(...) entries. The spacing is derived from blocks and teeth. */
+static void print_table(FILE* fp, int blocks, int teeth) {
+    const int spacing = CEIL_DIV(256, blocks * teeth);
+    const size_t points = ((size_t)1) << (teeth - 1);
+    secp256k1_ge_storage* table;
+    const secp256k1_ge_storage* row;
+    int block;
+    size_t idx;
+
+    table = checked_malloc(&default_error_callback, blocks * points * sizeof(secp256k1_ge_storage));
+    secp256k1_ecmult_gen_compute_table(table, &secp256k1_ge_const_g, blocks, teeth, spacing);
+
+    fprintf(fp, "#elif (COMB_BLOCKS == %d) && (COMB_TEETH == %d) && (COMB_SPACING == %d)\n", blocks, teeth, spacing);
+    for (block = 0; block != blocks; block++) {
+        /* The table is stored row-major: `points` entries per block. */
+        row = &table[block * points];
+        fprintf(fp,"{");
+        for (idx = 0; idx != points; idx++) {
+            /* Entries within a row are separated by a comma and a newline. */
+            if (idx != 0) {
+                fprintf(fp,",\n");
+            }
+            fprintf(fp, "S(%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32
+                        ",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32")",
+                    SECP256K1_GE_STORAGE_CONST_GET(row[idx]));
+        }
+        /* Every row but the last is followed by a comma. */
+        fprintf(fp, "}%s\n", (block != blocks - 1) ? "," : "");
+    }
+    free(table);
+}
+
+/* Generate src/precomputed_ecmult_gen.c: one "#elif" table per entry of
+ * CONFIGS, plus one for the compile-time (COMB_BLOCKS, COMB_TEETH) pair if it
+ * is not already among them.
+ *
+ * Returns 0 on success and -1 if the output file cannot be opened or if
+ * writing/closing it fails, so that a truncated file is never reported as a
+ * successful run. */
+int main(int argc, char **argv) {
+    const char outfile[] = "src/precomputed_ecmult_gen.c";
+    FILE* fp;
+    size_t config;
+    int did_current_config = 0;
+    int write_failed;
+
+    (void)argc;
+    (void)argv;
+
+    fp = fopen(outfile, "w");
+    if (fp == NULL) {
+        fprintf(stderr, "Could not open %s for writing!\n", outfile);
+        return -1;
+    }
+
+    fprintf(fp, "/* This file was automatically generated by precompute_ecmult_gen. */\n");
+    fprintf(fp, "/* See ecmult_gen_impl.h for details about the contents of this file. */\n");
+    fprintf(fp, "#include \"group.h\"\n");
+    fprintf(fp, "#include \"ecmult_gen.h\"\n");
+    fprintf(fp, "#include \"precomputed_ecmult_gen.h\"\n");
+    fprintf(fp, "#ifdef EXHAUSTIVE_TEST_ORDER\n");
+    fprintf(fp, "#    error Cannot compile precomputed_ecmult_gen.c in exhaustive test mode\n");
+    fprintf(fp, "#endif /* EXHAUSTIVE_TEST_ORDER */\n");
+    fprintf(fp, "#define S(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) SECP256K1_GE_STORAGE_CONST(0x##a##u,0x##b##u,0x##c##u,0x##d##u,0x##e##u,0x##f##u,0x##g##u,0x##h##u,0x##i##u,0x##j##u,0x##k##u,0x##l##u,0x##m##u,0x##n##u,0x##o##u,0x##p##u)\n");
+
+    fprintf(fp, "const secp256k1_ge_storage secp256k1_ecmult_gen_prec_table[COMB_BLOCKS][COMB_POINTS] = {\n");
+    /* "#if 0" opens the chain so that every table can be emitted uniformly as
+     * an "#elif" branch by print_table(). */
+    fprintf(fp, "#if 0\n");
+    for (config = 0; config < sizeof(CONFIGS) / sizeof(*CONFIGS); ++config) {
+        print_table(fp, CONFIGS[config][0], CONFIGS[config][1]);
+        if (CONFIGS[config][0] == COMB_BLOCKS && CONFIGS[config][1] == COMB_TEETH) {
+            did_current_config = 1;
+        }
+    }
+    if (!did_current_config) {
+        print_table(fp, COMB_BLOCKS, COMB_TEETH);
+    }
+    fprintf(fp, "#else\n");
+    fprintf(fp, "#    error Configuration mismatch, invalid COMB_* parameters. Try deleting precomputed_ecmult_gen.c before the build.\n");
+    fprintf(fp, "#endif\n");
+
+    fprintf(fp, "};\n");
+    fprintf(fp, "#undef S\n");
+
+    /* ferror() reports any earlier failed write on this stream; fclose()
+     * flushes the remaining buffered output and can fail on its own. */
+    write_failed = ferror(fp);
+    if (fclose(fp) != 0) {
+        write_failed = 1;
+    }
+    if (write_failed) {
+        fprintf(stderr, "Could not write %s!\n", outfile);
+        return -1;
+    }
+
+    return 0;
+}
--- a/libsecp256k1/src/precomputed_ecmult.c
+++ b/libsecp256k1/src/precomputed_ecmult.c
--- a/libsecp256k1/src/precomputed_ecmult.h
+++ b/libsecp256k1/src/precomputed_ecmult.h
@@ -0,0 +1,38 @@
+/*****************************************************************************************************
+ * Copyright (c) 2013, 2014, 2017, 2021 Pieter Wuille, Andrew Poelstra, Jonas Nick, Russell O'Connor *
+ * Distributed under the MIT software license, see the accompanying                                  *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.                              *
+ *****************************************************************************************************/
+
+#ifndef SECP256K1_PRECOMPUTED_ECMULT_H
+#define SECP256K1_PRECOMPUTED_ECMULT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "ecmult.h"
+#include "group.h"
+/* WINDOW_G is the window size that both tables below are dimensioned for. */
+#if defined(EXHAUSTIVE_TEST_ORDER)
+/* Exhaustive test mode: WINDOW_G is chosen from the test group order, and the
+ * tables are writable arrays local to each translation unit instead of the
+ * generated constants (the generated precomputed_ecmult.c refuses to compile
+ * in this mode). NOTE(review): presumably filled in at run time by the test
+ * code - confirm where. */
+#    if EXHAUSTIVE_TEST_ORDER == 7
+#        define WINDOW_G 3
+#    elif EXHAUSTIVE_TEST_ORDER == 13
+#        define WINDOW_G 4
+#    elif EXHAUSTIVE_TEST_ORDER == 199
+#        define WINDOW_G 8
+#    else
+#        error No known generator for the specified exhaustive test group order.
+#    endif
+static secp256k1_ge_storage secp256k1_pre_g[ECMULT_TABLE_SIZE(WINDOW_G)];
+static secp256k1_ge_storage secp256k1_pre_g_128[ECMULT_TABLE_SIZE(WINDOW_G)];
+#else /* !defined(EXHAUSTIVE_TEST_ORDER) */
+/* Normal builds: the tables are the constants written by precompute_ecmult
+ * into precomputed_ecmult.c (odd multiples of G and of 2^128*G, respectively). */
+#    define WINDOW_G ECMULT_WINDOW_SIZE
+extern const secp256k1_ge_storage secp256k1_pre_g[ECMULT_TABLE_SIZE(WINDOW_G)];
+extern const secp256k1_ge_storage secp256k1_pre_g_128[ECMULT_TABLE_SIZE(WINDOW_G)];
+#endif /* defined(EXHAUSTIVE_TEST_ORDER) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SECP256K1_PRECOMPUTED_ECMULT_H */
--- a/libsecp256k1/src/precomputed_ecmult_gen.c
+++ b/libsecp256k1/src/precomputed_ecmult_gen.c
--- a/libsecp256k1/src/precomputed_ecmult_gen.h
+++ b/libsecp256k1/src/precomputed_ecmult_gen.h
@@ -0,0 +1,26 @@
+/*********************************************************************************
+ * Copyright (c) 2013, 2014, 2015, 2021 Thomas Daede, Cory Fields, Pieter Wuille *
+ * Distributed under the MIT software license, see the accompanying              *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.          *
+ *********************************************************************************/
+
+#ifndef SECP256K1_PRECOMPUTED_ECMULT_GEN_H
+#define SECP256K1_PRECOMPUTED_ECMULT_GEN_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "group.h"
+#include "ecmult_gen.h"
+/* In exhaustive test mode the table is a writable array local to each
+ * translation unit (the generated precomputed_ecmult_gen.c refuses to compile
+ * in that mode); otherwise it is the constant table defined in
+ * precomputed_ecmult_gen.c. */
+#ifdef EXHAUSTIVE_TEST_ORDER
+static secp256k1_ge_storage secp256k1_ecmult_gen_prec_table[COMB_BLOCKS][COMB_POINTS];
+#else
+extern const secp256k1_ge_storage secp256k1_ecmult_gen_prec_table[COMB_BLOCKS][COMB_POINTS];
+#endif /* defined(EXHAUSTIVE_TEST_ORDER) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SECP256K1_PRECOMPUTED_ECMULT_GEN_H */
--- a/libsecp256k1/src/scalar.h
+++ b/libsecp256k1/src/scalar.h
@@ -0,0 +1,105 @@
+/***********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                    *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_SCALAR_H
+#define SECP256K1_SCALAR_H
+
+#include "util.h"
+
+#if defined(EXHAUSTIVE_TEST_ORDER)
+#include "scalar_low.h"
+#elif defined(SECP256K1_WIDEMUL_INT128)
+#include "scalar_4x64.h"
+#elif defined(SECP256K1_WIDEMUL_INT64)
+#include "scalar_8x32.h"
+#else
+#error "Please select wide multiplication implementation"
+#endif
+
+/** Clear a scalar to prevent the leak of sensitive data. */
+static void secp256k1_scalar_clear(secp256k1_scalar *r);
+
+/** Access bits (0 < count <= 32) from a scalar. All requested bits must belong to the same 32-bit limb. */
+static uint32_t secp256k1_scalar_get_bits_limb32(const secp256k1_scalar *a, unsigned int offset, unsigned int count);
+
+/** Access bits (0 < count <= 32) from a scalar. offset + count must be <= 256. Not constant time in offset and count. */
+static uint32_t secp256k1_scalar_get_bits_var(const secp256k1_scalar *a, unsigned int offset, unsigned int count);
+
+/** Set a scalar from a big endian byte array. The scalar will be reduced modulo group order `n`.
+ * In:      bin:        pointer to a 32-byte array.
+ * Out:     r:          scalar to be set.
+ *          overflow:   non-zero if the scalar was bigger or equal to `n` before reduction, zero otherwise (can be NULL).
+ */
+static void secp256k1_scalar_set_b32(secp256k1_scalar *r, const unsigned char *bin, int *overflow);
+
+/** Set a scalar from a big endian byte array and returns 1 if it is a valid
+ *  seckey and 0 otherwise. */
+static int secp256k1_scalar_set_b32_seckey(secp256k1_scalar *r, const unsigned char *bin);
+
+/** Set a scalar to an unsigned integer. */
+static void secp256k1_scalar_set_int(secp256k1_scalar *r, unsigned int v);
+
+/** Convert a scalar to a byte array. */
+static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar* a);
+
+/** Add two scalars together (modulo the group order). Returns whether it overflowed. */
+static int secp256k1_scalar_add(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b);
+
+/** Conditionally add a power of two to a scalar. The result is not allowed to overflow. */
+static void secp256k1_scalar_cadd_bit(secp256k1_scalar *r, unsigned int bit, int flag);
+
+/** Multiply two scalars (modulo the group order). */
+static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b);
+
+/** Compute the inverse of a scalar (modulo the group order). */
+static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar *a);
+
+/** Compute the inverse of a scalar (modulo the group order), without constant-time guarantee. */
+static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_scalar *a);
+
+/** Compute the complement of a scalar (modulo the group order). */
+static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar *a);
+
+/** Multiply a scalar with the multiplicative inverse of 2. */
+static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a);
+
+/** Check whether a scalar equals zero. */
+static int secp256k1_scalar_is_zero(const secp256k1_scalar *a);
+
+/** Check whether a scalar equals one. */
+static int secp256k1_scalar_is_one(const secp256k1_scalar *a);
+
+/** Check whether a scalar, considered as a nonnegative integer, is even. */
+static int secp256k1_scalar_is_even(const secp256k1_scalar *a);
+
+/** Check whether a scalar is higher than the group order divided by 2. */
+static int secp256k1_scalar_is_high(const secp256k1_scalar *a);
+
+/** Conditionally negate a number, in constant time.
+ * Returns -1 if the number was negated, 1 otherwise */
+static int secp256k1_scalar_cond_negate(secp256k1_scalar *a, int flag);
+
+/** Compare two scalars. */
+static int secp256k1_scalar_eq(const secp256k1_scalar *a, const secp256k1_scalar *b);
+
+/** Find r1 and r2 such that r1+r2*2^128 = k. */
+static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k);
+/** Find r1 and r2 such that r1+r2*lambda = k, where r1 and r2 or their
+ *  negations are maximum 128 bits long (see secp256k1_ge_mul_lambda). It is
+ *  required that r1, r2, and k all point to different objects. */
+static void secp256k1_scalar_split_lambda(secp256k1_scalar * SECP256K1_RESTRICT r1, secp256k1_scalar * SECP256K1_RESTRICT r2, const secp256k1_scalar * SECP256K1_RESTRICT k);
+
+/** Multiply a and b (without taking the modulus!), divide by 2**shift, and round to the nearest integer. Shift must be at least 256. */
+static void secp256k1_scalar_mul_shift_var(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b, unsigned int shift);
+
+/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time.  Both *r and *a must be initialized.*/
+static void secp256k1_scalar_cmov(secp256k1_scalar *r, const secp256k1_scalar *a, int flag);
+
+/** Check invariants on a scalar (no-op unless VERIFY is enabled). */
+static void secp256k1_scalar_verify(const secp256k1_scalar *r);
+#define SECP256K1_SCALAR_VERIFY(r) secp256k1_scalar_verify(r)
+
+#endif /* SECP256K1_SCALAR_H */
--- a/libsecp256k1/src/scalar_4x64.h
+++ b/libsecp256k1/src/scalar_4x64.h
@@ -0,0 +1,19 @@
+/***********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                    *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_SCALAR_REPR_H
+#define SECP256K1_SCALAR_REPR_H
+
+#include <stdint.h>
+
+/** A scalar modulo the group order of the secp256k1 curve. */
+typedef struct {
+    uint64_t d[4];
+} secp256k1_scalar;
+
+#define SECP256K1_SCALAR_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{((uint64_t)(d1)) << 32 | (d0), ((uint64_t)(d3)) << 32 | (d2), ((uint64_t)(d5)) << 32 | (d4), ((uint64_t)(d7)) << 32 | (d6)}}
+
+#endif /* SECP256K1_SCALAR_REPR_H */
--- a/libsecp256k1/src/scalar_4x64_impl.h
+++ b/libsecp256k1/src/scalar_4x64_impl.h
--- a/libsecp256k1/src/scalar_8x32.h
+++ b/libsecp256k1/src/scalar_8x32.h
@@ -0,0 +1,19 @@
+/***********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                    *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_SCALAR_REPR_H
+#define SECP256K1_SCALAR_REPR_H
+
+#include <stdint.h>
+
+/** A scalar modulo the group order of the secp256k1 curve. */
+typedef struct {
+    uint32_t d[8];
+} secp256k1_scalar;
+
+#define SECP256K1_SCALAR_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{(d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7)}}
+
+#endif /* SECP256K1_SCALAR_REPR_H */
--- a/libsecp256k1/src/scalar_8x32_impl.h
+++ b/libsecp256k1/src/scalar_8x32_impl.h
@@ -0,0 +1,816 @@
+/***********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                    *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_SCALAR_REPR_IMPL_H
+#define SECP256K1_SCALAR_REPR_IMPL_H
+
+#include "checkmem.h"
+#include "modinv32_impl.h"
+#include "util.h"
+
+/* Limbs of the secp256k1 order. */
+#define SECP256K1_N_0 ((uint32_t)0xD0364141UL)
+#define SECP256K1_N_1 ((uint32_t)0xBFD25E8CUL)
+#define SECP256K1_N_2 ((uint32_t)0xAF48A03BUL)
+#define SECP256K1_N_3 ((uint32_t)0xBAAEDCE6UL)
+#define SECP256K1_N_4 ((uint32_t)0xFFFFFFFEUL)
+#define SECP256K1_N_5 ((uint32_t)0xFFFFFFFFUL)
+#define SECP256K1_N_6 ((uint32_t)0xFFFFFFFFUL)
+#define SECP256K1_N_7 ((uint32_t)0xFFFFFFFFUL)
+
+/* Limbs of 2^256 minus the secp256k1 order.
+ * This is the two's complement of the order, ~N + 1: the +1 is applied to limb
+ * 0 only (it does not carry, since ~SECP256K1_N_0 is not 0xFFFFFFFF), limb 4
+ * is ~0xFFFFFFFE = 1, and limbs 5..7 would be ~0xFFFFFFFF = 0, so they are
+ * not defined. */
+#define SECP256K1_N_C_0 (~SECP256K1_N_0 + 1)
+#define SECP256K1_N_C_1 (~SECP256K1_N_1)
+#define SECP256K1_N_C_2 (~SECP256K1_N_2)
+#define SECP256K1_N_C_3 (~SECP256K1_N_3)
+#define SECP256K1_N_C_4 (1)
+
+/* Limbs of half the secp256k1 order. */
+#define SECP256K1_N_H_0 ((uint32_t)0x681B20A0UL)
+#define SECP256K1_N_H_1 ((uint32_t)0xDFE92F46UL)
+#define SECP256K1_N_H_2 ((uint32_t)0x57A4501DUL)
+#define SECP256K1_N_H_3 ((uint32_t)0x5D576E73UL)
+#define SECP256K1_N_H_4 ((uint32_t)0xFFFFFFFFUL)
+#define SECP256K1_N_H_5 ((uint32_t)0xFFFFFFFFUL)
+#define SECP256K1_N_H_6 ((uint32_t)0xFFFFFFFFUL)
+#define SECP256K1_N_H_7 ((uint32_t)0x7FFFFFFFUL)
+
+/* Set r to the unsigned integer v. */
+SECP256K1_INLINE static void secp256k1_scalar_set_int(secp256k1_scalar *r, unsigned int v) {
+    int limb;
+
+    /* The value goes into the least significant limb; all higher limbs are zero. */
+    r->d[0] = v;
+    for (limb = 1; limb < 8; limb++) {
+        r->d[limb] = 0;
+    }
+
+    SECP256K1_SCALAR_VERIFY(r);
+}
+
+/* Return `count` bits of a starting at bit `offset`. All requested bits must
+ * lie within a single 32-bit limb, and count must be in 1..32. */
+SECP256K1_INLINE static uint32_t secp256k1_scalar_get_bits_limb32(const secp256k1_scalar *a, unsigned int offset, unsigned int count) {
+    unsigned int limb_index, shift;
+    uint32_t keep_mask;
+    SECP256K1_SCALAR_VERIFY(a);
+    VERIFY_CHECK(count > 0 && count <= 32);
+    VERIFY_CHECK((offset + count - 1) >> 5 == offset >> 5);
+
+    /* Computed only after the checks above, so the shift amounts are in range. */
+    limb_index = offset >> 5;
+    shift = offset & 0x1F;
+    keep_mask = 0xFFFFFFFF >> (32 - count);
+    return (a->d[limb_index] >> shift) & keep_mask;
+}
+
+/* Return `count` bits (1..32) of a starting at bit `offset`; the bits may
+ * straddle two adjacent limbs. Branches on offset and count. */
+SECP256K1_INLINE static uint32_t secp256k1_scalar_get_bits_var(const secp256k1_scalar *a, unsigned int offset, unsigned int count) {
+    unsigned int limb_index, shift;
+    uint32_t low_part, high_part;
+    SECP256K1_SCALAR_VERIFY(a);
+    VERIFY_CHECK(count > 0 && count <= 32);
+    VERIFY_CHECK(offset + count <= 256);
+
+    limb_index = offset >> 5;
+    if ((offset + count - 1) >> 5 == limb_index) {
+        /* First and last requested bit share a limb. */
+        return secp256k1_scalar_get_bits_limb32(a, offset, count);
+    }
+
+    /* The range crosses into the next limb: combine the top of one limb with
+     * the bottom of the following one. */
+    VERIFY_CHECK(limb_index + 1 < 8);
+    shift = offset & 0x1F;
+    low_part = a->d[limb_index] >> shift;
+    high_part = a->d[limb_index + 1] << (32 - shift);
+    return (low_part | high_part) & (0xFFFFFFFF >> (32 - count));
+}
+
+/* Return 1 if the raw limbs of a encode a value >= the group order N, and 0
+ * otherwise. Limbs are compared from most to least significant without
+ * branching: `no` latches once a limb is found below the corresponding limb
+ * of N, `yes` once one is found above, so that only the most significant
+ * differing limb decides the result. */
+SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scalar *a) {
+    int yes = 0;
+    int no = 0;
+    /* N_7, N_6 and N_5 are 0xFFFFFFFF, so a limb can never exceed them. */
+    no |= (a->d[7] < SECP256K1_N_7); /* No need for a > check. */
+    no |= (a->d[6] < SECP256K1_N_6); /* No need for a > check. */
+    no |= (a->d[5] < SECP256K1_N_5); /* No need for a > check. */
+    no |= (a->d[4] < SECP256K1_N_4);
+    yes |= (a->d[4] > SECP256K1_N_4) & ~no;
+    no |= (a->d[3] < SECP256K1_N_3) & ~yes;
+    yes |= (a->d[3] > SECP256K1_N_3) & ~no;
+    no |= (a->d[2] < SECP256K1_N_2) & ~yes;
+    yes |= (a->d[2] > SECP256K1_N_2) & ~no;
+    no |= (a->d[1] < SECP256K1_N_1) & ~yes;
+    yes |= (a->d[1] > SECP256K1_N_1) & ~no;
+    /* >= on the last limb: a value exactly equal to N also counts as overflow. */
+    yes |= (a->d[0] >= SECP256K1_N_0) & ~no;
+    return yes;
+}
+
+/* Subtract overflow * N from r, where overflow must be 0 or 1. This is done by
+ * adding overflow * (2^256 - N), i.e. the SECP256K1_N_C_* limbs, and dropping
+ * the carry out of the top limb. Returns overflow unchanged. */
+SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar *r, uint32_t overflow) {
+    uint64_t t; /* running sum; bits 32 and up carry into the next limb */
+    VERIFY_CHECK(overflow <= 1);
+
+    t = (uint64_t)r->d[0] + overflow * SECP256K1_N_C_0;
+    r->d[0] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[1] + overflow * SECP256K1_N_C_1;
+    r->d[1] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[2] + overflow * SECP256K1_N_C_2;
+    r->d[2] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[3] + overflow * SECP256K1_N_C_3;
+    r->d[3] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[4] + overflow * SECP256K1_N_C_4;
+    r->d[4] = t & 0xFFFFFFFFUL; t >>= 32;
+    /* 2^256 - N has no limbs above index 4; only the carry propagates from here. */
+    t += (uint64_t)r->d[5];
+    r->d[5] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[6];
+    r->d[6] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[7];
+    r->d[7] = t & 0xFFFFFFFFUL;
+
+    SECP256K1_SCALAR_VERIFY(r);
+    return overflow;
+}
+
+/* Set r = a + b modulo the group order. The limbs are added with carry, then
+ * the result is reduced once if the sum carried out of 256 bits or is >= N.
+ * Returns 1 if such a reduction was needed, 0 otherwise. */
+static int secp256k1_scalar_add(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
+    int overflow;
+    uint64_t t = (uint64_t)a->d[0] + b->d[0];
+    SECP256K1_SCALAR_VERIFY(a);
+    SECP256K1_SCALAR_VERIFY(b);
+
+    r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[1] + b->d[1];
+    r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[2] + b->d[2];
+    r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[3] + b->d[3];
+    r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[4] + b->d[4];
+    r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[5] + b->d[5];
+    r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[6] + b->d[6];
+    r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[7] + b->d[7];
+    r->d[7] = t & 0xFFFFFFFFULL; t >>= 32;
+    /* t now holds the carry out of the top limb; add the ">= N" indicator for
+     * the low 256 bits. The check below asserts the two are never both set. */
+    overflow = t + secp256k1_scalar_check_overflow(r);
+    VERIFY_CHECK(overflow == 0 || overflow == 1);
+    secp256k1_scalar_reduce(r, overflow);
+
+    SECP256K1_SCALAR_VERIFY(r);
+    return overflow;
+}
+
+/* If flag is 1, add 2^bit to r; if flag is 0, leave r unchanged. bit must be
+ * below 256 and the addition must not carry out of the top limb. The same
+ * sequence of additions is executed for either value of flag. */
+static void secp256k1_scalar_cadd_bit(secp256k1_scalar *r, unsigned int bit, int flag) {
+    uint64_t t;
+    /* NOTE(review): the volatile copy presumably keeps the compiler from
+     * turning the masking below into a branch on flag - confirm. */
+    volatile int vflag = flag;
+    SECP256K1_SCALAR_VERIFY(r);
+    VERIFY_CHECK(bit < 256);
+
+    /* flag == 0 adds 0x100 to bit, so no limb index 0..7 matches below. */
+    bit += ((uint32_t) vflag - 1) & 0x100;  /* forcing (bit >> 5) > 7 makes this a noop */
+    /* Each limb adds 1 << (bit % 32) if it is the selected limb, else 0, plus the carry. */
+    t = (uint64_t)r->d[0] + (((uint32_t)((bit >> 5) == 0)) << (bit & 0x1F));
+    r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)r->d[1] + (((uint32_t)((bit >> 5) == 1)) << (bit & 0x1F));
+    r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)r->d[2] + (((uint32_t)((bit >> 5) == 2)) << (bit & 0x1F));
+    r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)r->d[3] + (((uint32_t)((bit >> 5) == 3)) << (bit & 0x1F));
+    r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)r->d[4] + (((uint32_t)((bit >> 5) == 4)) << (bit & 0x1F));
+    r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)r->d[5] + (((uint32_t)((bit >> 5) == 5)) << (bit & 0x1F));
+    r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)r->d[6] + (((uint32_t)((bit >> 5) == 6)) << (bit & 0x1F));
+    r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)r->d[7] + (((uint32_t)((bit >> 5) == 7)) << (bit & 0x1F));
+    r->d[7] = t & 0xFFFFFFFFULL;
+
+    SECP256K1_SCALAR_VERIFY(r);
+    /* t was not shifted after the top limb, so its high half is the final carry. */
+    VERIFY_CHECK((t >> 32) == 0);
+}
+
+/* Load r from the 32-byte big endian array b32 and reduce it modulo the group
+ * order. If overflow is non-NULL, *overflow receives 1 when a reduction took
+ * place and 0 otherwise. */
+static void secp256k1_scalar_set_b32(secp256k1_scalar *r, const unsigned char *b32, int *overflow) {
+    int limb;
+    int was_reduced;
+
+    /* Limb 0 is least significant and therefore comes from the last four bytes. */
+    for (limb = 0; limb < 8; limb++) {
+        r->d[limb] = secp256k1_read_be32(&b32[28 - 4 * limb]);
+    }
+    was_reduced = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
+    if (overflow) {
+        *overflow = was_reduced;
+    }
+
+    SECP256K1_SCALAR_VERIFY(r);
+}
+
+/* Serialize a into bin as 32 big endian bytes. */
+static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar* a) {
+    int limb;
+    SECP256K1_SCALAR_VERIFY(a);
+
+    /* The most significant limb (index 7) is written first, at bin[0]. */
+    for (limb = 7; limb >= 0; limb--) {
+        secp256k1_write_be32(&bin[4 * (7 - limb)], a->d[limb]);
+    }
+}
+
+/* Return 1 if every limb of a is zero, 0 otherwise. */
+SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar *a) {
+    uint32_t any_bits;
+    int limb;
+    SECP256K1_SCALAR_VERIFY(a);
+
+    /* OR all limbs together; the result is zero only if each limb is zero. */
+    any_bits = a->d[0];
+    for (limb = 1; limb < 8; limb++) {
+        any_bits |= a->d[limb];
+    }
+    return any_bits == 0;
+}
+
+/* Set r = -a modulo the group order N. For non-zero a this is N - a, computed
+ * limb by limb as ~a + N + 1 with the carry out of the top limb dropped. For
+ * a == 0 every output limb is masked to zero, so the result is 0 rather than N. */
+static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar *a) {
+    /* All-ones if a is non-zero, all-zeros otherwise. */
+    uint32_t nonzero = 0xFFFFFFFFUL * (secp256k1_scalar_is_zero(a) == 0);
+    uint64_t t = (uint64_t)(~a->d[0]) + SECP256K1_N_0 + 1;
+    SECP256K1_SCALAR_VERIFY(a);
+
+    /* The 32-bit mask also truncates t to its low limb. */
+    r->d[0] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[1]) + SECP256K1_N_1;
+    r->d[1] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[2]) + SECP256K1_N_2;
+    r->d[2] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[3]) + SECP256K1_N_3;
+    r->d[3] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[4]) + SECP256K1_N_4;
+    r->d[4] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[5]) + SECP256K1_N_5;
+    r->d[5] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[6]) + SECP256K1_N_6;
+    r->d[6] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[7]) + SECP256K1_N_7;
+    r->d[7] = t & nonzero;
+
+    SECP256K1_SCALAR_VERIFY(r);
+}
+
+/* Set r = a/2 modulo the group order, i.e. a times the inverse of 2. */
+static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a) {
+    /* Writing `/` for field division and `//` for integer division, we compute
+     *
+     *   a/2 = (a - (a&1))/2 + (a&1)/2
+     *       = (a >> 1) + (a&1 ?    1/2 : 0)
+     *       = (a >> 1) + (a&1 ? n//2+1 : 0),
+     *
+     * where n is the group order and in the last equality we have used 1/2 = n//2+1 (mod n).
+     * For n//2, we have the constants SECP256K1_N_H_0, ...
+     *
+     * This sum does not overflow. The most extreme case is a = -2, the largest odd scalar. Here:
+     * - the left summand is:  a >> 1 = (a - a&1)/2 = (n-2-1)//2           = (n-3)//2
+     * - the right summand is: a&1 ? n//2+1 : 0 = n//2+1 = (n-1)//2 + 2//2 = (n+1)//2
+     * Together they sum to (n-3)//2 + (n+1)//2 = (2n-2)//2 = n - 1, which is less than n.
+     */
+    /* All-ones if a is odd, all-zeros if a is even; selects the n//2+1 summand. */
+    uint32_t mask = -(uint32_t)(a->d[0] & 1U);
+    /* Each (d[i] >> 1) | (d[i+1] << 31) term is limb i of a >> 1. */
+    uint64_t t = (uint32_t)((a->d[0] >> 1) | (a->d[1] << 31));
+    SECP256K1_SCALAR_VERIFY(a);
+
+    t += (SECP256K1_N_H_0 + 1U) & mask;
+    r->d[0] = t; t >>= 32;
+    t += (uint32_t)((a->d[1] >> 1) | (a->d[2] << 31));
+    t += SECP256K1_N_H_1 & mask;
+    r->d[1] = t; t >>= 32;
+    t += (uint32_t)((a->d[2] >> 1) | (a->d[3] << 31));
+    t += SECP256K1_N_H_2 & mask;
+    r->d[2] = t; t >>= 32;
+    t += (uint32_t)((a->d[3] >> 1) | (a->d[4] << 31));
+    t += SECP256K1_N_H_3 & mask;
+    r->d[3] = t; t >>= 32;
+    t += (uint32_t)((a->d[4] >> 1) | (a->d[5] << 31));
+    t += SECP256K1_N_H_4 & mask;
+    r->d[4] = t; t >>= 32;
+    t += (uint32_t)((a->d[5] >> 1) | (a->d[6] << 31));
+    t += SECP256K1_N_H_5 & mask;
+    r->d[5] = t; t >>= 32;
+    t += (uint32_t)((a->d[6] >> 1) | (a->d[7] << 31));
+    t += SECP256K1_N_H_6 & mask;
+    r->d[6] = t; t >>= 32;
+    r->d[7] = (uint32_t)t + (uint32_t)(a->d[7] >> 1) + (SECP256K1_N_H_7 & mask);
+
+    /* The line above only computed the bottom 32 bits of r->d[7]. Redo the computation
+     * in full 64 bits to make sure the top 32 bits are indeed zero. */
+    VERIFY_CHECK((t + (a->d[7] >> 1) + (SECP256K1_N_H_7 & mask)) >> 32 == 0);
+
+    SECP256K1_SCALAR_VERIFY(r);
+}
+
+/* Return 1 if a equals one, 0 otherwise. */
+SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar *a) {
+    uint32_t diff_bits;
+    int limb;
+    SECP256K1_SCALAR_VERIFY(a);
+
+    /* Limb 0 must equal 1 (XOR gives zero) and every other limb must be zero. */
+    diff_bits = a->d[0] ^ 1;
+    for (limb = 1; limb < 8; limb++) {
+        diff_bits |= a->d[limb];
+    }
+    return diff_bits == 0;
+}
+
+/* Return 1 if a is strictly greater than half the group order (the
+ * SECP256K1_N_H_* limbs), 0 otherwise. Uses the same branch-free yes/no
+ * latching comparison as secp256k1_scalar_check_overflow, starting from the
+ * most significant limb. */
+static int secp256k1_scalar_is_high(const secp256k1_scalar *a) {
+    int yes = 0;
+    int no = 0;
+    SECP256K1_SCALAR_VERIFY(a);
+
+    no |= (a->d[7] < SECP256K1_N_H_7);
+    yes |= (a->d[7] > SECP256K1_N_H_7) & ~no;
+    /* N_H_6, N_H_5 and N_H_4 are 0xFFFFFFFF, so a limb can never exceed them. */
+    no |= (a->d[6] < SECP256K1_N_H_6) & ~yes; /* No need for a > check. */
+    no |= (a->d[5] < SECP256K1_N_H_5) & ~yes; /* No need for a > check. */
+    no |= (a->d[4] < SECP256K1_N_H_4) & ~yes; /* No need for a > check. */
+    no |= (a->d[3] < SECP256K1_N_H_3) & ~yes;
+    yes |= (a->d[3] > SECP256K1_N_H_3) & ~no;
+    no |= (a->d[2] < SECP256K1_N_H_2) & ~yes;
+    yes |= (a->d[2] > SECP256K1_N_H_2) & ~no;
+    no |= (a->d[1] < SECP256K1_N_H_1) & ~yes;
+    yes |= (a->d[1] > SECP256K1_N_H_1) & ~no;
+    /* Strict > on the last limb: a value equal to the half order is not high. */
+    yes |= (a->d[0] > SECP256K1_N_H_0) & ~no;
+    return yes;
+}
+
+/* Negate r in place if flag is 1 and leave it unchanged if flag is 0, running
+ * the same sequence of operations in both cases. Returns -1 if flag was set
+ * and 1 otherwise. flag is expected to be 0 or 1: the mask below is all-ones
+ * only for flag == 1. */
+static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
+    /* If flag = 0, mask = 00...00 and this is a no-op;
+     * if flag = 1, mask = 11...11 and this is identical to secp256k1_scalar_negate */
+    /* NOTE(review): the volatile copy presumably keeps the compiler from
+     * introducing a branch on flag - confirm. */
+    volatile int vflag = flag;
+    uint32_t mask = -vflag;
+    /* All-ones if r is non-zero, all-zeros otherwise (keeps zero at zero). */
+    uint32_t nonzero = 0xFFFFFFFFUL * (secp256k1_scalar_is_zero(r) == 0);
+    /* r ^ mask is ~r when negating and r otherwise; N + 1 is added only when negating. */
+    uint64_t t = (uint64_t)(r->d[0] ^ mask) + ((SECP256K1_N_0 + 1) & mask);
+    SECP256K1_SCALAR_VERIFY(r);
+
+    r->d[0] = t & nonzero; t >>= 32;
+    t += (uint64_t)(r->d[1] ^ mask) + (SECP256K1_N_1 & mask);
+    r->d[1] = t & nonzero; t >>= 32;
+    t += (uint64_t)(r->d[2] ^ mask) + (SECP256K1_N_2 & mask);
+    r->d[2] = t & nonzero; t >>= 32;
+    t += (uint64_t)(r->d[3] ^ mask) + (SECP256K1_N_3 & mask);
+    r->d[3] = t & nonzero; t >>= 32;
+    t += (uint64_t)(r->d[4] ^ mask) + (SECP256K1_N_4 & mask);
+    r->d[4] = t & nonzero; t >>= 32;
+    t += (uint64_t)(r->d[5] ^ mask) + (SECP256K1_N_5 & mask);
+    r->d[5] = t & nonzero; t >>= 32;
+    t += (uint64_t)(r->d[6] ^ mask) + (SECP256K1_N_6 & mask);
+    r->d[6] = t & nonzero; t >>= 32;
+    t += (uint64_t)(r->d[7] ^ mask) + (SECP256K1_N_7 & mask);
+    r->d[7] = t & nonzero;
+
+    SECP256K1_SCALAR_VERIFY(r);
+    /* mask == 0 (flag clear) gives 2*1 - 1 = 1; otherwise 2*0 - 1 = -1. */
+    return 2 * (mask == 0) - 1;
+}
+
+
+/* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */
+
+/** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow.
+ *  c0 is the least significant 32-bit limb of the accumulator. a and b are
+ *  32-bit values; both arguments are parenthesized in the expansion so that a
+ *  compound expression is multiplied as a whole. */
+#define muladd(a,b) { \
+    uint32_t tl, th; \
+    { \
+        uint64_t t = (uint64_t)(a) * (b); \
+        th = t >> 32;         /* at most 0xFFFFFFFE */ \
+        tl = t; \
+    } \
+    c0 += tl;                 /* overflow is handled on the next line */ \
+    th += (c0 < tl);          /* at most 0xFFFFFFFF */ \
+    c1 += th;                 /* overflow is handled on the next line */ \
+    c2 += (c1 < th);          /* never overflows by contract (verified in the next line) */ \
+    VERIFY_CHECK((c1 >= th) || (c2 != 0)); \
+}
+
+/** Add a*b to the number defined by (c0,c1). c1 must never overflow.
+ *  c0 is the least significant 32-bit limb of the accumulator. a and b are
+ *  32-bit values; both arguments are parenthesized in the expansion so that a
+ *  compound expression is multiplied as a whole. */
+#define muladd_fast(a,b) { \
+    uint32_t tl, th; \
+    { \
+        uint64_t t = (uint64_t)(a) * (b); \
+        th = t >> 32;         /* at most 0xFFFFFFFE */ \
+        tl = t; \
+    } \
+    c0 += tl;                 /* overflow is handled on the next line */ \
+    th += (c0 < tl);          /* at most 0xFFFFFFFF */ \
+    c1 += th;                 /* never overflows by contract (verified in the next line) */ \
+    VERIFY_CHECK(c1 >= th); \
+}
+
+/** Add a to the number defined by (c0,c1,c2). c2 must never overflow.
+ *  The argument is evaluated twice, so it must not have side effects. */
+#define sumadd(a) { \
+    unsigned int over; \
+    c0 += (a);                  /* overflow is handled on the next line */ \
+    over = (c0 < (a)); \
+    c1 += over;                 /* overflow is handled on the next line */ \
+    c2 += (c1 < over);          /* never overflows by contract */ \
+}
+
+/** Add a to the number defined by (c0,c1). c1 must never overflow, c2 must be zero.
+ *  The argument is evaluated more than once, so it must not have side effects. */
+#define sumadd_fast(a) { \
+    c0 += (a);                 /* overflow is handled on the next line */ \
+    c1 += (c0 < (a));          /* never overflows by contract (verified the next line) */ \
+    VERIFY_CHECK((c1 != 0) | (c0 >= (a))); \
+    VERIFY_CHECK(c2 == 0); \
+}
+
+/** Extract the lowest 32 bits of (c0,c1,c2) into n, and shift the accumulator
+ *  down by 32 bits: c1 moves into c0, c2 into c1, and c2 becomes zero. */
+#define extract(n) { \
+    (n) = c0; \
+    c0 = c1; \
+    c1 = c2; \
+    c2 = 0; \
+}
+
+/** Extract the lowest 32 bits of (c0,c1,c2) into n, and shift the accumulator
+ *  down by 32 bits: c1 moves into c0 and c1 becomes zero. c2 is required to be
+ *  zero, so it is not touched. */
+#define extract_fast(n) { \
+    (n) = c0; \
+    c0 = c1; \
+    c1 = 0; \
+    VERIFY_CHECK(c2 == 0); \
+}
+
+/* Reduce the 512-bit value l[0..15] (sixteen 32-bit limbs, least significant first)
+ * into the scalar r.
+ *
+ * The upper limbs are folded into the lower ones by multiplying them with the
+ * SECP256K1_N_C_* limbs, in three passes of shrinking width (512 -> 385 -> 258 -> 256
+ * bits), followed by a call to secp256k1_scalar_reduce. The first two passes work
+ * column by column on the (c0,c1,c2) accumulator through the muladd/sumadd/extract
+ * macros; the last pass is a plain 64-bit carry chain.
+ * NOTE(review): the bare sumadd(nX) / sumadd(mX) steps look like the contribution of
+ * a limb of SECP256K1_N_C that equals 1 -- confirm against the SECP256K1_N_C_*
+ * definitions. */
+static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint32_t *l) {
+    uint64_t c;
+    /* n0..n7 are the upper 256 bits of the input. */
+    uint32_t n0 = l[8], n1 = l[9], n2 = l[10], n3 = l[11], n4 = l[12], n5 = l[13], n6 = l[14], n7 = l[15];
+    uint32_t m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12;
+    uint32_t p0, p1, p2, p3, p4, p5, p6, p7, p8;
+
+    /* 96 bit accumulator. */
+    uint32_t c0, c1, c2;
+
+    /* Reduce 512 bits into 385. */
+    /* m[0..12] = l[0..7] + n[0..7] * SECP256K1_N_C. */
+    c0 = l[0]; c1 = 0; c2 = 0;
+    muladd_fast(n0, SECP256K1_N_C_0);
+    extract_fast(m0);
+    sumadd_fast(l[1]);
+    muladd(n1, SECP256K1_N_C_0);
+    muladd(n0, SECP256K1_N_C_1);
+    extract(m1);
+    sumadd(l[2]);
+    muladd(n2, SECP256K1_N_C_0);
+    muladd(n1, SECP256K1_N_C_1);
+    muladd(n0, SECP256K1_N_C_2);
+    extract(m2);
+    sumadd(l[3]);
+    muladd(n3, SECP256K1_N_C_0);
+    muladd(n2, SECP256K1_N_C_1);
+    muladd(n1, SECP256K1_N_C_2);
+    muladd(n0, SECP256K1_N_C_3);
+    extract(m3);
+    sumadd(l[4]);
+    muladd(n4, SECP256K1_N_C_0);
+    muladd(n3, SECP256K1_N_C_1);
+    muladd(n2, SECP256K1_N_C_2);
+    muladd(n1, SECP256K1_N_C_3);
+    sumadd(n0);
+    extract(m4);
+    sumadd(l[5]);
+    muladd(n5, SECP256K1_N_C_0);
+    muladd(n4, SECP256K1_N_C_1);
+    muladd(n3, SECP256K1_N_C_2);
+    muladd(n2, SECP256K1_N_C_3);
+    sumadd(n1);
+    extract(m5);
+    sumadd(l[6]);
+    muladd(n6, SECP256K1_N_C_0);
+    muladd(n5, SECP256K1_N_C_1);
+    muladd(n4, SECP256K1_N_C_2);
+    muladd(n3, SECP256K1_N_C_3);
+    sumadd(n2);
+    extract(m6);
+    sumadd(l[7]);
+    muladd(n7, SECP256K1_N_C_0);
+    muladd(n6, SECP256K1_N_C_1);
+    muladd(n5, SECP256K1_N_C_2);
+    muladd(n4, SECP256K1_N_C_3);
+    sumadd(n3);
+    extract(m7);
+    muladd(n7, SECP256K1_N_C_1);
+    muladd(n6, SECP256K1_N_C_2);
+    muladd(n5, SECP256K1_N_C_3);
+    sumadd(n4);
+    extract(m8);
+    muladd(n7, SECP256K1_N_C_2);
+    muladd(n6, SECP256K1_N_C_3);
+    sumadd(n5);
+    extract(m9);
+    muladd(n7, SECP256K1_N_C_3);
+    sumadd(n6);
+    extract(m10);
+    sumadd_fast(n7);
+    extract_fast(m11);
+    /* Whatever is left in the accumulator becomes the top limb m12 (checked to be at most 1). */
+    VERIFY_CHECK(c0 <= 1);
+    m12 = c0;
+
+    /* Reduce 385 bits into 258. */
+    /* p[0..8] = m[0..7] + m[8..12] * SECP256K1_N_C. */
+    c0 = m0; c1 = 0; c2 = 0;
+    muladd_fast(m8, SECP256K1_N_C_0);
+    extract_fast(p0);
+    sumadd_fast(m1);
+    muladd(m9, SECP256K1_N_C_0);
+    muladd(m8, SECP256K1_N_C_1);
+    extract(p1);
+    sumadd(m2);
+    muladd(m10, SECP256K1_N_C_0);
+    muladd(m9, SECP256K1_N_C_1);
+    muladd(m8, SECP256K1_N_C_2);
+    extract(p2);
+    sumadd(m3);
+    muladd(m11, SECP256K1_N_C_0);
+    muladd(m10, SECP256K1_N_C_1);
+    muladd(m9, SECP256K1_N_C_2);
+    muladd(m8, SECP256K1_N_C_3);
+    extract(p3);
+    sumadd(m4);
+    muladd(m12, SECP256K1_N_C_0);
+    muladd(m11, SECP256K1_N_C_1);
+    muladd(m10, SECP256K1_N_C_2);
+    muladd(m9, SECP256K1_N_C_3);
+    sumadd(m8);
+    extract(p4);
+    sumadd(m5);
+    muladd(m12, SECP256K1_N_C_1);
+    muladd(m11, SECP256K1_N_C_2);
+    muladd(m10, SECP256K1_N_C_3);
+    sumadd(m9);
+    extract(p5);
+    sumadd(m6);
+    muladd(m12, SECP256K1_N_C_2);
+    muladd(m11, SECP256K1_N_C_3);
+    sumadd(m10);
+    extract(p6);
+    sumadd_fast(m7);
+    muladd_fast(m12, SECP256K1_N_C_3);
+    sumadd_fast(m11);
+    extract_fast(p7);
+    /* p8 is the remaining accumulator word plus m12; it is checked to be at most 2. */
+    p8 = c0 + m12;
+    VERIFY_CHECK(p8 <= 2);
+
+    /* Reduce 258 bits into 256. */
+    /* r[0..7] = p[0..7] + p[8] * SECP256K1_N_C. */
+    /* c is a 64-bit carry chain: the low 32 bits go to the output limb, the rest carries on. */
+    c = p0 + (uint64_t)SECP256K1_N_C_0 * p8;
+    r->d[0] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p1 + (uint64_t)SECP256K1_N_C_1 * p8;
+    r->d[1] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p2 + (uint64_t)SECP256K1_N_C_2 * p8;
+    r->d[2] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p3 + (uint64_t)SECP256K1_N_C_3 * p8;
+    r->d[3] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p4 + (uint64_t)p8;
+    r->d[4] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p5;
+    r->d[5] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p6;
+    r->d[6] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p7;
+    r->d[7] = c & 0xFFFFFFFFUL; c >>= 32;
+
+    /* Final reduction of r. */
+    /* c now holds the carry out of limb 7; it is passed on together with r's overflow flag. */
+    secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
+}
+
+/* Compute the full 512-bit product of a and b into l[0..15] (32-bit limbs, least
+ * significant first).
+ *
+ * Each output limb l[k] is one column of the schoolbook multiplication: every product
+ * a->d[i] * b->d[j] with i + j == k is accumulated into (c0,c1,c2), after which the
+ * low 32 bits are extracted into l[k] and the accumulator moves on to the next column. */
+static void secp256k1_scalar_mul_512(uint32_t *l, const secp256k1_scalar *a, const secp256k1_scalar *b) {
+    /* 96 bit accumulator. */
+    uint32_t c0 = 0, c1 = 0, c2 = 0;
+
+    /* l[0..15] = a[0..7] * b[0..7]. */
+    muladd_fast(a->d[0], b->d[0]);
+    extract_fast(l[0]);
+    muladd(a->d[0], b->d[1]);
+    muladd(a->d[1], b->d[0]);
+    extract(l[1]);
+    muladd(a->d[0], b->d[2]);
+    muladd(a->d[1], b->d[1]);
+    muladd(a->d[2], b->d[0]);
+    extract(l[2]);
+    muladd(a->d[0], b->d[3]);
+    muladd(a->d[1], b->d[2]);
+    muladd(a->d[2], b->d[1]);
+    muladd(a->d[3], b->d[0]);
+    extract(l[3]);
+    muladd(a->d[0], b->d[4]);
+    muladd(a->d[1], b->d[3]);
+    muladd(a->d[2], b->d[2]);
+    muladd(a->d[3], b->d[1]);
+    muladd(a->d[4], b->d[0]);
+    extract(l[4]);
+    muladd(a->d[0], b->d[5]);
+    muladd(a->d[1], b->d[4]);
+    muladd(a->d[2], b->d[3]);
+    muladd(a->d[3], b->d[2]);
+    muladd(a->d[4], b->d[1]);
+    muladd(a->d[5], b->d[0]);
+    extract(l[5]);
+    muladd(a->d[0], b->d[6]);
+    muladd(a->d[1], b->d[5]);
+    muladd(a->d[2], b->d[4]);
+    muladd(a->d[3], b->d[3]);
+    muladd(a->d[4], b->d[2]);
+    muladd(a->d[5], b->d[1]);
+    muladd(a->d[6], b->d[0]);
+    extract(l[6]);
+    muladd(a->d[0], b->d[7]);
+    muladd(a->d[1], b->d[6]);
+    muladd(a->d[2], b->d[5]);
+    muladd(a->d[3], b->d[4]);
+    muladd(a->d[4], b->d[3]);
+    muladd(a->d[5], b->d[2]);
+    muladd(a->d[6], b->d[1]);
+    muladd(a->d[7], b->d[0]);
+    extract(l[7]);
+    muladd(a->d[1], b->d[7]);
+    muladd(a->d[2], b->d[6]);
+    muladd(a->d[3], b->d[5]);
+    muladd(a->d[4], b->d[4]);
+    muladd(a->d[5], b->d[3]);
+    muladd(a->d[6], b->d[2]);
+    muladd(a->d[7], b->d[1]);
+    extract(l[8]);
+    muladd(a->d[2], b->d[7]);
+    muladd(a->d[3], b->d[6]);
+    muladd(a->d[4], b->d[5]);
+    muladd(a->d[5], b->d[4]);
+    muladd(a->d[6], b->d[3]);
+    muladd(a->d[7], b->d[2]);
+    extract(l[9]);
+    muladd(a->d[3], b->d[7]);
+    muladd(a->d[4], b->d[6]);
+    muladd(a->d[5], b->d[5]);
+    muladd(a->d[6], b->d[4]);
+    muladd(a->d[7], b->d[3]);
+    extract(l[10]);
+    muladd(a->d[4], b->d[7]);
+    muladd(a->d[5], b->d[6]);
+    muladd(a->d[6], b->d[5]);
+    muladd(a->d[7], b->d[4]);
+    extract(l[11]);
+    muladd(a->d[5], b->d[7]);
+    muladd(a->d[6], b->d[6]);
+    muladd(a->d[7], b->d[5]);
+    extract(l[12]);
+    muladd(a->d[6], b->d[7]);
+    muladd(a->d[7], b->d[6]);
+    extract(l[13]);
+    muladd_fast(a->d[7], b->d[7]);
+    extract_fast(l[14]);
+    /* After the last column only c0 may still be non-zero; it is the top limb. */
+    VERIFY_CHECK(c1 == 0);
+    l[15] = c0;
+}
+
+#undef sumadd
+#undef sumadd_fast
+#undef muladd
+#undef muladd_fast
+#undef extract
+#undef extract_fast
+
+/* Multiply a by b and store the reduced result in r. */
+static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
+    /* Full 512-bit product, as sixteen 32-bit limbs. */
+    uint32_t product[16];
+    SECP256K1_SCALAR_VERIFY(a);
+    SECP256K1_SCALAR_VERIFY(b);
+
+    /* Widening multiply first, then fold the wide value back into a scalar. */
+    secp256k1_scalar_mul_512(product, a, b);
+    secp256k1_scalar_reduce_512(r, product);
+
+    SECP256K1_SCALAR_VERIFY(r);
+}
+
+/* Split k into two 128-bit halves: r1 receives limbs 0..3 of k and r2 receives limbs
+ * 4..7 of k (moved down to limbs 0..3); the upper four limbs of both outputs are zero. */
+static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) {
+    int limb;
+    SECP256K1_SCALAR_VERIFY(k);
+
+    /* Low half of k, then zero padding. */
+    for (limb = 0; limb < 4; limb++) {
+        r1->d[limb] = k->d[limb];
+    }
+    for (limb = 4; limb < 8; limb++) {
+        r1->d[limb] = 0;
+    }
+
+    /* High half of k, then zero padding. */
+    for (limb = 0; limb < 4; limb++) {
+        r2->d[limb] = k->d[limb + 4];
+    }
+    for (limb = 4; limb < 8; limb++) {
+        r2->d[limb] = 0;
+    }
+
+    SECP256K1_SCALAR_VERIFY(r1);
+    SECP256K1_SCALAR_VERIFY(r2);
+}
+
+/* Return 1 when all eight limbs of a and b match and 0 otherwise. Every limb is
+ * always examined; there is no early exit. */
+SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar *a, const secp256k1_scalar *b) {
+    uint32_t diff;
+    SECP256K1_SCALAR_VERIFY(a);
+    SECP256K1_SCALAR_VERIFY(b);
+
+    /* Collect the XOR of each limb pair; the union is zero only if every pair is equal. */
+    diff  = a->d[0] ^ b->d[0];
+    diff |= a->d[1] ^ b->d[1];
+    diff |= a->d[2] ^ b->d[2];
+    diff |= a->d[3] ^ b->d[3];
+    diff |= a->d[4] ^ b->d[4];
+    diff |= a->d[5] ^ b->d[5];
+    diff |= a->d[6] ^ b->d[6];
+    diff |= a->d[7] ^ b->d[7];
+    return diff == 0;
+}
+
+/* Compute r = (a * b) >> shift, rounded to nearest: the full 512-bit product is
+ * shifted right by `shift` bits and bit (shift - 1) of the product is then added to
+ * the result. `shift` is checked (under VERIFY) to be at least 256. */
+SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b, unsigned int shift) {
+    uint32_t l[16];
+    unsigned int shiftlimbs;
+    unsigned int shiftlow;
+    unsigned int shifthigh;
+    SECP256K1_SCALAR_VERIFY(a);
+    SECP256K1_SCALAR_VERIFY(b);
+    VERIFY_CHECK(shift >= 256);
+
+    secp256k1_scalar_mul_512(l, a, b);
+    /* Split the shift into whole 32-bit limbs and the remaining bit count. */
+    shiftlimbs = shift >> 5;
+    shiftlow = shift & 0x1F;
+    shifthigh = 32 - shiftlow;
+    /* Each output limb combines the upper bits of one product limb with the lower bits
+     * of the next. The `shift < N` tests keep every index inside l[0..15]; the
+     * `shiftlow ?` test skips the left shift when shifthigh would be 32. */
+    r->d[0] = shift < 512 ? (l[0 + shiftlimbs] >> shiftlow | (shift < 480 && shiftlow ? (l[1 + shiftlimbs] << shifthigh) : 0)) : 0;
+    r->d[1] = shift < 480 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 448 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0;
+    r->d[2] = shift < 448 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 416 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0;
+    r->d[3] = shift < 416 ? (l[3 + shiftlimbs] >> shiftlow | (shift < 384 && shiftlow ? (l[4 + shiftlimbs] << shifthigh) : 0)) : 0;
+    r->d[4] = shift < 384 ? (l[4 + shiftlimbs] >> shiftlow | (shift < 352 && shiftlow ? (l[5 + shiftlimbs] << shifthigh) : 0)) : 0;
+    r->d[5] = shift < 352 ? (l[5 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[6 + shiftlimbs] << shifthigh) : 0)) : 0;
+    r->d[6] = shift < 320 ? (l[6 + shiftlimbs] >> shiftlow | (shift < 288 && shiftlow ? (l[7 + shiftlimbs] << shifthigh) : 0)) : 0;
+    r->d[7] = shift < 288 ? (l[7 + shiftlimbs] >> shiftlow)  : 0;
+    /* Rounding: add the highest bit that was shifted out (bit shift-1 of the product). */
+    secp256k1_scalar_cadd_bit(r, 0, (l[(shift - 1) >> 5] >> ((shift - 1) & 0x1f)) & 1);
+
+    SECP256K1_SCALAR_VERIFY(r);
+}
+
+/* Conditional move without a branch: with flag == 1 every limb of r is replaced by the
+ * matching limb of a, with flag == 0 r keeps its value. The flag is read through a
+ * volatile copy before the masks are derived from it. */
+static SECP256K1_INLINE void secp256k1_scalar_cmov(secp256k1_scalar *r, const secp256k1_scalar *a, int flag) {
+    uint32_t keep_mask, take_mask;
+    volatile int vflag = flag;
+    int limb;
+    SECP256K1_SCALAR_VERIFY(a);
+    SECP256K1_CHECKMEM_CHECK_VERIFY(r->d, sizeof(r->d));
+
+    /* flag == 0 -> keep_mask is all ones; flag == 1 -> keep_mask is zero. */
+    keep_mask = vflag + ~((uint32_t)0);
+    take_mask = ~keep_mask;
+    for (limb = 0; limb < 8; limb++) {
+        r->d[limb] = (r->d[limb] & keep_mask) | (a->d[limb] & take_mask);
+    }
+
+    SECP256K1_SCALAR_VERIFY(r);
+}
+
+/* Repack a signed30 value (nine limbs of 30 bits each) into the eight 32-bit limbs of
+ * a scalar. */
+static void secp256k1_scalar_from_signed30(secp256k1_scalar *r, const secp256k1_modinv32_signed30 *a) {
+    const uint32_t v0 = a->v[0], v1 = a->v[1], v2 = a->v[2],
+                   v3 = a->v[3], v4 = a->v[4], v5 = a->v[5],
+                   v6 = a->v[6], v7 = a->v[7], v8 = a->v[8];
+
+    /* The output from secp256k1_modinv32{_var} should be normalized to range [0,modulus), and
+     * have limbs in [0,2^30). The modulus is < 2^256, so the top limb must be below 2^(256-30*8).
+     */
+    VERIFY_CHECK((v0 >> 30) == 0);
+    VERIFY_CHECK((v1 >> 30) == 0);
+    VERIFY_CHECK((v2 >> 30) == 0);
+    VERIFY_CHECK((v3 >> 30) == 0);
+    VERIFY_CHECK((v4 >> 30) == 0);
+    VERIFY_CHECK((v5 >> 30) == 0);
+    VERIFY_CHECK((v6 >> 30) == 0);
+    VERIFY_CHECK((v7 >> 30) == 0);
+    VERIFY_CHECK((v8 >> 16) == 0);
+
+    /* Output limb i starts 2*i bits into input limb i and is topped up from limb i+1. */
+    r->d[0] =  v0        | (v1 << 30);
+    r->d[1] = (v1 >>  2) | (v2 << 28);
+    r->d[2] = (v2 >>  4) | (v3 << 26);
+    r->d[3] = (v3 >>  6) | (v4 << 24);
+    r->d[4] = (v4 >>  8) | (v5 << 22);
+    r->d[5] = (v5 >> 10) | (v6 << 20);
+    r->d[6] = (v6 >> 12) | (v7 << 18);
+    r->d[7] = (v7 >> 14) | (v8 << 16);
+
+    SECP256K1_SCALAR_VERIFY(r);
+}
+
+/* Repack the eight 32-bit limbs of a scalar into nine limbs of 30 bits each (the last
+ * limb holds the remaining top 16 bits). */
+static void secp256k1_scalar_to_signed30(secp256k1_modinv32_signed30 *r, const secp256k1_scalar *a) {
+    const uint32_t mask30 = UINT32_MAX >> 2;
+    const uint32_t w0 = a->d[0], w1 = a->d[1], w2 = a->d[2], w3 = a->d[3],
+                   w4 = a->d[4], w5 = a->d[5], w6 = a->d[6], w7 = a->d[7];
+    SECP256K1_SCALAR_VERIFY(a);
+
+    /* Each output limb takes the bits left over from one word plus the low bits of the next. */
+    r->v[0] =   w0                       & mask30;
+    r->v[1] = ((w0 >> 30) | (w1 <<  2)) & mask30;
+    r->v[2] = ((w1 >> 28) | (w2 <<  4)) & mask30;
+    r->v[3] = ((w2 >> 26) | (w3 <<  6)) & mask30;
+    r->v[4] = ((w3 >> 24) | (w4 <<  8)) & mask30;
+    r->v[5] = ((w4 >> 22) | (w5 << 10)) & mask30;
+    r->v[6] = ((w5 >> 20) | (w6 << 12)) & mask30;
+    r->v[7] = ((w6 >> 18) | (w7 << 14)) & mask30;
+    r->v[8] =   w7 >> 16;
+}
+
+/* Modulus description passed to secp256k1_modinv32 and secp256k1_modinv32_var by the
+ * scalar inversion functions. The first member is a nine-limb value (one limb is
+ * negative, so the limbs are signed); the second is a single word.
+ * NOTE(review): presumably the limbs encode the group order in signed 30-bit form and
+ * the second member is its inverse modulo 2^30 -- confirm against modinv32.h. */
+static const secp256k1_modinv32_modinfo secp256k1_const_modinfo_scalar = {
+    {{0x10364141L, 0x3F497A33L, 0x348A03BBL, 0x2BB739ABL, -0x146L, 0, 0, 0, 65536}},
+    0x2A774EC1L
+};
+
+/* Invert x into r by converting to signed30 form, running secp256k1_modinv32 with the
+ * scalar modulus info, and converting back. Under VERIFY it is checked that the output
+ * is zero exactly when the input was zero. */
+static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar *x) {
+    secp256k1_modinv32_signed30 x30;
+#ifdef VERIFY
+    int x_was_zero = secp256k1_scalar_is_zero(x);
+#endif
+    SECP256K1_SCALAR_VERIFY(x);
+
+    /* scalar -> signed30, invert in place, signed30 -> scalar. */
+    secp256k1_scalar_to_signed30(&x30, x);
+    secp256k1_modinv32(&x30, &secp256k1_const_modinfo_scalar);
+    secp256k1_scalar_from_signed30(r, &x30);
+
+    SECP256K1_SCALAR_VERIFY(r);
+    VERIFY_CHECK(secp256k1_scalar_is_zero(r) == x_was_zero);
+}
+
+/* Same as secp256k1_scalar_inverse, but the inversion step uses secp256k1_modinv32_var.
+ * Under VERIFY it is checked that the output is zero exactly when the input was zero. */
+static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_scalar *x) {
+    secp256k1_modinv32_signed30 x30;
+#ifdef VERIFY
+    int x_was_zero = secp256k1_scalar_is_zero(x);
+#endif
+    SECP256K1_SCALAR_VERIFY(x);
+
+    /* scalar -> signed30, invert in place, signed30 -> scalar. */
+    secp256k1_scalar_to_signed30(&x30, x);
+    secp256k1_modinv32_var(&x30, &secp256k1_const_modinfo_scalar);
+    secp256k1_scalar_from_signed30(r, &x30);
+
+    SECP256K1_SCALAR_VERIFY(r);
+    VERIFY_CHECK(secp256k1_scalar_is_zero(r) == x_was_zero);
+}
+
+/* Return 1 if the lowest bit of limb 0 is clear (the scalar is even), 0 otherwise. */
+SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar *a) {
+    SECP256K1_SCALAR_VERIFY(a);
+
+    return (a->d[0] & 1) == 0;
+}
+
+#endif /* SECP256K1_SCALAR_REPR_IMPL_H */
--- a/libsecp256k1/src/scalar_impl.h
+++ b/libsecp256k1/src/scalar_impl.h
@@ -0,0 +1,321 @@
+/***********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                    *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_SCALAR_IMPL_H
+#define SECP256K1_SCALAR_IMPL_H
+
+#ifdef VERIFY
+#include <string.h>
+#endif
+
+#include "scalar.h"
+#include "util.h"
+
+#if defined(EXHAUSTIVE_TEST_ORDER)
+#include "scalar_low_impl.h"
+#elif defined(SECP256K1_WIDEMUL_INT128)
+#include "scalar_4x64_impl.h"
+#elif defined(SECP256K1_WIDEMUL_INT64)
+#include "scalar_8x32_impl.h"
+#else
+#error "Please select wide multiplication implementation"
+#endif
+
+/* Scalar constants 1 and 0. SECP256K1_SCALAR_CONST lists the eight 32-bit words from
+ * most significant to least significant, so only the last argument is non-zero for 1. */
+static const secp256k1_scalar secp256k1_scalar_one = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 1);
+static const secp256k1_scalar secp256k1_scalar_zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0);
+
+/* Wipe the storage of the scalar r using secp256k1_memclear. */
+SECP256K1_INLINE static void secp256k1_scalar_clear(secp256k1_scalar *r) {
+    secp256k1_memclear(r, sizeof(*r));
+}
+
+/* Load a scalar from the 32 bytes at bin and report whether it is usable as a secret
+ * key: returns 1 only if loading did not overflow and the result is non-zero, else 0.
+ * r is written in either case. */
+static int secp256k1_scalar_set_b32_seckey(secp256k1_scalar *r, const unsigned char *bin) {
+    int overflow;
+    int in_range;
+    int nonzero;
+    secp256k1_scalar_set_b32(r, bin, &overflow);
+
+    SECP256K1_SCALAR_VERIFY(r);
+    in_range = !overflow;
+    nonzero = !secp256k1_scalar_is_zero(r);
+    /* Bitwise AND on purpose: both conditions are always evaluated. */
+    return in_range & nonzero;
+}
+
+/* Check (through VERIFY_CHECK) that r does not overflow, i.e. that it is fully reduced. */
+static void secp256k1_scalar_verify(const secp256k1_scalar *r) {
+    /* Mark r as used; presumably needed for builds where VERIFY_CHECK expands to nothing. */
+    (void)r;
+
+    VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0);
+}
+
+#if defined(EXHAUSTIVE_TEST_ORDER)
+/* Begin of section generated by sage/gen_exhaustive_groups.sage. */
+#  if EXHAUSTIVE_TEST_ORDER == 7
+#    define EXHAUSTIVE_TEST_LAMBDA 2
+#  elif EXHAUSTIVE_TEST_ORDER == 13
+#    define EXHAUSTIVE_TEST_LAMBDA 9
+#  elif EXHAUSTIVE_TEST_ORDER == 199
+#    define EXHAUSTIVE_TEST_LAMBDA 92
+#  else
+#    error No known lambda for the specified exhaustive test group order.
+#  endif
+/* End of section generated by sage/gen_exhaustive_groups.sage. */
+
+/**
+ * Exhaustive-test variant: produce r1 and r2 such that r1 + r2 * lambda == k mod n.
+ * Unlike the full-size case, no effort is made to keep r1 and r2 small; they only have
+ * to be nontrivial so that the exhaustive tests get full coverage. The (arbitrary)
+ * choice is r2 = k + 5 (mod n) and r1 = k - r2 * lambda (mod n).
+ */
+static void secp256k1_scalar_split_lambda(secp256k1_scalar * SECP256K1_RESTRICT r1, secp256k1_scalar * SECP256K1_RESTRICT r2, const secp256k1_scalar * SECP256K1_RESTRICT k) {
+    secp256k1_scalar second;
+    SECP256K1_SCALAR_VERIFY(k);
+    VERIFY_CHECK(r1 != k);
+    VERIFY_CHECK(r2 != k);
+    VERIFY_CHECK(r1 != r2);
+
+    second = (*k + 5) % EXHAUSTIVE_TEST_ORDER;
+    *r2 = second;
+    /* k - r2 * lambda is formed as k + (order - r2) * lambda so nothing goes negative. */
+    *r1 = (*k + (EXHAUSTIVE_TEST_ORDER - second) * EXHAUSTIVE_TEST_LAMBDA) % EXHAUSTIVE_TEST_ORDER;
+
+    SECP256K1_SCALAR_VERIFY(r1);
+    SECP256K1_SCALAR_VERIFY(r2);
+}
+#else
+/**
+ * The Secp256k1 curve has an endomorphism, where lambda * (x, y) = (beta * x, y), where
+ * lambda is the scalar below (32-bit words listed from most significant to least
+ * significant): */
+static const secp256k1_scalar secp256k1_const_lambda = SECP256K1_SCALAR_CONST(
+    0x5363AD4CUL, 0xC05C30E0UL, 0xA5261C02UL, 0x8812645AUL,
+    0x122E22EAUL, 0x20816678UL, 0xDF02967CUL, 0x1B23BD72UL
+);
+
+#ifdef VERIFY
+static void secp256k1_scalar_split_lambda_verify(const secp256k1_scalar *r1, const secp256k1_scalar *r2, const secp256k1_scalar *k);
+#endif
+
+/*
+ * Both lambda and beta are primitive cube roots of unity.  That is lambda^3 == 1 mod n and
+ * beta^3 == 1 mod p, where n is the curve order and p is the field order.
+ *
+ * Furthermore, because (X^3 - 1) = (X - 1)(X^2 + X + 1), the primitive cube roots of unity are
+ * roots of X^2 + X + 1.  Therefore lambda^2 + lambda == -1 mod n and beta^2 + beta == -1 mod p.
+ * (The other primitive cube roots of unity are lambda^2 and beta^2 respectively.)
+ *
+ * Let l = -1/2 + i*sqrt(3)/2, the complex root of X^2 + X + 1. We can define a ring
+ * homomorphism phi : Z[l] -> Z_n where phi(a + b*l) == a + b*lambda mod n. The kernel of phi
+ * is a lattice over Z[l] (considering Z[l] as a Z-module). This lattice is generated by a
+ * reduced basis {a1 + b1*l, a2 + b2*l} where
+ *
+ * - a1 =      {0x30,0x86,0xd2,0x21,0xa7,0xd4,0x6b,0xcd,0xe8,0x6c,0x90,0xe4,0x92,0x84,0xeb,0x15}
+ * - b1 =     -{0xe4,0x43,0x7e,0xd6,0x01,0x0e,0x88,0x28,0x6f,0x54,0x7f,0xa9,0x0a,0xbf,0xe4,0xc3}
+ * - a2 = {0x01,0x14,0xca,0x50,0xf7,0xa8,0xe2,0xf3,0xf6,0x57,0xc1,0x10,0x8d,0x9d,0x44,0xcf,0xd8}
+ * - b2 =      {0x30,0x86,0xd2,0x21,0xa7,0xd4,0x6b,0xcd,0xe8,0x6c,0x90,0xe4,0x92,0x84,0xeb,0x15}
+ *
+ * "Guide to Elliptic Curve Cryptography" (Hankerson, Menezes, Vanstone) gives an algorithm
+ * (algorithm 3.74) to find k1 and k2 given k, such that k1 + k2 * lambda == k mod n, and k1
+ * and k2 are small in absolute value.
+ *
+ * The algorithm computes c1 = round(b2 * k / n) and c2 = round((-b1) * k / n), and gives
+ * k1 = k - (c1*a1 + c2*a2) and k2 = -(c1*b1 + c2*b2). Instead, we use modular arithmetic, and
+ * compute r2 = k2 mod n, and r1 = k1 mod n = (k - r2 * lambda) mod n, avoiding the need for
+ * the constants a1 and a2.
+ *
+ * g1, g2 are precomputed constants used to replace division with a rounded multiplication
+ * when decomposing the scalar for an endomorphism-based point multiplication.
+ *
+ * The possibility of using precomputed estimates is mentioned in "Guide to Elliptic Curve
+ * Cryptography" (Hankerson, Menezes, Vanstone) in section 3.5.
+ *
+ * The derivation is described in the paper "Efficient Software Implementation of Public-Key
+ * Cryptography on Sensor Networks Using the MSP430X Microcontroller" (Gouvea, Oliveira, Lopez),
+ * Section 4.3 (here we use a somewhat higher-precision estimate):
+ * d = a1*b2 - b1*a2
+ * g1 = round(2^384 * b2/d)
+ * g2 = round(2^384 * (-b1)/d)
+ *
+ * (Note that d is also equal to the curve order, n, here because [a1,b1] and [a2,b2]
+ * can be found as outputs of the Extended Euclidean Algorithm on inputs n and lambda).
+ *
+ * The function below splits k into r1 and r2, such that
+ * - r1 + lambda * r2 == k (mod n)
+ * - either r1 < 2^128 or -r1 mod n < 2^128
+ * - either r2 < 2^128 or -r2 mod n < 2^128
+ *
+ * See proof below.
+ */
+/* Split k into r1 and r2 with r1 + lambda * r2 == k (mod n).
+ * r1, r2 and k must be three distinct objects (checked under VERIFY). When VERIFY is
+ * defined the result is additionally passed to secp256k1_scalar_split_lambda_verify. */
+static void secp256k1_scalar_split_lambda(secp256k1_scalar * SECP256K1_RESTRICT r1, secp256k1_scalar * SECP256K1_RESTRICT r2, const secp256k1_scalar * SECP256K1_RESTRICT k) {
+    secp256k1_scalar c1, c2;
+    /* -b1 as a scalar. */
+    static const secp256k1_scalar minus_b1 = SECP256K1_SCALAR_CONST(
+        0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
+        0xE4437ED6UL, 0x010E8828UL, 0x6F547FA9UL, 0x0ABFE4C3UL
+    );
+    /* NOTE(review): presumably -b2 reduced modulo n -- confirm. */
+    static const secp256k1_scalar minus_b2 = SECP256K1_SCALAR_CONST(
+        0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL,
+        0x8A280AC5UL, 0x0774346DUL, 0xD765CDA8UL, 0x3DB1562CUL
+    );
+    /* Precomputed multipliers that replace the divisions by n with a multiply and a
+     * rounded shift by 384 bits. */
+    static const secp256k1_scalar g1 = SECP256K1_SCALAR_CONST(
+        0x3086D221UL, 0xA7D46BCDUL, 0xE86C90E4UL, 0x9284EB15UL,
+        0x3DAA8A14UL, 0x71E8CA7FUL, 0xE893209AUL, 0x45DBB031UL
+    );
+    static const secp256k1_scalar g2 = SECP256K1_SCALAR_CONST(
+        0xE4437ED6UL, 0x010E8828UL, 0x6F547FA9UL, 0x0ABFE4C4UL,
+        0x221208ACUL, 0x9DF506C6UL, 0x1571B4AEUL, 0x8AC47F71UL
+    );
+    SECP256K1_SCALAR_VERIFY(k);
+    VERIFY_CHECK(r1 != k);
+    VERIFY_CHECK(r2 != k);
+    VERIFY_CHECK(r1 != r2);
+
+    /* these _var calls are constant time since the shift amount is constant */
+    /* c1 = round(k*g1 / 2^384), c2 = round(k*g2 / 2^384). */
+    secp256k1_scalar_mul_shift_var(&c1, k, &g1, 384);
+    secp256k1_scalar_mul_shift_var(&c2, k, &g2, 384);
+    /* r2 = c1*(-b1) + c2*(-b2). */
+    secp256k1_scalar_mul(&c1, &c1, &minus_b1);
+    secp256k1_scalar_mul(&c2, &c2, &minus_b2);
+    secp256k1_scalar_add(r2, &c1, &c2);
+    /* r1 = k - r2*lambda. */
+    secp256k1_scalar_mul(r1, r2, &secp256k1_const_lambda);
+    secp256k1_scalar_negate(r1, r1);
+    secp256k1_scalar_add(r1, r1, k);
+
+    SECP256K1_SCALAR_VERIFY(r1);
+    SECP256K1_SCALAR_VERIFY(r2);
+#ifdef VERIFY
+    secp256k1_scalar_split_lambda_verify(r1, r2, k);
+#endif
+}
+
+#ifdef VERIFY
+/*
+ * Proof for secp256k1_scalar_split_lambda's bounds.
+ *
+ * Let
+ *  - epsilon1 = 2^256 * |g1/2^384 - b2/d|
+ *  - epsilon2 = 2^256 * |g2/2^384 - (-b1)/d|
+ *  - c1 = round(k*g1/2^384)
+ *  - c2 = round(k*g2/2^384)
+ *
+ * Lemma 1: |c1 - k*b2/d| < 2^-1 + epsilon1
+ *
+ *    |c1 - k*b2/d|
+ *  =
+ *    |c1 - k*g1/2^384 + k*g1/2^384 - k*b2/d|
+ * <=   {triangle inequality}
+ *    |c1 - k*g1/2^384| + |k*g1/2^384 - k*b2/d|
+ *  =
+ *    |c1 - k*g1/2^384| + k*|g1/2^384 - b2/d|
+ * <    {rounding in c1 and 0 <= k < 2^256}
+ *    2^-1 + 2^256 * |g1/2^384 - b2/d|
+ *  =   {definition of epsilon1}
+ *    2^-1 + epsilon1
+ *
+ * Lemma 2: |c2 - k*(-b1)/d| < 2^-1 + epsilon2
+ *
+ *    |c2 - k*(-b1)/d|
+ *  =
+ *    |c2 - k*g2/2^384 + k*g2/2^384 - k*(-b1)/d|
+ * <=   {triangle inequality}
+ *    |c2 - k*g2/2^384| + |k*g2/2^384 - k*(-b1)/d|
+ *  =
+ *    |c2 - k*g2/2^384| + k*|g2/2^384 - (-b1)/d|
+ * <    {rounding in c2 and 0 <= k < 2^256}
+ *    2^-1 + 2^256 * |g2/2^384 - (-b1)/d|
+ *  =   {definition of epsilon2}
+ *    2^-1 + epsilon2
+ *
+ * Let
+ *  - k1 = k - c1*a1 - c2*a2
+ *  - k2 = - c1*b1 - c2*b2
+ *
+ * Lemma 3: |k1| < (a1 + a2 + 1)/2 < 2^128
+ *
+ *    |k1|
+ *  =   {definition of k1}
+ *    |k - c1*a1 - c2*a2|
+ *  =   {(a1*b2 - b1*a2)/n = 1}
+ *    |k*(a1*b2 - b1*a2)/n - c1*a1 - c2*a2|
+ *  =
+ *    |a1*(k*b2/n - c1) + a2*(k*(-b1)/n - c2)|
+ * <=   {triangle inequality}
+ *    a1*|k*b2/n - c1| + a2*|k*(-b1)/n - c2|
+ * <    {Lemma 1 and Lemma 2}
+ *    a1*(2^-1 + epsilon1) + a2*(2^-1 + epsilon2)
+ * <    {rounding up to an integer}
+ *    (a1 + a2 + 1)/2
+ * <    {rounding up to a power of 2}
+ *    2^128
+ *
+ * Lemma 4: |k2| < (-b1 + b2)/2 + 1 < 2^128
+ *
+ *    |k2|
+ *  =   {definition of k2}
+ *    |- c1*b1 - c2*b2|
+ *  =   {(b1*b2 - b1*b2)/n = 0}
+ *    |k*(b1*b2 - b1*b2)/n - c1*b1 - c2*b2|
+ *  =
+ *    |b1*(k*b2/n - c1) + b2*(k*(-b1)/n - c2)|
+ * <=   {triangle inequality}
+ *    (-b1)*|k*b2/n - c1| + b2*|k*(-b1)/n - c2|
+ * <    {Lemma 1 and Lemma 2}
+ *    (-b1)*(2^-1 + epsilon1) + b2*(2^-1 + epsilon2)
+ * <    {rounding up to an integer}
+ *    (-b1 + b2)/2 + 1
+ * <    {rounding up to a power of 2}
+ *    2^128
+ *
+ * Let
+ *  - r2 = k2 mod n
+ *  - r1 = k - r2*lambda mod n.
+ *
+ * Notice that r1 is defined such that r1 + r2 * lambda == k (mod n).
+ *
+ * Lemma 5: r1 == k1 mod n.
+ *
+ *    r1
+ * ==   {definition of r1 and r2}
+ *    k - k2*lambda
+ * ==   {definition of k2}
+ *    k - (- c1*b1 - c2*b2)*lambda
+ * ==
+ *    k + c1*b1*lambda + c2*b2*lambda
+ * ==  {a1 + b1*lambda == 0 mod n and a2 + b2*lambda == 0 mod n}
+ *    k - c1*a1 - c2*a2
+ * ==  {definition of k1}
+ *    k1
+ *
+ * From Lemma 3, Lemma 4, Lemma 5 and the definition of r2, we can conclude that
+ *
+ *  - either r1 < 2^128 or -r1 mod n < 2^128
+ *  - either r2 < 2^128 or -r2 mod n < 2^128.
+ *
+ * Q.E.D.
+ */
+/* VERIFY-only check of a lambda split: r1 + lambda * r2 must equal k, and each of r1
+ * and r2 must be below its bound either as-is or after negation. */
+static void secp256k1_scalar_split_lambda_verify(const secp256k1_scalar *r1, const secp256k1_scalar *r2, const secp256k1_scalar *k) {
+    /* (a1 + a2 + 1)/2 is 0xa2a8918ca85bafe22016d0b917e4dd77 */
+    static const unsigned char k1_bound[32] = {
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0xa2, 0xa8, 0x91, 0x8c, 0xa8, 0x5b, 0xaf, 0xe2, 0x20, 0x16, 0xd0, 0xb9, 0x17, 0xe4, 0xdd, 0x77
+    };
+    /* (-b1 + b2)/2 + 1 is 0x8a65287bd47179fb2be08846cea267ed */
+    static const unsigned char k2_bound[32] = {
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x8a, 0x65, 0x28, 0x7b, 0xd4, 0x71, 0x79, 0xfb, 0x2b, 0xe0, 0x88, 0x46, 0xce, 0xa2, 0x67, 0xed
+    };
+    secp256k1_scalar tmp;
+    unsigned char plain[32];
+    unsigned char negated[32];
+
+    /* Recombine the halves: lambda * r2 + r1 has to give back k. */
+    secp256k1_scalar_mul(&tmp, &secp256k1_const_lambda, r2);
+    secp256k1_scalar_add(&tmp, &tmp, r1);
+    VERIFY_CHECK(secp256k1_scalar_eq(&tmp, k));
+
+    /* r1 or -r1 must serialize to a value below k1_bound. */
+    secp256k1_scalar_get_b32(plain, r1);
+    secp256k1_scalar_negate(&tmp, r1);
+    secp256k1_scalar_get_b32(negated, &tmp);
+    VERIFY_CHECK(secp256k1_memcmp_var(plain, k1_bound, 32) < 0 || secp256k1_memcmp_var(negated, k1_bound, 32) < 0);
+
+    /* r2 or -r2 must serialize to a value below k2_bound. */
+    secp256k1_scalar_get_b32(plain, r2);
+    secp256k1_scalar_negate(&tmp, r2);
+    secp256k1_scalar_get_b32(negated, &tmp);
+    VERIFY_CHECK(secp256k1_memcmp_var(plain, k2_bound, 32) < 0 || secp256k1_memcmp_var(negated, k2_bound, 32) < 0);
+}
+#endif /* VERIFY */
+#endif /* !defined(EXHAUSTIVE_TEST_ORDER) */
+
+#endif /* SECP256K1_SCALAR_IMPL_H */
--- a/libsecp256k1/src/scalar_low.h
+++ b/libsecp256k1/src/scalar_low.h
@@ -0,0 +1,24 @@
+/***********************************************************************
+ * Copyright (c) 2015, 2022 Andrew Poelstra, Pieter Wuille             *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_SCALAR_REPR_H
+#define SECP256K1_SCALAR_REPR_H
+
+#include <stdint.h>
+
+/** A scalar modulo the group order of the secp256k1 curve.
+ *  In this representation the whole scalar is a single 32-bit word. */
+typedef uint32_t secp256k1_scalar;
+
+/* A compile-time constant equal to 2^32 (modulo order).
+ * It is formed as ((2^32 - 1) % order) + 1, which is congruent to 2^32 modulo the order
+ * and can be computed without needing a type wider than 32 bits. */
+#define SCALAR_2P32 ((0xffffffffUL % EXHAUSTIVE_TEST_ORDER) + 1U)
+
+/* Compute a*2^32 + b (modulo order).
+ * The multiplication is carried out in 64 bits (via the uint64_t cast) before reducing. */
+#define SCALAR_HORNER(a, b) (((uint64_t)(a) * SCALAR_2P32 + (b)) % EXHAUSTIVE_TEST_ORDER)
+
+/* Evaluates to the provided 256-bit constant reduced modulo order.
+ * The eight 32-bit words are given from most significant (d7) to least significant (d0)
+ * and are combined with nested SCALAR_HORNER steps. */
+#define SECP256K1_SCALAR_CONST(d7, d6, d5, d4, d3, d2, d1, d0) SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER((d7), (d6)), (d5)), (d4)), (d3)), (d2)), (d1)), (d0))
+
+#endif /* SECP256K1_SCALAR_REPR_H */
--- a/libsecp256k1/src/scalar_low_impl.h
+++ b/libsecp256k1/src/scalar_low_impl.h
@@ -0,0 +1,206 @@
+/***********************************************************************
+ * Copyright (c) 2015 Andrew Poelstra                                  *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_SCALAR_REPR_IMPL_H
+#define SECP256K1_SCALAR_REPR_IMPL_H
+
+#include "checkmem.h"
+#include "scalar.h"
+#include "util.h"
+
+#include <string.h>
+
+/* Return 1 if the lowest bit of the scalar is clear (the value is even), 0 otherwise. */
+SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar *a) {
+    SECP256K1_SCALAR_VERIFY(a);
+
+    return (*a & 1) == 0;
+}
+
+/* Set r to v reduced modulo EXHAUSTIVE_TEST_ORDER. */
+SECP256K1_INLINE static void secp256k1_scalar_set_int(secp256k1_scalar *r, unsigned int v) {
+    const unsigned int reduced = v % EXHAUSTIVE_TEST_ORDER;
+    *r = reduced;
+
+    SECP256K1_SCALAR_VERIFY(r);
+}
+
+/* Return `count` bits of the scalar starting at bit `offset`. The scalar is a single
+ * 32-bit word here, so any offset of 32 or more yields 0. count must be in [1,32]
+ * (checked under VERIFY). */
+SECP256K1_INLINE static uint32_t secp256k1_scalar_get_bits_limb32(const secp256k1_scalar *a, unsigned int offset, unsigned int count) {
+    SECP256K1_SCALAR_VERIFY(a);
+
+    VERIFY_CHECK(count > 0 && count <= 32);
+    if (offset >= 32) {
+        return 0;
+    }
+    return (*a >> offset) & (0xFFFFFFFF >> (32 - count));
+}
+
+/* Return `count` bits of the scalar starting at bit `offset`; in this representation
+ * it simply forwards to secp256k1_scalar_get_bits_limb32. */
+SECP256K1_INLINE static uint32_t secp256k1_scalar_get_bits_var(const secp256k1_scalar *a, unsigned int offset, unsigned int count) {
+    uint32_t bits;
+    SECP256K1_SCALAR_VERIFY(a);
+
+    bits = secp256k1_scalar_get_bits_limb32(a, offset, count);
+    return bits;
+}
+
+/* Return 1 if the stored value is not reduced (>= EXHAUSTIVE_TEST_ORDER), 0 otherwise. */
+SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scalar *a) {
+    return *a >= EXHAUSTIVE_TEST_ORDER;
+}
+
+/* Set r = (a + b) mod EXHAUSTIVE_TEST_ORDER.
+ * Returns 1 if the unreduced sum reached the order (i.e. a reduction took place) and
+ * 0 otherwise. r may alias a or b. */
+static int secp256k1_scalar_add(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
+    uint32_t addend;
+    SECP256K1_SCALAR_VERIFY(a);
+    SECP256K1_SCALAR_VERIFY(b);
+
+    /* Snapshot *b before writing *r: when r aliases b, comparing against *b after the
+     * store would compare the result with itself and always report "no overflow". */
+    addend = *b;
+    *r = (*a + addend) % EXHAUSTIVE_TEST_ORDER;
+
+    SECP256K1_SCALAR_VERIFY(r);
+    /* With both inputs reduced, the result is smaller than an addend exactly when the sum wrapped. */
+    return *r < addend;
+}
+
+/* If flag is non-zero, add 2^bit to r (bit must be below 32). The sum is not reduced
+ * here; the checks below assert that it stays a valid scalar. */
+static void secp256k1_scalar_cadd_bit(secp256k1_scalar *r, unsigned int bit, int flag) {
+    SECP256K1_SCALAR_VERIFY(r);
+
+    if (flag) {
+        if (bit < 32) {
+            *r += ((uint32_t)1 << bit);
+        }
+    }
+
+    SECP256K1_SCALAR_VERIFY(r);
+    VERIFY_CHECK(bit < 32);
+    /* Verify that adding (1 << bit) will not overflow any in-range scalar *r by overflowing the underlying uint32_t. */
+    VERIFY_CHECK(((uint32_t)1 << bit) - 1 <= UINT32_MAX - EXHAUSTIVE_TEST_ORDER);
+}
+
+/* Load a scalar from 32 big-endian bytes, reducing modulo EXHAUSTIVE_TEST_ORDER as the
+ * bytes are consumed. If overflow is non-NULL, *overflow is set to 1 when any
+ * reduction was needed and to 0 otherwise. */
+static void secp256k1_scalar_set_b32(secp256k1_scalar *r, const unsigned char *b32, int *overflow) {
+    int reduced_any = 0;
+    int pos = 0;
+    *r = 0;
+    while (pos < 32) {
+        /* Shift the running value up by one byte and append the next input byte. */
+        *r = (*r * 0x100) + b32[pos];
+        if (*r >= EXHAUSTIVE_TEST_ORDER) {
+            reduced_any = 1;
+            *r %= EXHAUSTIVE_TEST_ORDER;
+        }
+        pos++;
+    }
+    if (overflow) {
+        *overflow = reduced_any;
+    }
+
+    SECP256K1_SCALAR_VERIFY(r);
+}
+
+/* Serialize the scalar as 32 big-endian bytes: 28 zero bytes followed by the 32-bit value. */
+static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar* a) {
+    SECP256K1_SCALAR_VERIFY(a);
+
+    memset(bin, 0, 32);
+    bin[28] = (unsigned char)(*a >> 24);
+    bin[29] = (unsigned char)(*a >> 16);
+    bin[30] = (unsigned char)(*a >> 8);
+    bin[31] = (unsigned char)(*a);
+}
+
+/* Return 1 if the scalar equals 0, and 0 otherwise. */
+SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar *a) {
+    SECP256K1_SCALAR_VERIFY(a);
+
+    return !*a;
+}
+
+/* Set r = -a mod EXHAUSTIVE_TEST_ORDER. Zero is its own negation, so it is kept as 0
+ * rather than being mapped to the order itself. */
+static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar *a) {
+    SECP256K1_SCALAR_VERIFY(a);
+
+    *r = (*a == 0) ? 0 : EXHAUSTIVE_TEST_ORDER - *a;
+
+    SECP256K1_SCALAR_VERIFY(r);
+}
+
+/* Return 1 if the scalar equals 1, and 0 otherwise. */
+SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar *a) {
+    SECP256K1_SCALAR_VERIFY(a);
+
+    return 1 == *a;
+}
+
+/* Return 1 if the scalar is greater than half the group order (integer division),
+ * and 0 otherwise. */
+static int secp256k1_scalar_is_high(const secp256k1_scalar *a) {
+    SECP256K1_SCALAR_VERIFY(a);
+
+    return (EXHAUSTIVE_TEST_ORDER / 2) < *a;
+}
+
+/* Negate r in place when flag is non-zero. Returns -1 if the negation was applied and
+ * 1 if r was left untouched. */
+static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
+    int sign = 1;
+    SECP256K1_SCALAR_VERIFY(r);
+
+    if (flag) {
+        secp256k1_scalar_negate(r, r);
+        sign = -1;
+    }
+
+    SECP256K1_SCALAR_VERIFY(r);
+    return sign;
+}
+
+/* Set r = (a * b) mod EXHAUSTIVE_TEST_ORDER. */
+static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
+    uint32_t product;
+    SECP256K1_SCALAR_VERIFY(a);
+    SECP256K1_SCALAR_VERIFY(b);
+
+    product = *a * *b;
+    *r = product % EXHAUSTIVE_TEST_ORDER;
+
+    SECP256K1_SCALAR_VERIFY(r);
+}
+
+/* Split a into a low and a high part. The whole scalar fits in one 32-bit word here,
+ * so r1 receives the full value and r2 is set to zero. */
+static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *a) {
+    secp256k1_scalar low;
+    SECP256K1_SCALAR_VERIFY(a);
+
+    low = *a;
+    *r1 = low;
+    *r2 = 0;
+
+    SECP256K1_SCALAR_VERIFY(r1);
+    SECP256K1_SCALAR_VERIFY(r2);
+}
+
+/* Return 1 if a and b hold the same value, and 0 otherwise. */
+SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar *a, const secp256k1_scalar *b) {
+    SECP256K1_SCALAR_VERIFY(a);
+    SECP256K1_SCALAR_VERIFY(b);
+
+    return (*a ^ *b) == 0;
+}
+
+/* Conditional move without a branch: with flag == 1 r is replaced by a, with
+ * flag == 0 r keeps its value. The flag is read through a volatile copy before the
+ * masks are derived from it. */
+static SECP256K1_INLINE void secp256k1_scalar_cmov(secp256k1_scalar *r, const secp256k1_scalar *a, int flag) {
+    uint32_t keep_mask, take_mask;
+    volatile int vflag = flag;
+    SECP256K1_SCALAR_VERIFY(a);
+    SECP256K1_CHECKMEM_CHECK_VERIFY(r, sizeof(*r));
+
+    /* flag == 0 -> keep_mask is all ones; flag == 1 -> keep_mask is zero. */
+    keep_mask = vflag + ~((uint32_t)0);
+    take_mask = ~keep_mask;
+    *r = (*a & take_mask) | (*r & keep_mask);
+
+    SECP256K1_SCALAR_VERIFY(r);
+}
+
+/* Find the multiplicative inverse of x modulo EXHAUSTIVE_TEST_ORDER by trying every
+ * candidate in turn. If no candidate works, r is set to 0 (and the VERIFY_CHECK below
+ * fires in VERIFY builds). */
+static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar *x) {
+    int candidate = 0;
+    uint32_t inverse = 0;
+    SECP256K1_SCALAR_VERIFY(x);
+
+    while (candidate < EXHAUSTIVE_TEST_ORDER) {
+        if ((candidate * *x) % EXHAUSTIVE_TEST_ORDER == 1) {
+            inverse = candidate;
+            break;
+        }
+        candidate++;
+    }
+
+    /* If this VERIFY_CHECK triggers we were given a noninvertible scalar (and thus
+     * have a composite group order; fix it in exhaustive_tests.c). */
+    VERIFY_CHECK(inverse != 0);
+    *r = inverse;
+
+    SECP256K1_SCALAR_VERIFY(r);
+}
+
+/* Variable-time inverse; in this representation it simply delegates to
+ * secp256k1_scalar_inverse. */
+static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_scalar *x) {
+    SECP256K1_SCALAR_VERIFY(x);
+
+    /* The exhaustive search has no separate variable-time version. */
+    secp256k1_scalar_inverse(r, x);
+
+    SECP256K1_SCALAR_VERIFY(r);
+}
+
+/* Set r = a / 2 modulo EXHAUSTIVE_TEST_ORDER. An odd a first has the order added to it
+ * so that the value being halved is even; the selection is done with a mask, not a
+ * branch. */
+static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a) {
+    uint32_t odd_mask;
+    SECP256K1_SCALAR_VERIFY(a);
+
+    /* All ones when a is odd, zero when a is even. */
+    odd_mask = -(uint32_t)(*a & 1);
+    *r = (*a + (odd_mask & EXHAUSTIVE_TEST_ORDER)) >> 1;
+
+    SECP256K1_SCALAR_VERIFY(r);
+}
+
+#endif /* SECP256K1_SCALAR_REPR_IMPL_H */
--- a/libsecp256k1/src/scratch.h
+++ b/libsecp256k1/src/scratch.h
@@ -0,0 +1,44 @@
+/***********************************************************************
+ * Copyright (c) 2017 Andrew Poelstra                                  *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_SCRATCH_H
+#define SECP256K1_SCRATCH_H
+
+/* Header of a scratch space: a simple bump allocator over one buffer.
+ *
+ * The typedef is used internally; the struct name is used in the public API
+ * (where it is exposed as a different typedef) */
+typedef struct secp256k1_scratch_space_struct {
+    /** guard against interpreting this object as other types; holds the
+     *  8 bytes of the string "scratch" (including its terminator) while valid */
+    unsigned char magic[8];
+    /** actual allocated data */
+    void *data;
+    /** amount that has been allocated (i.e. `data + alloc_size` is the next
+     *  available pointer)  */
+    size_t alloc_size;
+    /** maximum size available to allocate */
+    size_t max_size;
+} secp256k1_scratch;
+
+/* Alias of the same struct under the scratch-space name. */
+typedef struct secp256k1_scratch_space_struct secp256k1_scratch_space;
+
+/** Allocates a scratch space able to hand out up to `max_size` bytes. The
+ *  header and the data area come from a single checked_malloc call. Returns
+ *  NULL if that allocation returned NULL. */
+static secp256k1_scratch* secp256k1_scratch_create(const secp256k1_callback* error_callback, size_t max_size);
+
+/** Frees a scratch space. Passing NULL is a no-op. If the object does not
+ *  carry the scratch magic, the error callback is invoked and nothing is
+ *  freed. */
+static void secp256k1_scratch_destroy(const secp256k1_callback* error_callback, secp256k1_scratch* scratch);
+
+/** Returns an opaque object used to "checkpoint" a scratch space. Used
+ *  with `secp256k1_scratch_apply_checkpoint` to undo allocations. */
+static size_t secp256k1_scratch_checkpoint(const secp256k1_callback* error_callback, const secp256k1_scratch* scratch);
+
+/** Applies a check point received from `secp256k1_scratch_checkpoint`,
+ *  undoing all allocations since that point. A checkpoint larger than the
+ *  current allocation is reported through the error callback and ignored. */
+static void secp256k1_scratch_apply_checkpoint(const secp256k1_callback* error_callback, secp256k1_scratch* scratch, size_t checkpoint);
+
+/** Returns the maximum allocation the scratch space will allow, after
+ *  reserving up to ALIGNMENT - 1 bytes of padding for each of `n_objects`
+ *  objects. Returns 0 if nothing would be left. */
+static size_t secp256k1_scratch_max_allocation(const secp256k1_callback* error_callback, const secp256k1_scratch* scratch, size_t n_objects);
+
+/** Returns a pointer to `n` bytes (rounded up to the alignment) of zeroed
+ *  memory inside the scratch space, or NULL if there is insufficient available space */
+static void *secp256k1_scratch_alloc(const secp256k1_callback* error_callback, secp256k1_scratch* scratch, size_t n);
+
+#endif
--- a/libsecp256k1/src/scratch_impl.h
+++ b/libsecp256k1/src/scratch_impl.h
@@ -0,0 +1,99 @@
+/***********************************************************************
+ * Copyright (c) 2017 Andrew Poelstra                                  *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_SCRATCH_IMPL_H
+#define SECP256K1_SCRATCH_IMPL_H
+
+#include "util.h"
+#include "scratch.h"
+
+/* Allocate a scratch space with `size` usable bytes. The header and the data
+ * area share one allocation; the data area starts right after the header,
+ * rounded up to the alignment. Returns NULL when checked_malloc returns NULL. */
+static secp256k1_scratch* secp256k1_scratch_create(const secp256k1_callback* error_callback, size_t size) {
+    const size_t header_size = ROUND_TO_ALIGN(sizeof(secp256k1_scratch));
+    void *mem = checked_malloc(error_callback, header_size + size);
+    secp256k1_scratch *scratch = (secp256k1_scratch *)mem;
+
+    if (scratch == NULL) {
+        return scratch;
+    }
+
+    /* Start from an all-zero header, then stamp the magic and the layout. */
+    memset(scratch, 0, sizeof(*scratch));
+    memcpy(scratch->magic, "scratch", 8);
+    scratch->data = (void *) ((char *) mem + header_size);
+    scratch->max_size = size;
+    return scratch;
+}
+
+/* Release a scratch space. NULL is accepted and ignored. An object without the
+ * scratch magic is reported through the error callback and left untouched. */
+static void secp256k1_scratch_destroy(const secp256k1_callback* error_callback, secp256k1_scratch* scratch) {
+    if (scratch == NULL) {
+        return;
+    }
+    if (secp256k1_memcmp_var(scratch->magic, "scratch", 8) != 0) {
+        secp256k1_callback_call(error_callback, "invalid scratch space");
+        return;
+    }
+
+    VERIFY_CHECK(scratch->alloc_size == 0); /* all checkpoints should be applied */
+    /* Wipe the magic so a stale pointer is no longer recognised as a scratch space. */
+    memset(scratch->magic, 0, sizeof(scratch->magic));
+    free(scratch);
+}
+
+/* Return the current allocation level of the scratch space, to be restored
+ * later with secp256k1_scratch_apply_checkpoint. Returns 0 (after invoking the
+ * error callback) if the object does not carry the scratch magic. */
+static size_t secp256k1_scratch_checkpoint(const secp256k1_callback* error_callback, const secp256k1_scratch* scratch) {
+    const int magic_ok = (secp256k1_memcmp_var(scratch->magic, "scratch", 8) == 0);
+
+    if (!magic_ok) {
+        secp256k1_callback_call(error_callback, "invalid scratch space");
+        return 0;
+    }
+    return scratch->alloc_size;
+}
+
+/* Roll the allocation level back to `checkpoint`, undoing every allocation made
+ * since it was taken. An invalid scratch space, or a checkpoint beyond the
+ * current level, is reported through the error callback and changes nothing. */
+static void secp256k1_scratch_apply_checkpoint(const secp256k1_callback* error_callback, secp256k1_scratch* scratch, size_t checkpoint) {
+    if (secp256k1_memcmp_var(scratch->magic, "scratch", 8) != 0) {
+        secp256k1_callback_call(error_callback, "invalid scratch space");
+    } else if (checkpoint > scratch->alloc_size) {
+        secp256k1_callback_call(error_callback, "invalid checkpoint");
+    } else {
+        scratch->alloc_size = checkpoint;
+    }
+}
+
+/* Return how many bytes can still be allocated when the space is going to be
+ * split into `objects` allocations, each of which may lose up to
+ * ALIGNMENT - 1 bytes to padding. Returns 0 if nothing would remain. */
+static size_t secp256k1_scratch_max_allocation(const secp256k1_callback* error_callback, const secp256k1_scratch* scratch, size_t objects) {
+    size_t remaining;
+    size_t padding;
+
+    if (secp256k1_memcmp_var(scratch->magic, "scratch", 8) != 0) {
+        secp256k1_callback_call(error_callback, "invalid scratch space");
+        return 0;
+    }
+    /* Ensure that multiplication will not wrap around */
+    if (ALIGNMENT > 1 && objects > SIZE_MAX/(ALIGNMENT - 1)) {
+        return 0;
+    }
+
+    remaining = scratch->max_size - scratch->alloc_size;
+    padding = objects * (ALIGNMENT - 1);
+    return (remaining > padding) ? remaining - padding : 0;
+}
+
+/* Carve `size` bytes (rounded up to the alignment) out of the scratch space and
+ * return them zero-filled. Returns NULL if the rounding overflows, if the
+ * scratch space is invalid, or if not enough space is left. */
+static void *secp256k1_scratch_alloc(const secp256k1_callback* error_callback, secp256k1_scratch* scratch, size_t size) {
+    const size_t aligned_size = ROUND_TO_ALIGN(size);
+    void *block;
+
+    /* Check that rounding did not wrap around */
+    if (aligned_size < size) {
+        return NULL;
+    }
+
+    if (secp256k1_memcmp_var(scratch->magic, "scratch", 8) != 0) {
+        secp256k1_callback_call(error_callback, "invalid scratch space");
+        return NULL;
+    }
+
+    if (aligned_size > scratch->max_size - scratch->alloc_size) {
+        return NULL;
+    }
+
+    /* The new block starts where the previous allocations end. */
+    block = (void *) ((char *) scratch->data + scratch->alloc_size);
+    memset(block, 0, aligned_size);
+    scratch->alloc_size += aligned_size;
+
+    return block;
+}
+
+#endif
--- a/libsecp256k1/src/secp256k1.c
+++ b/libsecp256k1/src/secp256k1.c
@@ -0,0 +1,831 @@
+/***********************************************************************
+ * Copyright (c) 2013-2015 Pieter Wuille                               *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+/* This is a C project. It should not be compiled with a C++ compiler,
+ * and we error out if we detect one.
+ *
+ * We still want to be able to test the project with a C++ compiler
+ * because it is still good to know if this will lead to real trouble, so
+ * there is a possibility to override the check. But be warned that
+ * compiling with a C++ compiler is not supported. */
+#if defined(__cplusplus) && !defined(SECP256K1_CPLUSPLUS_TEST_OVERRIDE)
+#error Trying to compile a C project with a C++ compiler.
+#endif
+
+#define SECP256K1_BUILD
+
+#include "../include/secp256k1.h"
+#include "../include/secp256k1_preallocated.h"
+
+#include "assumptions.h"
+#include "checkmem.h"
+#include "util.h"
+
+#include "field_impl.h"
+#include "scalar_impl.h"
+#include "group_impl.h"
+#include "ecmult_impl.h"
+#include "ecmult_const_impl.h"
+#include "ecmult_gen_impl.h"
+#include "ecdsa_impl.h"
+#include "eckey_impl.h"
+#include "hash_impl.h"
+#include "int128_impl.h"
+#include "scratch_impl.h"
+#include "selftest.h"
+#include "hsort_impl.h"
+
+#ifdef SECP256K1_NO_BUILD
+# error "secp256k1.h processed without SECP256K1_BUILD defined while building secp256k1.c"
+#endif
+
+/* Argument check for API functions with a non-void return type: if `cond` is
+ * false, the illegal callback of the variable `ctx` in the enclosing scope is
+ * invoked with the stringified condition and the function returns 0. */
+#define ARG_CHECK(cond) do { \
+    if (EXPECT(!(cond), 0)) { \
+        secp256k1_callback_call(&ctx->illegal_callback, #cond); \
+        return 0; \
+    } \
+} while(0)
+
+/* Same as ARG_CHECK, for functions returning void. */
+#define ARG_CHECK_VOID(cond) do { \
+    if (EXPECT(!(cond), 0)) { \
+        secp256k1_callback_call(&ctx->illegal_callback, #cond); \
+        return; \
+    } \
+} while(0)
+
+/* Note that whenever you change the context struct, you must also change the
+ * context_eq function. */
+struct secp256k1_context_struct {
+    /* State for generator multiplication; whether it is built is what marks a
+     * context as "proper" (see secp256k1_context_is_proper). */
+    secp256k1_ecmult_gen_context ecmult_gen_ctx;
+    /* Invoked by ARG_CHECK / ARG_CHECK_VOID when an API argument is invalid. */
+    secp256k1_callback illegal_callback;
+    /* Passed to internal helpers such as checked_malloc and the scratch space. */
+    secp256k1_callback error_callback;
+    /* Nonzero when the context was created with
+     * SECP256K1_FLAGS_BIT_CONTEXT_DECLASSIFY; enables secp256k1_declassify. */
+    int declassify;
+};
+
+/* The built-in static context: a zero-initialized ecmult_gen context, the
+ * default callbacks with no user data, and declassification disabled. */
+static const secp256k1_context secp256k1_context_static_ = {
+    { 0 },
+    { secp256k1_default_illegal_callback_fn, 0 },
+    { secp256k1_default_error_callback_fn, 0 },
+    0
+};
+/* Both exported pointers refer to the same static context object. */
+const secp256k1_context *secp256k1_context_static = &secp256k1_context_static_;
+const secp256k1_context *secp256k1_context_no_precomp = &secp256k1_context_static_;
+
+/* Helper function that determines if a context is proper, i.e., is not the static context or a copy thereof.
+ *
+ * This is intended for "context" functions such as secp256k1_context_clone. Functions that need specific
+ * features of a context should still check for these features directly. For example, a function that needs
+ * ecmult_gen should directly check for the existence of the ecmult_gen context.
+ *
+ * Returns the result of secp256k1_ecmult_gen_context_is_built on the context's ecmult_gen_ctx. */
+static int secp256k1_context_is_proper(const secp256k1_context* ctx) {
+    return secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx);
+}
+
+void secp256k1_selftest(void) {
+    if (!secp256k1_selftest_passes()) {
+        secp256k1_callback_call(&default_error_callback, "self test failed");
+    }
+}
+
+/* Return the number of bytes a caller must provide to hold a context created
+ * with `flags`. Invalid flags are reported through the default illegal callback
+ * and yield 0. */
+size_t secp256k1_context_preallocated_size(unsigned int flags) {
+    const size_t ctx_size = sizeof(secp256k1_context);
+    const unsigned int type_bits = flags & SECP256K1_FLAGS_TYPE_MASK;
+    /* A return value of 0 is reserved as an indicator for errors when we call this function internally. */
+    VERIFY_CHECK(ctx_size != 0);
+
+    /* The type field of the flags must say "context". */
+    if (EXPECT(type_bits != SECP256K1_FLAGS_TYPE_CONTEXT, 0)) {
+        secp256k1_callback_call(&default_illegal_callback,
+                                "Invalid flags");
+        return 0;
+    }
+
+    /* The declassify bit is only accepted while running under a memory checker. */
+    if (EXPECT(!SECP256K1_CHECKMEM_RUNNING() && (flags & SECP256K1_FLAGS_BIT_CONTEXT_DECLASSIFY), 0)) {
+        secp256k1_callback_call(&default_illegal_callback,
+                                "Declassify flag requires running with memory checking");
+        return 0;
+    }
+
+    return ctx_size;
+}
+
+/* Return the number of bytes needed to hold a clone of `ctx`, which is
+ * sizeof(secp256k1_context). A context that is not proper (the static context
+ * or a copy of it) triggers the illegal callback and yields 0. */
+size_t secp256k1_context_preallocated_clone_size(const secp256k1_context* ctx) {
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(secp256k1_context_is_proper(ctx));
+    return sizeof(secp256k1_context);
+}
+
+/* Initialize a context inside caller-provided memory `prealloc` and return it.
+ * Runs the self test first. Returns NULL when `flags` are rejected by
+ * secp256k1_context_preallocated_size; in that case `prealloc` is not written. */
+secp256k1_context* secp256k1_context_preallocated_create(void* prealloc, unsigned int flags) {
+    size_t prealloc_size;
+    secp256k1_context* ret;
+
+    secp256k1_selftest();
+
+    /* Used here only to validate the flags; a size of 0 signals an error. */
+    prealloc_size = secp256k1_context_preallocated_size(flags);
+    if (prealloc_size == 0) {
+        return NULL;
+    }
+    VERIFY_CHECK(prealloc != NULL);
+    ret = (secp256k1_context*)prealloc;
+    ret->illegal_callback = default_illegal_callback;
+    ret->error_callback = default_error_callback;
+
+    /* Flags have been checked by secp256k1_context_preallocated_size. */
+    VERIFY_CHECK((flags & SECP256K1_FLAGS_TYPE_MASK) == SECP256K1_FLAGS_TYPE_CONTEXT);
+    secp256k1_ecmult_gen_context_build(&ret->ecmult_gen_ctx);
+    /* Normalize the flag bit to 0 or 1. */
+    ret->declassify = !!(flags & SECP256K1_FLAGS_BIT_CONTEXT_DECLASSIFY);
+
+    return ret;
+}
+
+/* Allocate a context on the heap and initialize it for `flags`. Returns NULL,
+ * after freeing the allocation, when initialization fails. */
+secp256k1_context* secp256k1_context_create(unsigned int flags) {
+    size_t const ctx_size = secp256k1_context_preallocated_size(flags);
+    secp256k1_context* ctx = (secp256k1_context*)checked_malloc(&default_error_callback, ctx_size);
+
+    /* Success is the expected case. */
+    if (EXPECT(secp256k1_context_preallocated_create(ctx, flags) != NULL, 1)) {
+        return ctx;
+    }
+
+    free(ctx);
+    return NULL;
+}
+
+/* Copy `ctx` into caller-provided memory `prealloc` and return the copy.
+ * A NULL `prealloc`, or a `ctx` that is not proper, triggers the illegal
+ * callback of `ctx` and yields NULL (the 0 returned by ARG_CHECK). */
+secp256k1_context* secp256k1_context_preallocated_clone(const secp256k1_context* ctx, void* prealloc) {
+    secp256k1_context* ret;
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(prealloc != NULL);
+    ARG_CHECK(secp256k1_context_is_proper(ctx));
+
+    ret = (secp256k1_context*)prealloc;
+    /* The clone is a plain struct copy. */
+    *ret = *ctx;
+    return ret;
+}
+
+/* Return a heap-allocated copy of `ctx`. The memory is obtained through
+ * checked_malloc with the error callback of `ctx`; a context that is not proper
+ * triggers the illegal callback and yields NULL. */
+secp256k1_context* secp256k1_context_clone(const secp256k1_context* ctx) {
+    void *mem;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(secp256k1_context_is_proper(ctx));
+
+    mem = checked_malloc(&ctx->error_callback, secp256k1_context_preallocated_clone_size(ctx));
+    /* The preallocated variant rejects a NULL buffer, so a failed allocation yields NULL. */
+    return secp256k1_context_preallocated_clone(ctx, mem);
+}
+
+/* Tear down a context that lives in caller-provided memory. The memory itself
+ * is not freed here. NULL is accepted; a non-NULL context that is not proper
+ * triggers the illegal callback. */
+void secp256k1_context_preallocated_destroy(secp256k1_context* ctx) {
+    ARG_CHECK_VOID(ctx == NULL || secp256k1_context_is_proper(ctx));
+
+    /* Defined as noop */
+    if (ctx == NULL) {
+        return;
+    }
+
+    secp256k1_ecmult_gen_context_clear(&ctx->ecmult_gen_ctx);
+}
+
+/* Destroy a heap-allocated context: clear it via the preallocated variant and
+ * free its memory. NULL is accepted and ignored; a non-NULL context that is
+ * not proper triggers the illegal callback and is left alone. */
+void secp256k1_context_destroy(secp256k1_context* ctx) {
+    ARG_CHECK_VOID(ctx == NULL || secp256k1_context_is_proper(ctx));
+
+    /* Destroying NULL is defined as a noop */
+    if (ctx != NULL) {
+        secp256k1_context_preallocated_destroy(ctx);
+        free(ctx);
+    }
+}
+
+/* Install `fun` (with user pointer `data`) as the illegal-argument callback of
+ * `ctx`. A NULL `fun` selects the default illegal callback function. */
+void secp256k1_context_set_illegal_callback(secp256k1_context* ctx, void (*fun)(const char* message, void* data), const void* data) {
+    /* Only the static context object itself is rejected (pointer comparison
+       rather than secp256k1_context_is_proper()): setting callbacks on *copies*
+       of the static context is allowed because it's harmless and makes testing
+       easier. */
+    ARG_CHECK_VOID(ctx != secp256k1_context_static);
+
+    ctx->illegal_callback.fn = (fun != NULL) ? fun : secp256k1_default_illegal_callback_fn;
+    ctx->illegal_callback.data = data;
+}
+
+void secp256k1_context_set_error_callback(secp256k1_context* ctx, void (*fun)(const char* message, void* data), const void* data) {
+    /* We compare pointers instead of checking secp256k1_context_is_proper() here
+       because setting callbacks is allowed on *copies* of the static context:
+       it's harmless and makes testing easier. */
+    ARG_CHECK_VOID(ctx != secp256k1_context_static);
+    if (fun == NULL) {
+        fun = secp256k1_default_error_callback_fn;
+    }
+    ctx->error_callback.fn = fun;
+    ctx->error_callback.data = data;
+}
+
+/* Create a scratch space of `max_size` bytes, reporting allocation problems
+ * through the error callback of `ctx`. */
+static secp256k1_scratch_space* secp256k1_scratch_space_create(const secp256k1_context* ctx, size_t max_size) {
+    VERIFY_CHECK(ctx != NULL);
+    return secp256k1_scratch_create(&ctx->error_callback, max_size);
+}
+
+/* Destroy a scratch space, reporting an invalid object through the error
+ * callback of `ctx`. */
+static void secp256k1_scratch_space_destroy(const secp256k1_context *ctx, secp256k1_scratch_space* scratch) {
+    VERIFY_CHECK(ctx != NULL);
+    secp256k1_scratch_destroy(&ctx->error_callback, scratch);
+}
+
+/* Tell the memory checker that `len` bytes at `p` are no longer secret, so
+ * that constant-time analysis of the software may treat them as public.
+ * Does nothing unless the context has its declassify flag set.
+ */
+static SECP256K1_INLINE void secp256k1_declassify(const secp256k1_context* ctx, const void *p, size_t len) {
+    if (EXPECT(ctx->declassify, 0)) {
+        SECP256K1_CHECKMEM_DEFINE(p, len);
+    }
+}
+
+/* Decode the opaque bytes of `pubkey` into the group element `ge`. Returns 1
+ * on success. If the decoded x coordinate is zero, the illegal callback of
+ * `ctx` is invoked and 0 is returned. */
+static int secp256k1_pubkey_load(const secp256k1_context* ctx, secp256k1_ge* ge, const secp256k1_pubkey* pubkey) {
+    secp256k1_ge_from_bytes(ge, pubkey->data);
+    ARG_CHECK(!secp256k1_fe_is_zero(&ge->x));
+    return 1;
+}
+
+/* Encode the group element `ge` into the opaque bytes of `pubkey`; the
+ * counterpart of secp256k1_pubkey_load. */
+static void secp256k1_pubkey_save(secp256k1_pubkey* pubkey, secp256k1_ge* ge) {
+    secp256k1_ge_to_bytes(pubkey->data, ge);
+}
+
+/* Parse a serialized public key of `inputlen` bytes from `input` into
+ * `pubkey`. Returns 1 on success and 0 if the bytes do not decode to a point
+ * in the correct subgroup. `pubkey` is zeroed before parsing, so it stays
+ * all-zero on failure. */
+int secp256k1_ec_pubkey_parse(const secp256k1_context* ctx, secp256k1_pubkey* pubkey, const unsigned char *input, size_t inputlen) {
+    secp256k1_ge point;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(pubkey != NULL);
+    memset(pubkey, 0, sizeof(*pubkey));
+    ARG_CHECK(input != NULL);
+
+    /* Both the decoding and the subgroup membership test have to succeed. */
+    if (!secp256k1_eckey_pubkey_parse(&point, input, inputlen) ||
+        !secp256k1_ge_is_in_correct_subgroup(&point)) {
+        return 0;
+    }
+
+    secp256k1_pubkey_save(pubkey, &point);
+    secp256k1_ge_clear(&point);
+    return 1;
+}
+
+/* Serialize `pubkey` into `output`, compressed or uncompressed depending on
+ * SECP256K1_FLAGS_BIT_COMPRESSION in `flags`. On entry `*outputlen` is the
+ * size of `output` (at least 33 for compressed, 65 otherwise); on success it
+ * is set to the number of bytes written and 1 is returned. On failure 0 is
+ * returned; once the length check has passed, `*outputlen` is 0 in that case. */
+int secp256k1_ec_pubkey_serialize(const secp256k1_context* ctx, unsigned char *output, size_t *outputlen, const secp256k1_pubkey* pubkey, unsigned int flags) {
+    secp256k1_ge Q;
+    size_t len;
+    int ret = 0;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(outputlen != NULL);
+    ARG_CHECK(*outputlen >= ((flags & SECP256K1_FLAGS_BIT_COMPRESSION) ? 33u : 65u));
+    /* Remember the caller's buffer size, then reset the reported length so
+     * that every later failure leaves it at 0. */
+    len = *outputlen;
+    *outputlen = 0;
+    ARG_CHECK(output != NULL);
+    /* Zero the whole caller-provided buffer before any further checks. */
+    memset(output, 0, len);
+    ARG_CHECK(pubkey != NULL);
+    ARG_CHECK((flags & SECP256K1_FLAGS_TYPE_MASK) == SECP256K1_FLAGS_TYPE_COMPRESSION);
+    if (secp256k1_pubkey_load(ctx, &Q, pubkey)) {
+        ret = secp256k1_eckey_pubkey_serialize(&Q, output, &len, flags & SECP256K1_FLAGS_BIT_COMPRESSION);
+        if (ret) {
+            *outputlen = len;
+        }
+    }
+    return ret;
+}
+
+/* Compare two public keys by comparing their 33-byte compressed
+ * serializations. Returns the result of secp256k1_memcmp_var on those
+ * serializations. */
+int secp256k1_ec_pubkey_cmp(const secp256k1_context* ctx, const secp256k1_pubkey* pubkey0, const secp256k1_pubkey* pubkey1) {
+    unsigned char out[2][33];
+    const secp256k1_pubkey* pk[2];
+    int i;
+
+    VERIFY_CHECK(ctx != NULL);
+    pk[0] = pubkey0; pk[1] = pubkey1;
+    for (i = 0; i < 2; i++) {
+        size_t out_size = sizeof(out[i]);
+        /* If the public key is NULL or invalid, ec_pubkey_serialize will call
+         * the illegal_callback and return 0. In that case we will serialize the
+         * key as all zeros which is less than any valid public key. This
+         * results in consistent comparisons even if NULL or invalid pubkeys are
+         * involved and prevents edge cases such as sorting algorithms that use
+         * this function and do not terminate as a result. */
+        if (!secp256k1_ec_pubkey_serialize(ctx, out[i], &out_size, pk[i], SECP256K1_EC_COMPRESSED)) {
+            /* Note that ec_pubkey_serialize should already set the output to
+             * zero in that case, but it's not guaranteed by the API, we can't
+             * test it and writing a VERIFY_CHECK is more complex than
+             * explicitly memsetting (again). */
+            memset(out[i], 0, sizeof(out[i]));
+        }
+    }
+    return secp256k1_memcmp_var(out[0], out[1], sizeof(out[0]));
+}
+
+/* Comparison callback for secp256k1_hsort. `pk1` and `pk2` point at array
+ * elements, each of which is itself a pointer to a public key; `ctx` is the
+ * context passed through as opaque comparison data. */
+static int secp256k1_ec_pubkey_sort_cmp(const void* pk1, const void* pk2, void *ctx) {
+    return secp256k1_ec_pubkey_cmp((secp256k1_context *)ctx,
+                                     *(secp256k1_pubkey **)pk1,
+                                     *(secp256k1_pubkey **)pk2);
+}
+
+/* Sort the array `pubkeys` of `n_pubkeys` public key pointers in place, using
+ * secp256k1_hsort with secp256k1_ec_pubkey_cmp as the ordering. Returns 1;
+ * a NULL `pubkeys` triggers the illegal callback and returns 0. */
+int secp256k1_ec_pubkey_sort(const secp256k1_context* ctx, const secp256k1_pubkey **pubkeys, size_t n_pubkeys) {
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(pubkeys != NULL);
+
+    /* Suppress wrong warning (fixed in MSVC 19.33) */
+    #if defined(_MSC_VER) && (_MSC_VER < 1933)
+    #pragma warning(push)
+    #pragma warning(disable: 4090)
+    #endif
+
+    /* Casting away const is fine because neither secp256k1_hsort nor
+     * secp256k1_ec_pubkey_sort_cmp modify the data pointed to by the cmp_data
+     * argument. */
+    secp256k1_hsort(pubkeys, n_pubkeys, sizeof(*pubkeys), secp256k1_ec_pubkey_sort_cmp, (void *)ctx);
+
+    #if defined(_MSC_VER) && (_MSC_VER < 1933)
+    #pragma warning(pop)
+    #endif
+
+    return 1;
+}
+
+/* Extract the scalars `r` and `s` from the opaque signature object `sig`.
+ * Must mirror the representation written by secp256k1_ecdsa_signature_save. */
+static void secp256k1_ecdsa_signature_load(const secp256k1_context* ctx, secp256k1_scalar* r, secp256k1_scalar* s, const secp256k1_ecdsa_signature* sig) {
+    (void)ctx;
+    if (sizeof(secp256k1_scalar) != 32) {
+        /* Generic path: the object stores two 32-byte serialized scalars. */
+        secp256k1_scalar_set_b32(r, &sig->data[0], NULL);
+        secp256k1_scalar_set_b32(s, &sig->data[32], NULL);
+    } else {
+        /* When the secp256k1_scalar type is exactly 32 byte, the object holds
+         * the scalars in their in-memory representation, so a plain copy is
+         * enough and conversion is very fast. */
+        memcpy(r, &sig->data[0], 32);
+        memcpy(s, &sig->data[32], 32);
+    }
+}
+
+/* Store the scalars `r` and `s` in the opaque signature object `sig`.
+ * Must mirror the representation read by secp256k1_ecdsa_signature_load. */
+static void secp256k1_ecdsa_signature_save(secp256k1_ecdsa_signature* sig, const secp256k1_scalar* r, const secp256k1_scalar* s) {
+    if (sizeof(secp256k1_scalar) != 32) {
+        /* Generic path: store two 32-byte serialized scalars. */
+        secp256k1_scalar_get_b32(&sig->data[0], r);
+        secp256k1_scalar_get_b32(&sig->data[32], s);
+    } else {
+        /* Scalars are exactly 32 bytes: copy their in-memory representation. */
+        memcpy(&sig->data[0], r, 32);
+        memcpy(&sig->data[32], s, 32);
+    }
+}
+
+/* Parse a DER-encoded signature of `inputlen` bytes from `input` into `sig`.
+ * Returns 1 on success. On a parse failure `sig` is zeroed and 0 is returned. */
+int secp256k1_ecdsa_signature_parse_der(const secp256k1_context* ctx, secp256k1_ecdsa_signature* sig, const unsigned char *input, size_t inputlen) {
+    secp256k1_scalar sig_r, sig_s;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(sig != NULL);
+    ARG_CHECK(input != NULL);
+
+    if (!secp256k1_ecdsa_sig_parse(&sig_r, &sig_s, input, inputlen)) {
+        memset(sig, 0, sizeof(*sig));
+        return 0;
+    }
+
+    secp256k1_ecdsa_signature_save(sig, &sig_r, &sig_s);
+    return 1;
+}
+
+/* Parse a 64-byte compact signature (32 bytes of r followed by 32 bytes of s)
+ * from `input64` into `sig`. Returns 1 on success. If either value overflows
+ * when read as a scalar, `sig` is zeroed and 0 is returned. */
+int secp256k1_ecdsa_signature_parse_compact(const secp256k1_context* ctx, secp256k1_ecdsa_signature* sig, const unsigned char *input64) {
+    secp256k1_scalar sig_r, sig_s;
+    int r_overflow = 0;
+    int s_overflow = 0;
+    int ok;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(sig != NULL);
+    ARG_CHECK(input64 != NULL);
+
+    secp256k1_scalar_set_b32(&sig_r, &input64[0], &r_overflow);
+    secp256k1_scalar_set_b32(&sig_s, &input64[32], &s_overflow);
+
+    /* Both halves must have been read without overflow. */
+    ok = !r_overflow && !s_overflow;
+    if (!ok) {
+        memset(sig, 0, sizeof(*sig));
+        return ok;
+    }
+    secp256k1_ecdsa_signature_save(sig, &sig_r, &sig_s);
+    return ok;
+}
+
+/* Serialize `sig` in DER format into `output`. `outputlen` is passed through
+ * to secp256k1_ecdsa_sig_serialize, whose return value is returned unchanged. */
+int secp256k1_ecdsa_signature_serialize_der(const secp256k1_context* ctx, unsigned char *output, size_t *outputlen, const secp256k1_ecdsa_signature* sig) {
+    secp256k1_scalar r, s;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(output != NULL);
+    ARG_CHECK(outputlen != NULL);
+    ARG_CHECK(sig != NULL);
+
+    secp256k1_ecdsa_signature_load(ctx, &r, &s, sig);
+    return secp256k1_ecdsa_sig_serialize(output, outputlen, &r, &s);
+}
+
+/* Serialize `sig` into the 64-byte buffer `output64`: 32 bytes of r followed
+ * by 32 bytes of s. Returns 1 once the arguments have passed the NULL checks. */
+int secp256k1_ecdsa_signature_serialize_compact(const secp256k1_context* ctx, unsigned char *output64, const secp256k1_ecdsa_signature* sig) {
+    secp256k1_scalar r, s;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(output64 != NULL);
+    ARG_CHECK(sig != NULL);
+
+    secp256k1_ecdsa_signature_load(ctx, &r, &s, sig);
+    secp256k1_scalar_get_b32(&output64[0], &r);
+    secp256k1_scalar_get_b32(&output64[32], &s);
+    return 1;
+}
+
+/* Report whether the s value of `sigin` is "high" and, when `sigout` is not
+ * NULL, write the signature there with a high s replaced by its negation.
+ * Returns the result of secp256k1_scalar_is_high on the input's s value. */
+int secp256k1_ecdsa_signature_normalize(const secp256k1_context* ctx, secp256k1_ecdsa_signature *sigout, const secp256k1_ecdsa_signature *sigin) {
+    secp256k1_scalar sig_r, sig_s;
+    int is_high;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(sigin != NULL);
+
+    secp256k1_ecdsa_signature_load(ctx, &sig_r, &sig_s, sigin);
+    is_high = secp256k1_scalar_is_high(&sig_s);
+
+    /* Without an output object this is a pure query. */
+    if (sigout == NULL) {
+        return is_high;
+    }
+
+    if (is_high) {
+        secp256k1_scalar_negate(&sig_s, &sig_s);
+    }
+    secp256k1_ecdsa_signature_save(sigout, &sig_r, &sig_s);
+    return is_high;
+}
+
+/* Verify the ECDSA signature `sig` over the 32-byte message hash `msghash32`
+ * with `pubkey`. Returns 1 only if s is not high, the public key loads
+ * successfully and the signature check passes; otherwise returns 0. */
+int secp256k1_ecdsa_verify(const secp256k1_context* ctx, const secp256k1_ecdsa_signature *sig, const unsigned char *msghash32, const secp256k1_pubkey *pubkey) {
+    secp256k1_ge pubkey_point;
+    secp256k1_scalar sig_r, sig_s;
+    secp256k1_scalar msg;
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(msghash32 != NULL);
+    ARG_CHECK(sig != NULL);
+    ARG_CHECK(pubkey != NULL);
+
+    secp256k1_scalar_set_b32(&msg, msghash32, NULL);
+    secp256k1_ecdsa_signature_load(ctx, &sig_r, &sig_s, sig);
+
+    /* Signatures with a high s value are rejected outright. */
+    if (secp256k1_scalar_is_high(&sig_s)) {
+        return 0;
+    }
+    if (!secp256k1_pubkey_load(ctx, &pubkey_point, pubkey)) {
+        return 0;
+    }
+    /* Normalize to exactly 0 or 1. */
+    return secp256k1_ecdsa_sig_verify(&sig_r, &sig_s, &pubkey_point, &msg) != 0;
+}
+
+/* Copy `len` bytes from `data` to `buf` at position `*offset`, then advance
+ * `*offset` by `len`. No bounds checking is done; the caller must make sure
+ * `buf` is large enough. */
+static SECP256K1_INLINE void buffer_append(unsigned char *buf, unsigned int *offset, const void *data, unsigned int len) {
+    memcpy(buf + *offset, data, len);
+    *offset += len;
+}
+
+/* Nonce function built on the RFC 6979 HMAC-SHA256 generator. Writes a 32-byte
+ * nonce to `nonce32` derived from the key `key32`, the message `msg32`
+ * (reduced to a scalar first), optional 32 bytes of extra `data` and an
+ * optional 16-byte algorithm name `algo16`. `counter` selects which generator
+ * output is used: output number `counter` (counting from 0). Always returns 1. */
+static int nonce_function_rfc6979(unsigned char *nonce32, const unsigned char *msg32, const unsigned char *key32, const unsigned char *algo16, void *data, unsigned int counter) {
+   /* Large enough for the maximum input: 32 + 32 + 32 + 16 bytes. */
+   unsigned char keydata[112];
+   unsigned int offset = 0;
+   secp256k1_rfc6979_hmac_sha256 rng;
+   unsigned int i;
+   secp256k1_scalar msg;
+   unsigned char msgmod32[32];
+   /* Round-trip the message through a scalar to obtain its reduced form. */
+   secp256k1_scalar_set_b32(&msg, msg32, NULL);
+   secp256k1_scalar_get_b32(msgmod32, &msg);
+   /* We feed a byte array to the PRNG as input, consisting of:
+    * - the private key (32 bytes) and reduced message (32 bytes), see RFC 6979 3.2d.
+    * - optionally 32 extra bytes of data, see RFC 6979 3.6 Additional Data.
+    * - optionally 16 extra bytes with the algorithm name.
+    * Because the arguments have distinct fixed lengths it is not possible for
+    *  different argument mixtures to emulate each other and result in the same
+    *  nonces.
+    */
+   buffer_append(keydata, &offset, key32, 32);
+   buffer_append(keydata, &offset, msgmod32, 32);
+   if (data != NULL) {
+       buffer_append(keydata, &offset, data, 32);
+   }
+   if (algo16 != NULL) {
+       buffer_append(keydata, &offset, algo16, 16);
+   }
+   secp256k1_rfc6979_hmac_sha256_initialize(&rng, keydata, offset);
+   /* Generate counter + 1 outputs; each one overwrites nonce32, so the last
+    * one is what the caller receives. */
+   for (i = 0; i <= counter; i++) {
+       secp256k1_rfc6979_hmac_sha256_generate(&rng, nonce32, 32);
+   }
+   secp256k1_rfc6979_hmac_sha256_finalize(&rng);
+
+   /* Wipe the buffer that held the private key and the generator state. */
+   secp256k1_memclear(keydata, sizeof(keydata));
+   secp256k1_rfc6979_hmac_sha256_clear(&rng);
+   return 1;
+}
+
+/* Exported nonce function pointers. Both the RFC 6979 function and the default
+ * nonce function refer to nonce_function_rfc6979. */
+const secp256k1_nonce_function secp256k1_nonce_function_rfc6979 = nonce_function_rfc6979;
+const secp256k1_nonce_function secp256k1_nonce_function_default = nonce_function_rfc6979;
+
+/* Core ECDSA signing routine. Produces the signature scalars `r` and `s` (and,
+ * if `recid` is not NULL, a recovery id) for message `msg32` under secret key
+ * `seckey`, obtaining nonces from `noncefp` (the default nonce function when
+ * NULL) with `noncedata` as its extra data. Returns 1 on success. On failure
+ * returns 0 with `r`, `s` and `*recid` set to zero. */
+static int secp256k1_ecdsa_sign_inner(const secp256k1_context* ctx, secp256k1_scalar* r, secp256k1_scalar* s, int* recid, const unsigned char *msg32, const unsigned char *seckey, secp256k1_nonce_function noncefp, const void* noncedata) {
+    secp256k1_scalar sec, non, msg;
+    int ret = 0;
+    int is_sec_valid;
+    unsigned char nonce32[32];
+    unsigned int count = 0;
+    /* Default initialization here is important so we won't pass uninit values to the cmov in the end */
+    *r = secp256k1_scalar_zero;
+    *s = secp256k1_scalar_zero;
+    if (recid) {
+        *recid = 0;
+    }
+    if (noncefp == NULL) {
+        noncefp = secp256k1_nonce_function_default;
+    }
+
+    /* Fail if the secret key is invalid. */
+    is_sec_valid = secp256k1_scalar_set_b32_seckey(&sec, seckey);
+    /* An invalid key is swapped for the scalar one, so the signing steps below
+     * run the same way either way; the failure is only folded into ret later. */
+    secp256k1_scalar_cmov(&sec, &secp256k1_scalar_one, !is_sec_valid);
+    secp256k1_scalar_set_b32(&msg, msg32, NULL);
+    /* Keep asking for nonces, with an increasing counter, until one yields a
+     * signature or the nonce function itself reports failure. */
+    while (1) {
+        int is_nonce_valid;
+        ret = !!noncefp(nonce32, msg32, seckey, NULL, (void*)noncedata, count);
+        if (!ret) {
+            break;
+        }
+        is_nonce_valid = secp256k1_scalar_set_b32_seckey(&non, nonce32);
+        /* The nonce is still secret here, but it being invalid is less likely than 1:2^255. */
+        secp256k1_declassify(ctx, &is_nonce_valid, sizeof(is_nonce_valid));
+        if (is_nonce_valid) {
+            ret = secp256k1_ecdsa_sig_sign(&ctx->ecmult_gen_ctx, r, s, &sec, &msg, &non, recid);
+            /* The final signature is no longer a secret, nor is the fact that we were successful or not. */
+            secp256k1_declassify(ctx, &ret, sizeof(ret));
+            if (ret) {
+                break;
+            }
+        }
+        count++;
+    }
+    /* We don't want to declassify is_sec_valid and therefore the range of
+     * seckey. As a result is_sec_valid is included in ret only after ret was
+     * used as a branching variable. */
+    ret &= is_sec_valid;
+    /* Wipe every local that held secret material. */
+    secp256k1_memclear(nonce32, sizeof(nonce32));
+    secp256k1_scalar_clear(&msg);
+    secp256k1_scalar_clear(&non);
+    secp256k1_scalar_clear(&sec);
+    /* On failure, zero the outputs with conditional moves rather than a branch. */
+    secp256k1_scalar_cmov(r, &secp256k1_scalar_zero, !ret);
+    secp256k1_scalar_cmov(s, &secp256k1_scalar_zero, !ret);
+    if (recid) {
+        const int zero = 0;
+        secp256k1_int_cmov(recid, &zero, !ret);
+    }
+    return ret;
+}
+
+/* Create an ECDSA signature over the 32-byte message hash `msghash32` with
+ * `seckey` and store it in `signature`. Requires a context whose ecmult_gen
+ * context is built. Returns the result of secp256k1_ecdsa_sign_inner; the
+ * signature object is written in either case (that routine zeroes r and s on
+ * failure). */
+int secp256k1_ecdsa_sign(const secp256k1_context* ctx, secp256k1_ecdsa_signature *signature, const unsigned char *msghash32, const unsigned char *seckey, secp256k1_nonce_function noncefp, const void* noncedata) {
+    secp256k1_scalar r, s;
+    int ret;
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx));
+    ARG_CHECK(msghash32 != NULL);
+    ARG_CHECK(signature != NULL);
+    ARG_CHECK(seckey != NULL);
+
+    /* No recovery id is requested here (recid is NULL). */
+    ret = secp256k1_ecdsa_sign_inner(ctx, &r, &s, NULL, msghash32, seckey, noncefp, noncedata);
+    secp256k1_ecdsa_signature_save(signature, &r, &s);
+    return ret;
+}
+
+/* Check whether the 32 bytes at `seckey` form a valid secret key. Returns the
+ * result of secp256k1_scalar_set_b32_seckey; the temporary scalar is cleared
+ * before returning. */
+int secp256k1_ec_seckey_verify(const secp256k1_context* ctx, const unsigned char *seckey) {
+    secp256k1_scalar sec;
+    int ret;
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(seckey != NULL);
+
+    ret = secp256k1_scalar_set_b32_seckey(&sec, seckey);
+    secp256k1_scalar_clear(&sec);
+    return ret;
+}
+
+/* Read the secret key bytes `seckey` into `seckey_scalar` and compute the
+ * corresponding public point into `p`. Returns whether the secret key was
+ * valid. The caller is left to clear `seckey_scalar`. */
+static int secp256k1_ec_pubkey_create_helper(const secp256k1_ecmult_gen_context *ecmult_gen_ctx, secp256k1_scalar *seckey_scalar, secp256k1_ge *p, const unsigned char *seckey) {
+    secp256k1_gej pj;
+    int ret;
+
+    ret = secp256k1_scalar_set_b32_seckey(seckey_scalar, seckey);
+    /* An invalid key is replaced by the scalar one, so the multiplication
+     * below always runs; the caller must use the return value to discard the
+     * result in that case. */
+    secp256k1_scalar_cmov(seckey_scalar, &secp256k1_scalar_one, !ret);
+
+    secp256k1_ecmult_gen(ecmult_gen_ctx, &pj, seckey_scalar);
+    secp256k1_ge_set_gej(p, &pj);
+    secp256k1_gej_clear(&pj);
+    return ret;
+}
+
+/* Compute the public key for `seckey` and store it in `pubkey`. Requires a
+ * context whose ecmult_gen context is built. Returns 1 on success and 0 if the
+ * secret key is invalid, in which case `pubkey` is all zeros. */
+int secp256k1_ec_pubkey_create(const secp256k1_context* ctx, secp256k1_pubkey *pubkey, const unsigned char *seckey) {
+    secp256k1_ge p;
+    secp256k1_scalar seckey_scalar;
+    int ret = 0;
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(pubkey != NULL);
+    /* Zero the output first so the later argument checks fail with a cleared pubkey. */
+    memset(pubkey, 0, sizeof(*pubkey));
+    ARG_CHECK(secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx));
+    ARG_CHECK(seckey != NULL);
+
+    ret = secp256k1_ec_pubkey_create_helper(&ctx->ecmult_gen_ctx, &seckey_scalar, &p, seckey);
+    /* The key is always saved and then conditionally zeroed, instead of
+     * branching on ret. */
+    secp256k1_pubkey_save(pubkey, &p);
+    secp256k1_memczero(pubkey, sizeof(*pubkey), !ret);
+
+    secp256k1_scalar_clear(&seckey_scalar);
+    return ret;
+}
+
+/* Negate the secret key stored in the 32-byte buffer `seckey`, in place.
+ * Returns whether the input was a valid secret key. The buffer is overwritten
+ * in both cases: an invalid key is replaced by the zero scalar before the
+ * negation is applied. */
+int secp256k1_ec_seckey_negate(const secp256k1_context* ctx, unsigned char *seckey) {
+    secp256k1_scalar sec;
+    int ret = 0;
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(seckey != NULL);
+
+    ret = secp256k1_scalar_set_b32_seckey(&sec, seckey);
+    /* Conditional move instead of a branch on the validity of the key. */
+    secp256k1_scalar_cmov(&sec, &secp256k1_scalar_zero, !ret);
+    secp256k1_scalar_negate(&sec, &sec);
+    secp256k1_scalar_get_b32(seckey, &sec);
+
+    secp256k1_scalar_clear(&sec);
+    return ret;
+}
+
+/* Alternative name for secp256k1_ec_seckey_negate; forwards to it unchanged. */
+int secp256k1_ec_privkey_negate(const secp256k1_context* ctx, unsigned char *seckey) {
+    return secp256k1_ec_seckey_negate(ctx, seckey);
+}
+
+/* Negate the public key `pubkey` in place. Returns 1 on success. If the key
+ * cannot be loaded, 0 is returned and `pubkey` is left all zeros. */
+int secp256k1_ec_pubkey_negate(const secp256k1_context* ctx, secp256k1_pubkey *pubkey) {
+    int ret = 0;
+    secp256k1_ge p;
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(pubkey != NULL);
+
+    ret = secp256k1_pubkey_load(ctx, &p, pubkey);
+    /* Clear the output now; it is only refilled when the load succeeded. */
+    memset(pubkey, 0, sizeof(*pubkey));
+    if (ret) {
+        secp256k1_ge_neg(&p, &p);
+        secp256k1_pubkey_save(pubkey, &p);
+    }
+    return ret;
+}
+
+
+/* Add the 32-byte tweak `tweak32` to the secret scalar `sec` in place.
+ * Returns 1 only if the tweak did not overflow when read as a scalar and
+ * secp256k1_eckey_privkey_tweak_add reported success. */
+static int secp256k1_ec_seckey_tweak_add_helper(secp256k1_scalar *sec, const unsigned char *tweak32) {
+    secp256k1_scalar term;
+    int overflow = 0;
+    int ret = 0;
+
+    secp256k1_scalar_set_b32(&term, tweak32, &overflow);
+    /* Bitwise & rather than &&: the addition is performed even when the tweak
+     * overflowed. */
+    ret = (!overflow) & secp256k1_eckey_privkey_tweak_add(sec, &term);
+    secp256k1_scalar_clear(&term);
+    return ret;
+}
+
+/* Add the 32-byte tweak `tweak32` to the secret key in the 32-byte buffer
+ * `seckey`, in place. Returns 1 on success. On failure (invalid key, or the
+ * tweak helper failing) 0 is returned and the buffer receives the
+ * serialization of the zero scalar. */
+int secp256k1_ec_seckey_tweak_add(const secp256k1_context* ctx, unsigned char *seckey, const unsigned char *tweak32) {
+    secp256k1_scalar sec;
+    int ret = 0;
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(seckey != NULL);
+    ARG_CHECK(tweak32 != NULL);
+
+    ret = secp256k1_scalar_set_b32_seckey(&sec, seckey);
+    /* &= evaluates the helper unconditionally, even for an invalid key. */
+    ret &= secp256k1_ec_seckey_tweak_add_helper(&sec, tweak32);
+    secp256k1_scalar_cmov(&sec, &secp256k1_scalar_zero, !ret);
+    secp256k1_scalar_get_b32(seckey, &sec);
+
+    secp256k1_scalar_clear(&sec);
+    return ret;
+}
+
+/* Alternative name for secp256k1_ec_seckey_tweak_add; forwards to it unchanged. */
+int secp256k1_ec_privkey_tweak_add(const secp256k1_context* ctx, unsigned char *seckey, const unsigned char *tweak32) {
+    return secp256k1_ec_seckey_tweak_add(ctx, seckey, tweak32);
+}
+
+/* Apply the additive 32-byte tweak `tweak32` to the point `p` in place.
+ * Returns 0 if the tweak overflows when read as a scalar (the point is then
+ * not touched) or if secp256k1_eckey_pubkey_tweak_add fails; 1 otherwise. */
+static int secp256k1_ec_pubkey_tweak_add_helper(secp256k1_ge *p, const unsigned char *tweak32) {
+    secp256k1_scalar term;
+    int overflow = 0;
+    secp256k1_scalar_set_b32(&term, tweak32, &overflow);
+    return !overflow && secp256k1_eckey_pubkey_tweak_add(p, &term);
+}
+
+int secp256k1_ec_pubkey_tweak_add(const secp256k1_context* ctx, secp256k1_pubkey *pubkey, const unsigned char *tweak32) {
+    int loaded;
+    secp256k1_ge point;
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(pubkey != NULL);
+    ARG_CHECK(tweak32 != NULL);
+    /* Parse the key, then zero the caller's object so every failure path leaves it wiped. */
+    loaded = secp256k1_pubkey_load(ctx, &point, pubkey);
+    memset(pubkey, 0, sizeof(*pubkey));
+    if (loaded && secp256k1_ec_pubkey_tweak_add_helper(&point, tweak32)) {
+        secp256k1_pubkey_save(pubkey, &point);
+        return 1;
+    }
+    /* Either the load or the tweak step failed; *pubkey stays zeroed. */
+    return 0;
+}
+
+int secp256k1_ec_seckey_tweak_mul(const secp256k1_context* ctx, unsigned char *seckey, const unsigned char *tweak32) { /* Tweaks the key held in seckey in place using the scalar parsed from tweak32; returns ret. */
+    secp256k1_scalar factor;
+    secp256k1_scalar sec;
+    int ret = 0;
+    int overflow = 0;
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(seckey != NULL);
+    ARG_CHECK(tweak32 != NULL);
+
+    secp256k1_scalar_set_b32(&factor, tweak32, &overflow);
+    ret = secp256k1_scalar_set_b32_seckey(&sec, seckey);
+    ret &= (!overflow) & secp256k1_eckey_privkey_tweak_mul(&sec, &factor); /* Bitwise '&' throughout: the multiply call runs regardless of overflow or of the parse result. */
+    secp256k1_scalar_cmov(&sec, &secp256k1_scalar_zero, !ret); /* Flag is !ret; presumably replaces sec with zero on failure - confirm against the scalar cmov contract. */
+    secp256k1_scalar_get_b32(seckey, &sec); /* seckey is overwritten on both the success and the failure path. */
+
+    secp256k1_scalar_clear(&sec);
+    secp256k1_scalar_clear(&factor); /* Both temporaries are cleared before returning. */
+    return ret;
+}
+
+int secp256k1_ec_privkey_tweak_mul(const secp256k1_context* ctx, unsigned char *seckey, const unsigned char *tweak32) {
+    return secp256k1_ec_seckey_tweak_mul(ctx, seckey, tweak32); /* Alias: forwards all arguments unchanged and returns the callee's result. */
+}
+
+int secp256k1_ec_pubkey_tweak_mul(const secp256k1_context* ctx, secp256k1_pubkey *pubkey, const unsigned char *tweak32) {
+    secp256k1_scalar factor;
+    secp256k1_ge point;
+    int overflow = 0;
+    int loaded;
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(pubkey != NULL);
+    ARG_CHECK(tweak32 != NULL);
+
+    secp256k1_scalar_set_b32(&factor, tweak32, &overflow);
+    /* An out-of-range tweak short-circuits: the key is not even loaded in that case. */
+    loaded = !overflow && secp256k1_pubkey_load(ctx, &point, pubkey);
+    memset(pubkey, 0, sizeof(*pubkey)); /* Wiped unconditionally; rewritten only on full success. */
+    if (!loaded) {
+        return 0;
+    }
+    if (!secp256k1_eckey_pubkey_tweak_mul(&point, &factor)) {
+        return 0;
+    }
+    secp256k1_pubkey_save(pubkey, &point);
+    return 1;
+}
+
+int secp256k1_context_randomize(secp256k1_context* ctx, const unsigned char *seed32) {
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(secp256k1_context_is_proper(ctx));
+    if (!secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx)) { /* Nothing to re-blind; still reported as success. */
+        return 1;
+    }
+    secp256k1_ecmult_gen_blind(&ctx->ecmult_gen_ctx, seed32); /* seed32 is forwarded as-is; it is not NULL-checked here. */
+    return 1;
+}
+
+int secp256k1_ec_pubkey_combine(const secp256k1_context* ctx, secp256k1_pubkey *pubnonce, const secp256k1_pubkey * const *pubnonces, size_t n) {
+    secp256k1_gej sum;
+    secp256k1_ge term;
+    size_t i;
+
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(pubnonce != NULL);
+    memset(pubnonce, 0, sizeof(*pubnonce));
+    ARG_CHECK(n >= 1);
+    ARG_CHECK(pubnonces != NULL);
+
+    /* The output is already zeroed; add every input onto a running sum that starts at infinity. */
+    secp256k1_gej_set_infinity(&sum);
+    for (i = 0; i < n; i++) {
+        ARG_CHECK(pubnonces[i] != NULL);
+        secp256k1_pubkey_load(ctx, &term, pubnonces[i]); /* NOTE(review): the load's return value is not checked here. */
+        secp256k1_gej_add_ge(&sum, &sum, &term);
+    }
+    if (!secp256k1_gej_is_infinity(&sum)) {
+        secp256k1_ge_set_gej(&term, &sum);
+        secp256k1_pubkey_save(pubnonce, &term);
+        return 1;
+    }
+    return 0;
+}
+
+int secp256k1_tagged_sha256(const secp256k1_context* ctx, unsigned char *hash32, const unsigned char *tag, size_t taglen, const unsigned char *msg, size_t msglen) {
+    secp256k1_sha256 hasher;
+    VERIFY_CHECK(ctx != NULL);
+    ARG_CHECK(hash32 != NULL);
+    ARG_CHECK(tag != NULL);
+    ARG_CHECK(msg != NULL);
+    /* Start from the tag-initialized state, absorb the message, and write the digest to hash32. */
+    secp256k1_sha256_initialize_tagged(&hasher, tag, taglen);
+    secp256k1_sha256_write(&hasher, msg, msglen);
+    secp256k1_sha256_finalize(&hasher, hash32);
+    secp256k1_sha256_clear(&hasher); /* Scrub the hashing state before returning. */
+    return 1;
+}
+
+#ifdef ENABLE_MODULE_ECDH
+# include "modules/ecdh/main_impl.h"
+#endif
+
+#ifdef ENABLE_MODULE_RECOVERY
+# include "modules/recovery/main_impl.h"
+#endif
+
+#ifdef ENABLE_MODULE_EXTRAKEYS
+# include "modules/extrakeys/main_impl.h"
+#endif
+
+#ifdef ENABLE_MODULE_SCHNORRSIG
+# include "modules/schnorrsig/main_impl.h"
+#endif
+
+#ifdef ENABLE_MODULE_MUSIG
+# include "modules/musig/main_impl.h"
+#endif
+
+#ifdef ENABLE_MODULE_ELLSWIFT
+# include "modules/ellswift/main_impl.h"
+#endif
--- a/libsecp256k1/src/selftest.h
+++ b/libsecp256k1/src/selftest.h
@@ -0,0 +1,32 @@
+/***********************************************************************
+ * Copyright (c) 2020 Pieter Wuille                                    *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_SELFTEST_H
+#define SECP256K1_SELFTEST_H
+
+#include "hash.h"
+
+#include <string.h>
+
+static int secp256k1_selftest_sha256(void) {
+    static const unsigned char expected[32] = {
+        0xf0, 0x8a, 0x78, 0xcb, 0xba, 0xee, 0x08, 0x2b, 0x05, 0x2a, 0xe0, 0x70, 0x8f, 0x32, 0xfa, 0x1e,
+        0x50, 0xc5, 0xc4, 0x21, 0xaa, 0x77, 0x2b, 0xa5, 0xdb, 0xb4, 0x06, 0xa2, 0xea, 0x6b, 0xe3, 0x42,
+    };
+    static const char *message = "For this sample, this 63-byte string will be used as input data";
+    secp256k1_sha256 sha;
+    unsigned char digest[32];
+    secp256k1_sha256_initialize(&sha);
+    secp256k1_sha256_write(&sha, (const unsigned char*)message, 63); /* 63 bytes: the terminating NUL is not hashed. */
+    secp256k1_sha256_finalize(&sha, digest);
+    return secp256k1_memcmp_var(digest, expected, 32) == 0; /* 1 when the computed digest matches the known answer. */
+}
+
+static int secp256k1_selftest_passes(void) {
+    return secp256k1_selftest_sha256(); /* The SHA256 known-answer check is the only self-test run here. */
+}
+
+#endif /* SECP256K1_SELFTEST_H */
--- a/libsecp256k1/src/util.h
+++ b/libsecp256k1/src/util.h
@@ -0,0 +1,451 @@
+/***********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                              *
+ * Distributed under the MIT software license, see the accompanying    *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
+ ***********************************************************************/
+
+#ifndef SECP256K1_UTIL_H
+#define SECP256K1_UTIL_H
+
+#include "../include/secp256k1.h"
+#include "checkmem.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <limits.h>
+#if defined(_MSC_VER)
+/* For SecureZeroMemory */
+#include <Windows.h>
+#endif
+
+#define STR_(x) #x /* Stringifies x exactly as written (no macro expansion of x). */
+#define STR(x) STR_(x) /* Extra level of indirection: x is macro-expanded first, then stringified. */
+#define DEBUG_CONFIG_MSG(x) "DEBUG_CONFIG: " x /* x must be a string literal; adjacent literals concatenate. */
+#define DEBUG_CONFIG_DEF(x) DEBUG_CONFIG_MSG(#x "=" STR(x)) /* Yields "DEBUG_CONFIG: <name>=<expanded value>". */
+
+/* Debug helper for printing arrays of unsigned char: prints "<buf expression>[<len>] = " and then the bytes. len is evaluated twice. */
+#define PRINT_BUF(buf, len) do { \
+    printf("%s[%lu] = ", #buf, (unsigned long)(len)); \
+    print_buf_plain(buf, len); \
+} while(0)
+
+static void print_buf_plain(const unsigned char *buf, size_t len) {
+    /* Dumps buf as a brace-enclosed list of 0xNN, items, eight per indented row. */
+    const char *lead;
+    size_t pos;
+    printf("{");
+    for (pos = 0; pos < len; pos++) {
+        /* Every eighth byte opens a new row; the others are space-separated. */
+        lead = (pos % 8 == 0) ? "\n    " : " ";
+        printf("%s", lead);
+        printf("0x%02X,", buf[pos]);
+    }
+    printf("\n}\n");
+}
+
+# if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) ) /* Pre-C99: the 'inline' keyword is not available, so pick a vendor spelling. */
+#  if SECP256K1_GNUC_PREREQ(2,7)
+#   define SECP256K1_INLINE __inline__
+#  elif (defined(_MSC_VER))
+#   define SECP256K1_INLINE __inline
+#  else
+#   define SECP256K1_INLINE /* Unknown pre-C99 compiler: expands to nothing. */
+#  endif
+# else
+#  define SECP256K1_INLINE inline /* C99 or later: use the standard keyword. */
+# endif
+
+/** Assert statically that expr is true.
+ *
+ * This is a statement-like macro and can only be used inside functions. expr must be an integer constant expression because it is used as a case label.
+ */
+#define STATIC_ASSERT(expr) do { \
+    switch(0) { \
+        case 0: \
+        /* If expr evaluates to 0, we have two case labels "0", which is illegal. */ \
+        case /* ERROR: static assertion failed */ (expr): \
+        ; \
+    } \
+} while(0)
+
+/** Assert statically that expr is an integer constant expression, and run stmt.
+ *
+ * Useful for example to enforce that magnitude arguments are constant. stmt runs unconditionally after the switch, whatever the value of expr (even 42).
+ */
+#define ASSERT_INT_CONST_AND_DO(expr, stmt) do { \
+    switch(42) { \
+        /* C allows only integer constant expressions as case labels. */ \
+        case /* ERROR: integer argument is not constant */ (expr): \
+            break; \
+        default: ; \
+    } \
+    stmt; \
+} while(0)
+
+typedef struct { /* A callback: a function pointer plus an opaque argument that is handed back on every call. */
+    void (*fn)(const char *text, void* data); /* Handler; receives a message string and the data pointer below. */
+    const void* data; /* Stored const; secp256k1_callback_call casts the qualifier away when invoking fn. */
+} secp256k1_callback;
+
+static SECP256K1_INLINE void secp256k1_callback_call(const secp256k1_callback * const cb, const char * const text) {
+    cb->fn(text, (void*)cb->data); /* Invokes the stored handler with text; cb and cb->fn are not NULL-checked. */
+}
+
+#ifndef USE_EXTERNAL_DEFAULT_CALLBACKS /* Built-in defaults: print the message to stderr, then abort(). */
+static void secp256k1_default_illegal_callback_fn(const char* str, void* data) {
+    (void)data; /* Unused; the cast silences unused-parameter warnings. */
+    fprintf(stderr, "[libsecp256k1] illegal argument: %s\n", str);
+    abort();
+}
+static void secp256k1_default_error_callback_fn(const char* str, void* data) {
+    (void)data; /* Unused; the cast silences unused-parameter warnings. */
+    fprintf(stderr, "[libsecp256k1] internal consistency check failed: %s\n", str);
+    abort();
+}
+#else /* With USE_EXTERNAL_DEFAULT_CALLBACKS only declarations are provided; the definitions must come from elsewhere. */
+void secp256k1_default_illegal_callback_fn(const char* str, void* data);
+void secp256k1_default_error_callback_fn(const char* str, void* data);
+#endif
+
+static const secp256k1_callback default_illegal_callback = { /* Pairs the default illegal-argument handler with no user data. */
+    secp256k1_default_illegal_callback_fn,
+    NULL
+};
+
+static const secp256k1_callback default_error_callback = { /* Pairs the default internal-error handler with no user data. */
+    secp256k1_default_error_callback_fn,
+    NULL
+};
+
+
+#ifdef DETERMINISTIC /* Deterministic builds omit __FILE__/__LINE__ so the failure text does not depend on source layout. */
+#define TEST_FAILURE(msg) do { \
+    fprintf(stderr, "%s\n", msg); \
+    abort(); \
+} while(0)
+#else /* Default: prefix the message with the source location. Neither variant ends in ';', so TEST_FAILURE(...); is one statement and is safe in if/else. */
+#define TEST_FAILURE(msg) do { \
+    fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, msg); \
+    abort(); \
+} while(0)
+#endif
+
+#if SECP256K1_GNUC_PREREQ(3, 0)
+#define EXPECT(x,c) __builtin_expect((x),(c)) /* Branch hint: evaluates to x, with c as the expected value. */
+#else
+#define EXPECT(x,c) (x) /* No hint available: c is dropped and never evaluated. */
+#endif
+
+#ifdef DETERMINISTIC /* Deterministic builds report a fixed message that does not include the condition text. */
+#define CHECK(cond) do { \
+    if (EXPECT(!(cond), 0)) { \
+        TEST_FAILURE("test condition failed"); \
+    } \
+} while(0)
+#else /* Default: the stringified condition is appended to the failure message. */
+#define CHECK(cond) do { \
+    if (EXPECT(!(cond), 0)) { \
+        TEST_FAILURE("test condition failed: " #cond); \
+    } \
+} while(0)
+#endif
+
+/* Like assert(), but only active when VERIFY is defined. */
+#if defined(VERIFY)
+#define VERIFY_CHECK CHECK
+#else
+#define VERIFY_CHECK(cond) /* Expands to nothing: cond is not evaluated, so it must not carry needed side effects. */
+#endif
+
+static SECP256K1_INLINE void *checked_malloc(const secp256k1_callback* cb, size_t size) {
+    void *mem = malloc(size);
+    if (!mem) { /* Report through cb; if the callback returns, NULL is handed back to the caller. */
+        secp256k1_callback_call(cb, "Out of memory");
+    }
+    return mem;
+}
+
+#if defined(__BIGGEST_ALIGNMENT__) /* Prefer the compiler-predefined value when it exists. */
+#define ALIGNMENT __BIGGEST_ALIGNMENT__
+#else
+/* Using 16 bytes alignment because common architectures never have alignment
+ * requirements above 8 for any of the types we care about. In addition we
+ * leave some room because currently we don't care about a few bytes. */
+#define ALIGNMENT 16
+#endif
+
+/* ceil(x/y) for integers x > 0 and y > 0. Here, / denotes rational division. */
+#define CEIL_DIV(x, y) (1 + ((x) - 1) / (y))
+
+#define ROUND_TO_ALIGN(size) (CEIL_DIV(size, ALIGNMENT) * ALIGNMENT) /* Smallest multiple of ALIGNMENT that is >= size; inherits CEIL_DIV's size > 0 precondition. */
+
+/* Macro for restrict, when available and not in a VERIFY build. */
+#if defined(SECP256K1_BUILD) && defined(VERIFY)
+# define SECP256K1_RESTRICT /* VERIFY builds of the library itself: the qualifier is dropped entirely. */
+#else
+# if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) ) /* Pre-C99: fall back to a vendor spelling, if any. */
+#  if SECP256K1_GNUC_PREREQ(3,0)
+#   define SECP256K1_RESTRICT __restrict__
+#  elif (defined(_MSC_VER) && _MSC_VER >= 1400)
+#   define SECP256K1_RESTRICT __restrict
+#  else
+#   define SECP256K1_RESTRICT /* No known spelling: expands to nothing. */
+#  endif
+# else
+#  define SECP256K1_RESTRICT restrict /* C99 or later: use the standard keyword. */
+# endif
+#endif
+
+#if defined(__GNUC__)
+# define SECP256K1_GNUC_EXT __extension__ /* GNU-compatible compilers: expands to the __extension__ keyword. */
+#else
+# define SECP256K1_GNUC_EXT /* Other compilers: expands to nothing. */
+#endif
+
+/* Zero memory if flag == 1. Flag must be 0 or 1. Constant time. */
+static SECP256K1_INLINE void secp256k1_memczero(void *s, size_t len, int flag) {
+    unsigned char *p = (unsigned char *)s;
+    /* Access flag with a volatile-qualified lvalue.
+       This prevents clang from figuring out (after inlining) that flag can
+       only be 0 or 1, which leads to variable time code. */
+    volatile int vflag = flag;
+    unsigned char mask = -(unsigned char) vflag; /* flag == 1 -> all bits set; flag == 0 -> 0. */
+    while (len) {
+        *p &= ~mask; /* Every byte is read and rewritten regardless of flag; only the mask differs. */
+        p++;
+        len--;
+    }
+}
+
+/* Cleanses memory to prevent leaking sensitive info. Won't be optimized out. */
+static SECP256K1_INLINE void secp256k1_memclear(void *ptr, size_t len) {
+#if defined(_MSC_VER)
+    /* SecureZeroMemory is guaranteed not to be optimized out by MSVC. */
+    SecureZeroMemory(ptr, len);
+#elif defined(__GNUC__)
+    /* We use a memory barrier that scares the compiler away from optimizing out the memset.
+     *
+     * Quoting Adam Langley <agl@google.com> in commit ad1907fe73334d6c696c8539646c21b11178f20f
+     * in BoringSSL (ISC License):
+     *    As best as we can tell, this is sufficient to break any optimisations that
+     *    might try to eliminate "superfluous" memsets.
+     * This method is used in memzero_explicit() in the Linux kernel, too. Its advantage is that it
+     * is pretty efficient, because the compiler can still implement the memset() efficiently,
+     * just not remove it entirely. See "Dead Store Elimination (Still) Considered Harmful" by
+     * Yang et al. (USENIX Security 2017) for more background.
+     */
+    memset(ptr, 0, len);
+    __asm__ __volatile__("" : : "r"(ptr) : "memory"); /* Empty asm taking ptr as input with a "memory" clobber: this is the barrier. */
+#else
+    void *(*volatile const volatile_memset)(void *, int, size_t) = memset; /* Fallback: call memset through a volatile function pointer. */
+    volatile_memset(ptr, 0, len);
+#endif
+#ifdef VERIFY
+    SECP256K1_CHECKMEM_UNDEFINE(ptr, len); /* Presumably marks the cleared bytes as undefined for memory checkers - confirm in checkmem.h. */
+#endif
+}
+
+/** Variable-time byte comparison with memcmp-like semantics.
+ *
+ * Returns 0 if the first n bytes are equal, otherwise the difference of the first
+ * mismatching pair of bytes (compared as unsigned char). Used instead of memcmp to avoid
+ * possible compiler bugs, e.g. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95189 */
+static SECP256K1_INLINE int secp256k1_memcmp_var(const void *s1, const void *s2, size_t n) {
+    const unsigned char *lhs = s1;
+    const unsigned char *rhs = s2;
+
+    /* Walk both buffers in lockstep and stop at the first differing byte. */
+    for (; n > 0; n--, lhs++, rhs++) {
+        if (*lhs != *rhs) {
+            return (int)*lhs - (int)*rhs;
+        }
+    }
+    return 0;
+}
+
+/* Return 1 if all elements of array s are 0 and otherwise return 0.
+ * Constant-time. */
+static SECP256K1_INLINE int secp256k1_is_zero_array(const unsigned char *s, size_t len) {
+    unsigned char acc = 0;
+    int ret;
+    size_t i;
+
+    for (i = 0; i < len; i++) {
+        acc |= s[i]; /* OR-fold every byte with no early exit; acc stays 0 only if all bytes are 0. */
+    }
+    ret = (acc == 0); /* An empty array (len == 0) therefore yields 1. */
+    /* acc may contain secret values. Try to explicitly clear it. */
+    secp256k1_memclear(&acc, sizeof(acc));
+    return ret;
+}
+
+/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time.  Both *r and *a must be initialized and non-negative.*/
+static SECP256K1_INLINE void secp256k1_int_cmov(int *r, const int *a, int flag) {
+    unsigned int mask0, mask1, r_masked, a_masked;
+    /* Access flag with a volatile-qualified lvalue.
+       This prevents clang from figuring out (after inlining) that flag can
+       only be 0 or 1, which leads to variable time code. */
+    volatile int vflag = flag;
+
+    /* Casting a negative int to unsigned and back to int is implementation defined behavior */
+    VERIFY_CHECK(*r >= 0 && *a >= 0);
+
+    mask0 = (unsigned int)vflag + ~0u; /* flag == 1: 1 + UINT_MAX wraps to 0; flag == 0: all ones. */
+    mask1 = ~mask0; /* Complement of mask0: all ones exactly when flag == 1. */
+    r_masked = ((unsigned int)*r & mask0); /* Keeps *r when flag == 0, else 0. */
+    a_masked = ((unsigned int)*a & mask1); /* Keeps *a when flag == 1, else 0. */
+
+    *r = (int)(r_masked | a_masked); /* Exactly one operand is nonzero-capable, so the OR selects it without branching. */
+}
+
+#if defined(USE_FORCE_WIDEMUL_INT128_STRUCT) /* Selection chain: the first matching branch wins; explicit USE_FORCE_* overrides are tested before auto-detection. */
+/* If USE_FORCE_WIDEMUL_INT128_STRUCT is set, use int128_struct. */
+# define SECP256K1_WIDEMUL_INT128 1
+# define SECP256K1_INT128_STRUCT 1
+#elif defined(USE_FORCE_WIDEMUL_INT128)
+/* If USE_FORCE_WIDEMUL_INT128 is set, use int128. */
+# define SECP256K1_WIDEMUL_INT128 1
+# define SECP256K1_INT128_NATIVE 1
+#elif defined(USE_FORCE_WIDEMUL_INT64)
+/* If USE_FORCE_WIDEMUL_INT64 is set, use int64. */
+# define SECP256K1_WIDEMUL_INT64 1
+#elif defined(UINT128_MAX) || defined(__SIZEOF_INT128__)
+/* If a native 128-bit integer type exists, use int128. */
+# define SECP256K1_WIDEMUL_INT128 1
+# define SECP256K1_INT128_NATIVE 1
+#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
+/* On 64-bit MSVC targets (x86_64 and arm64), use int128_struct
+ * (which has special logic to implement using intrinsics on those systems). */
+# define SECP256K1_WIDEMUL_INT128 1
+# define SECP256K1_INT128_STRUCT 1
+#elif SIZE_MAX > 0xffffffff /* The test is on size_t's range, used as a proxy for the pointer/register width described below. */
+/* Systems with 64-bit pointers (and thus registers) very likely benefit from
+ * using 64-bit based arithmetic (even if we need to fall back to 32x32->64 based
+ * multiplication logic). */
+# define SECP256K1_WIDEMUL_INT128 1
+# define SECP256K1_INT128_STRUCT 1
+#else
+/* Lastly, fall back to int64 based arithmetic. */
+# define SECP256K1_WIDEMUL_INT64 1
+#endif
+
+#ifndef __has_builtin /* Compilers without __has_builtin get a stub so the #if tests below still parse. */
+#define __has_builtin(x) 0
+#endif
+
+/* Determine the number of trailing zero bits in a (non-zero) 32-bit x.
+ * This function is only intended to be used as fallback for
+ * secp256k1_ctz32_var, but permits it to be tested separately. */
+static SECP256K1_INLINE int secp256k1_ctz32_var_debruijn(uint32_t x) {
+    static const uint8_t debruijn[32] = {
+        0x00, 0x01, 0x02, 0x18, 0x03, 0x13, 0x06, 0x19, 0x16, 0x04, 0x14, 0x0A,
+        0x10, 0x07, 0x0C, 0x1A, 0x1F, 0x17, 0x12, 0x05, 0x15, 0x09, 0x0F, 0x0B,
+        0x1E, 0x11, 0x08, 0x0E, 0x1D, 0x0D, 0x1C, 0x1B
+    };
+    return debruijn[(uint32_t)((x & -x) * 0x04D7651FU) >> 27]; /* x & -x keeps only the lowest set bit; the top 5 bits of the 32-bit product index the table. */
+}
+
+/* Determine the number of trailing zero bits in a (non-zero) 64-bit x.
+ * This function is only intended to be used as fallback for
+ * secp256k1_ctz64_var, but permits it to be tested separately. */
+static SECP256K1_INLINE int secp256k1_ctz64_var_debruijn(uint64_t x) {
+    static const uint8_t debruijn[64] = {
+        0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
+        62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
+        63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
+        51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
+    };
+    return debruijn[(uint64_t)((x & -x) * 0x022FDD63CC95386DU) >> 58]; /* x & -x keeps only the lowest set bit; the top 6 bits of the 64-bit product index the table. */
+}
+
+/* Determine the number of trailing zero bits in a (non-zero) 32-bit x. */
+static SECP256K1_INLINE int secp256k1_ctz32_var(uint32_t x) {
+    VERIFY_CHECK(x != 0);
+#if (__has_builtin(__builtin_ctz) || SECP256K1_GNUC_PREREQ(3,4))
+    /* If the unsigned type is sufficient to represent the largest uint32_t, consider __builtin_ctz. */
+    if (((unsigned)UINT32_MAX) == UINT32_MAX) { /* Compile-time constant condition: true when unsigned is at least 32 bits wide. */
+        return __builtin_ctz(x);
+    }
+#endif
+#if (__has_builtin(__builtin_ctzl) || SECP256K1_GNUC_PREREQ(3,4))
+    /* Otherwise consider __builtin_ctzl (the unsigned long type is always at least 32 bits). */
+    return __builtin_ctzl(x);
+#else
+    /* If no suitable CTZ builtin is available, use a (variable time) software emulation. */
+    return secp256k1_ctz32_var_debruijn(x);
+#endif
+}
+
+/* Determine the number of trailing zero bits in a (non-zero) 64-bit x. */
+static SECP256K1_INLINE int secp256k1_ctz64_var(uint64_t x) {
+    VERIFY_CHECK(x != 0);
+#if (__has_builtin(__builtin_ctzl) || SECP256K1_GNUC_PREREQ(3,4))
+    /* If the unsigned long type is sufficient to represent the largest uint64_t, consider __builtin_ctzl. */
+    if (((unsigned long)UINT64_MAX) == UINT64_MAX) { /* Compile-time constant condition: true when unsigned long is at least 64 bits wide. */
+        return __builtin_ctzl(x);
+    }
+#endif
+#if (__has_builtin(__builtin_ctzll) || SECP256K1_GNUC_PREREQ(3,4))
+    /* Otherwise consider __builtin_ctzll (the unsigned long long type is always at least 64 bits). */
+    return __builtin_ctzll(x);
+#else
+    /* If no suitable CTZ builtin is available, use a (variable time) software emulation. */
+    return secp256k1_ctz64_var_debruijn(x);
+#endif
+}
+
+/* Decode four bytes at p as a big-endian uint32_t (p[0] is the most significant byte). */
+SECP256K1_INLINE static uint32_t secp256k1_read_be32(const unsigned char* p) {
+    const uint32_t upper = ((uint32_t)p[0] << 8) | (uint32_t)p[1];
+    const uint32_t lower = ((uint32_t)p[2] << 8) | (uint32_t)p[3];
+    /* Each half holds 16 bits; splice them into the final word. */
+    return (upper << 16) | lower;
+}
+
+/* Encode x into four bytes at p in big-endian order (p[0] receives the most significant byte). */
+SECP256K1_INLINE static void secp256k1_write_be32(unsigned char* p, uint32_t x) {
+    p[0] = (unsigned char)(x >> 24);
+    p[1] = (unsigned char)(x >> 16);
+    p[2] = (unsigned char)(x >>  8);
+    p[3] = (unsigned char)x;
+}
+
+/* Decode eight bytes at p as a big-endian uint64_t (p[0] is the most significant byte). */
+SECP256K1_INLINE static uint64_t secp256k1_read_be64(const unsigned char* p) {
+    uint64_t value = 0;
+    int idx;
+
+    /* Shift the accumulator up one byte per step so earlier bytes end up more significant. */
+    for (idx = 0; idx < 8; idx++) {
+        value = (value << 8) | (uint64_t)p[idx];
+    }
+    return value;
+}
+
+/* Encode x into eight bytes at p in big-endian order (p[0] receives the most significant byte). */
+SECP256K1_INLINE static void secp256k1_write_be64(unsigned char* p, uint64_t x) {
+    p[0] = (unsigned char)(x >> 56);
+    p[1] = (unsigned char)(x >> 48);
+    p[2] = (unsigned char)(x >> 40);
+    p[3] = (unsigned char)(x >> 32);
+    p[4] = (unsigned char)(x >> 24);
+    p[5] = (unsigned char)(x >> 16);
+    p[6] = (unsigned char)(x >>  8);
+    p[7] = (unsigned char)x;
+}
+
+/* Rotate a uint32_t to the right by `by` bit positions. */
+SECP256K1_INLINE static uint32_t secp256k1_rotr32(const uint32_t x, const unsigned int by) {
+#if defined(_MSC_VER)
+    return _rotr(x, by);  /* needs <stdlib.h> */
+#else
+    /* Reduce rotation amount to avoid UB when shifting. */
+    const unsigned int mask = CHAR_BIT * sizeof(x) - 1; /* Bit width of x minus one. */
+    /* Turned into a rot instruction by GCC and clang. */
+    return (x >> (by & mask)) | (x << ((-by) & mask)); /* (-by) & mask is the complementary left-shift count; both shifts are 0 when by is a multiple of the width. */
+#endif
+}
+
+#endif /* SECP256K1_UTIL_H */
--- a/signature_libsecp256k1.go
+++ b/signature_libsecp256k1.go
@@ -3,10 +3,23 @@
 package nostr

 /*
-#cgo LDFLAGS: -lsecp256k1
-#include <secp256k1.h>
-#include <secp256k1_schnorrsig.h>
-#include <secp256k1_extrakeys.h>
+#cgo CFLAGS: -I${SRCDIR}/libsecp256k1/include -I${SRCDIR}/libsecp256k1/src
+#cgo CFLAGS: -DECMULT_GEN_PREC_BITS=4
+#cgo CFLAGS: -DECMULT_WINDOW_SIZE=15
+#cgo CFLAGS: -DENABLE_MODULE_SCHNORRSIG=1
+#cgo CFLAGS: -DENABLE_MODULE_EXTRAKEYS=1
+
+#include "./libsecp256k1/src/secp256k1.c"
+#include "./libsecp256k1/src/precomputed_ecmult.c"
+#include "./libsecp256k1/src/precomputed_ecmult_gen.c"
+#include "./libsecp256k1/src/ecmult_gen.h"
+#include "./libsecp256k1/src/ecmult.h"
+#include "./libsecp256k1/src/modules/extrakeys/main_impl.h"
+#include "./libsecp256k1/src/modules/schnorrsig/main_impl.h"
+
+#include "./libsecp256k1/include/secp256k1.h"
+#include "./libsecp256k1/include/secp256k1_extrakeys.h"
+#include "./libsecp256k1/include/secp256k1_schnorrsig.h"
 */
 import "C"